##################################
#                                #
# Last modified 2017/08/04       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string

def run():
    """Print chrom/left/right/LFC rows for genes listed in a DE table.

    Positional command-line arguments (read from ``sys.argv``):
        1. DESeq_file       -- tab-delimited table with gene names and LFCs
        2. geneNameFieldID  -- 0-based column of the gene name in that table
        3. LFCFieldID       -- 0-based column of the LFC value in that table
        4. genes/transcripts.bed -- tab-delimited annotation file
        5. chrFieldID       -- 0-based column of the chromosome in the
                               annotation; the two columns right after it
                               are read as integer left/right coordinates
        6. geneNameFieldID  -- 0-based column of the gene name in the
                               annotation

    Optional arguments:
        -chr chrXX1,chrXX2,...  only use annotation lines on these chromosomes
        -diff pos|neg           'pos' drops rows with LFC < 0, 'neg' drops
                                rows with LFC > 0 (LFC == 0 is always kept)

    In both files, lines starting with '#' and blank lines are skipped.
    Annotation lines sharing a gene name are merged into one span
    (minimum left, maximum right).  The output rows are sorted by
    (chromosome, left, right, LFC) and printed tab-separated to stdout.

    Exits with status 1 if fewer than six positional arguments are given
    or if one gene name is found on two different chromosomes.  Raises
    KeyError if a gene in the DE table does not occur in the annotation
    at all, and ValueError if a coordinate or LFC field is not numeric.
    """
    if len(sys.argv) < 7:
        print('usage: python %s DESeq_file geneNameFieldID LFCFieldID genes/transcripts.bed chrFieldID geneNameFieldID [-chr chrXX1,chrXX2,...] [-diff pos|neg]' % sys.argv[0])
        print('\tthe script will print to stdout')
        sys.exit(1)

    DE = sys.argv[1]
    DEgeneFieldID = int(sys.argv[2])
    LFCgeneFieldID = int(sys.argv[3])
    genes = sys.argv[4]
    chrFieldID = int(sys.argv[5])
    geneFieldID = int(sys.argv[6])

    # None means "no chromosome filter requested".
    wantedChr = None
    if '-chr' in sys.argv:
        wantedChr = set(sys.argv[sys.argv.index('-chr') + 1].split(','))

    # None means "no sign filter requested".
    diffWanted = None
    if '-diff' in sys.argv:
        diffWanted = sys.argv[sys.argv.index('-diff') + 1]

    # gene name -> (chromosome, leftmost coordinate, rightmost coordinate)
    geneSpans = {}
    # Names of genes seen only on chromosomes rejected by -chr.  They are
    # remembered so that their rows in the DE table can be skipped instead
    # of failing the annotation lookup further down.
    excludedGenes = set()

    with open(genes) as annotationFile:
        for line in annotationFile:
            if line.startswith('#'):
                continue
            stripped = line.strip()
            if not stripped:
                continue
            fields = stripped.split('\t')
            geneName = fields[geneFieldID]
            chrom = fields[chrFieldID]
            if wantedChr is not None and chrom not in wantedChr:
                excludedGenes.add(geneName)
                continue
            left = int(fields[chrFieldID + 1])
            right = int(fields[chrFieldID + 2])
            if geneName in geneSpans:
                (gchr, gleft, gright) = geneSpans[geneName]
                if gchr != chrom:
                    print('identical gene names on multiple chromosomes encountered, exiting')
                    print(geneName)
                    sys.exit(1)
                # Widen the stored span so it covers every line of the gene.
                left = min(left, gleft)
                right = max(right, gright)
            geneSpans[geneName] = (chrom, left, right)

    outRows = []

    with open(DE) as deFile:
        for line in deFile:
            if line.startswith('#'):
                continue
            stripped = line.strip()
            if not stripped:
                continue
            fields = stripped.split('\t')
            geneName = fields[DEgeneFieldID]
            LFC = float(fields[LFCgeneFieldID])
            if diffWanted == 'pos' and LFC < 0:
                continue
            if diffWanted == 'neg' and LFC > 0:
                continue
            if geneName not in geneSpans and geneName in excludedGenes:
                # The gene lies on a chromosome filtered out by -chr.
                continue
            # A gene missing from the annotation altogether raises KeyError.
            (chrom, left, right) = geneSpans[geneName]
            outRows.append((chrom, left, right, LFC))

    outRows.sort()

    for (chrom, left, right, LFC) in outRows:
        print('\t'.join([chrom, str(left), str(right), str(LFC)]))

  
# Only run the command-line entry point when this file is executed as a
# script, so that importing the module does not parse sys.argv or exit.
if __name__ == '__main__':
    run()