##################################
#                                #
# Last modified 11/23/2010       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
from sets import Set

def run():

    print sys.argv

    if len(sys.argv) < 5:
        print 'usage: python %s inputfilename overlapped-peaks.txt GTFfilename IDR-threshold outputfile_suffix' % sys.argv[0]
        sys.exit(1)
    
    input = sys.argv[1]
    overlapped = sys.argv[2]
    gtf = sys.argv[3]
    IDRthreshold = float(sys.argv[4])
    outfilename = sys.argv[5]

    PassingDict={}
    NotPassingDict={}
    linelist = open(overlapped)
    for line in linelist:
        fields=line.strip().split(' ')
        try:
            IDR=float(fields[5])
        except:
            print 'skipping line:', line.strip()
            continue
        ID=fields[1][1:-1]
        if IDR <= IDRthreshold:
            PassingDict[ID]=line.strip()
        else:
            NotPassingDict[ID]=line.strip()

    linelist = open(input)
    for line in linelist:
        fields=line.strip().split('\t')
        ID=fields[0]
        if fields[1]=='-1' or fields[3]== '-1':
            NotPassingDict[ID]=line.strip().replace('\t',' ')

    outfile_passing = open('passing-'+str(IDRthreshold)+'-'+outfilename, 'w')
    outfile_not_passing = open('not-passing-'+str(IDRthreshold)+'-'+outfilename, 'w')

    ExonNumberDict={}
    linelist = open(gtf)
    for line in linelist:
        fields=line.strip().split('\t')
        if fields[2]!='exon':
            continue
        chr=fields[0]
        left=fields[3]
        right=fields[4]
        strand=fields[6]
        ID=fields[8].split('transcript_id "')[1].split('";')[0]
        if ExonNumberDict.has_key(ID):
            pass
        else:
            ExonNumberDict[ID]=[]
        ExonNumberDict[ID].append((chr,left,right,strand))

    for ID in ExonNumberDict:
        ExonNumberDict[ID]=list(Set(ExonNumberDict[ID]))
        if PassingDict.has_key(ID):
            if len(ExonNumberDict[ID])==1:
                outline=PassingDict[ID]+'\tsingle-exon\t1'
                outfile_passing.write(outline+'\n')
            if len(ExonNumberDict[ID])>1:
                outline=PassingDict[ID]+'\tmulti-exon\t'+str(len(ExonNumberDict[ID]))
                outfile_passing.write(outline+'\n')
        if NotPassingDict.has_key(ID):
            if len(ExonNumberDict[ID])==1:
                outline=NotPassingDict[ID]+'\tsingle-exon\t1'
                outfile_not_passing.write(outline+'\n')
            if len(ExonNumberDict[ID])>1:
                outline=NotPassingDict[ID]+'\tmulti-exon\t'+str(len(ExonNumberDict[ID]))
                outfile_not_passing.write(outline+'\n')

    outfile_passing.close()
    outfile_not_passing.close()
            
run()