##################################
#                                #
# Last modified 11/23/2010       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
from sets import Set

def run():

    print sys.argv

    if len(sys.argv) < 5:
        print 'usage: python %s IDR-formatted npeaks.txt gtf IDR-threshold outputfile_suffix' % sys.argv[0]
        sys.exit(1)
    
    input = sys.argv[1]
    npeaks = sys.argv[2]
    gtf = sys.argv[3]
    IDRthreshold = float(sys.argv[4])
    outfilename = sys.argv[5]

    linelist = open(npeaks)
    lastIDR=0
    for line in linelist:
        fields=line.strip().split(' ')
        if 'IDR.cutoff' in line:
            continue
        IDR=float(fields[0])
        if IDRthreshold >= lastIDR and IDR >= IDRthreshold:
            minRep1=float(fields[3])
            minRep2=float(fields[4])
            print line
            break
        else:
            lastIDR=IDR

    print IDR, minRep1, minRep2

    PassingDict={}
    NotPassingDict={}
    linelist = open(input)
    for line in linelist:
        fields=line.strip().split('\t')
        ID=fields[0]
        rep1=float(fields[1])
        rep2=float(fields[3])
        if rep1 >= minRep1 and rep2 >= minRep2:
            PassingDict[ID]=line.strip()
        else:
            NotPassingDict[ID]=line.strip()

    outfile_passing = open('passing-'+str(IDRthreshold)+'-'+outfilename, 'w')
    outfile_not_passing = open('not-passing-'+str(IDRthreshold)+'-'+outfilename, 'w')

    ExonNumberDict={}
    linelist = open(gtf)
    for line in linelist:
        fields=line.strip().split('\t')
        if fields[2]!='exon':
            continue
        chr=fields[0]
        left=fields[3]
        right=fields[4]
        strand=fields[6]
        ID=fields[8].split('transcript_id "')[1].split('";')[0]
        if ExonNumberDict.has_key(ID):
            pass
        else:
            ExonNumberDict[ID]=[]
        ExonNumberDict[ID].append((chr,left,right,strand))

    for ID in ExonNumberDict:
        ExonNumberDict[ID]=list(Set(ExonNumberDict[ID]))
        if PassingDict.has_key(ID):
            if len(ExonNumberDict[ID])==1:
                outline=PassingDict[ID]+'\tsingle-exon\t1'
                outfile_passing.write(outline+'\n')
            if len(ExonNumberDict[ID])>1:
                outline=PassingDict[ID]+'\tmulti-exon\t'+str(len(ExonNumberDict[ID]))
                outfile_passing.write(outline+'\n')
        if NotPassingDict.has_key(ID):
            if len(ExonNumberDict[ID])==1:
                outline=NotPassingDict[ID]+'\tsingle-exon\t1'
                outfile_not_passing.write(outline+'\n')
            if len(ExonNumberDict[ID])>1:
                outline=NotPassingDict[ID]+'\tmulti-exon\t'+str(len(ExonNumberDict[ID]))
                outfile_not_passing.write(outline+'\n')

    outfile_passing.close()
    outfile_not_passing.close()
            
run()