##################################
#                                #
# Last modified 03/21/2010       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math

def run():
    """Write merged de novo splice junctions that are not in a known-splices list.

    Arguments are read from ``sys.argv``:

      1. Known-splices file: one junction ID per line, ``chr:x-y+/-``.
      2. List file: one splice-file path per line.  Each splice file is
         tab-delimited with the columns
         ``chr left right orientation Ntotal Nstaggered``.
      3. Output file name.
      ``-minStaggered num`` (optional): skip junctions whose summed
      Nstaggered count is below ``num``.

    Junctions are keyed by ``chr:(left-1)-right<orientation>``; Ntotal and
    Nstaggered are summed over every occurrence of the same key across all
    listed files.  Keys found in the known-splices file are dropped.  The
    remaining junctions are written in sorted key order as tab-delimited
    lines ``chr, left-1, right, orientation, Ntotal, Nstaggered``.

    Prints the usage message and exits with status 1 when fewer than three
    positional arguments are given, or when ``-minStaggered`` is not
    followed by an integer.  Blank lines in the list file and in the splice
    files are ignored.
    """
    usage = 'usage: python %s <Known Splices format: chr:x-y+/-> <list of splice file; format for each file: chr left right orientation Ntotal Nstaggered> outfilename [-minStaggered num]' % sys.argv[0]

    # Three positional arguments are mandatory, so argv (which also holds
    # the script name) needs at least four entries.
    if len(sys.argv) < 4:
        print(usage)
        sys.exit(1)

    splices = sys.argv[1]
    listofsplices = sys.argv[2]
    outfilename = sys.argv[3]

    # None means "no Nstaggered filtering".
    minStag = None
    if '-minStaggered' in sys.argv:
        try:
            minStag = int(sys.argv[sys.argv.index('-minStaggered') + 1])
        except (IndexError, ValueError):
            # Flag given without a usable integer after it.
            print(usage)
            sys.exit(1)

    # Only membership is ever tested, so a set of IDs is sufficient.
    with open(splices) as known_file:
        known_splices = set(line.strip() for line in known_file)

    with open(listofsplices) as list_file:
        splice_files = [line.strip() for line in list_file if line.strip()]

    de_novo = {}
    for path in splice_files:
        # Progress output: name of the splice file being read.
        print(path)
        with open(path) as splice_file:
            for line in splice_file:
                stripped = line.strip()
                if not stripped:
                    continue
                fields = stripped.split('\t')
                chrom = fields[0]
                # The left coordinate is shifted down by one before the ID
                # is built; presumably this matches the coordinate
                # convention of the known-splices file -- confirm.
                start = str(int(fields[1]) - 1)
                stop = str(int(fields[2]))
                orientation = fields[3]
                n_total = int(fields[4])
                n_staggered = int(fields[5])
                splice_id = chrom + ':' + start + '-' + stop + orientation
                if splice_id in de_novo:
                    de_novo[splice_id]['Ntotal'] += n_total
                    de_novo[splice_id]['Nstaggered'] += n_staggered
                else:
                    de_novo[splice_id] = {
                        'chr': chrom,
                        'start': start,
                        'stop': stop,
                        'orientation': orientation,
                        'Ntotal': n_total,
                        'Nstaggered': n_staggered,
                    }

    # The output is opened only after all inputs were read successfully, so
    # a failed run does not leave a truncated output file behind.
    with open(outfilename, 'w') as outfile:
        for splice_id in sorted(de_novo):
            if splice_id in known_splices:
                continue
            record = de_novo[splice_id]
            if minStag is not None and record['Nstaggered'] < minStag:
                continue
            outfile.write('\t'.join([
                record['chr'],
                record['start'],
                record['stop'],
                record['orientation'],
                str(record['Ntotal']),
                str(record['Nstaggered']),
            ]) + '\n')

# Run only when executed as a script, so that importing this module does not
# trigger command-line parsing or file I/O.
if __name__ == '__main__':
    run()
