##################################
#                                #
# Last modified 08/23/2010       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
from sets import Set

def run():
    """Filter TSS records by distance to the closest TSS and count FPKM hits.

    Command-line arguments (read from ``sys.argv``):
        1. input file name (tab-separated text)
        2. minimum upstream distance (integer)
        3. minimum downstream distance (integer)
        4. comma-separated list of FPKM thresholds
        5. output file name

    Input columns used (0-based): 0 gene, 1 ID, 2 chromosome, 3 TSS position,
    4 strand, 6 distance to the closest TSS, and 8 onwards the numeric values
    that are compared against the thresholds. Columns 5 and 7 are ignored.

    Every input line starting with '#' produces a header line in the output:
    the fixed column names, one column per (sorted) threshold, then columns
    8 onwards of that '#' line. Every retained data line produces: gene, ID,
    chromosome, TSS-200, TSS+200, TSS, strand, closest distance, then for each
    threshold the number of value columns that are >= that threshold, then the
    original value columns verbatim.

    Prints a usage message and exits with status 1 when too few arguments
    are supplied. Raises ValueError if a numeric field cannot be parsed.
    """
    # Five user arguments are required; sys.argv[0] is the script name, so the
    # list must hold at least six entries before sys.argv[5] can be read.
    if len(sys.argv) < 6:
        print('usage: python %s <Novel-5ends-filtered-all> <min upstream dist> <min downstream dist> <FPKM1,FPKM2,..FPKMN> upstreamoutputfilename' % sys.argv[0])
        sys.exit(1)

    inputfilename = sys.argv[1]
    minUp = int(sys.argv[2])
    minDown = int(sys.argv[3])
    FPKMList = sorted(float(FPKM) for FPKM in sys.argv[4].split(','))
    outfilename = sys.argv[5]

    # The header is kept in its own variable so that data rows can never
    # overwrite it before a later '#' line is processed.
    header = '#gene+\tID\tchr\t200left\t200right\tTSS\tstrand\tclosestTSS'
    for FPKM in FPKMList:
        header = header + '\t' + str(FPKM)

    # Context managers guarantee both files are closed, even on a parse error.
    with open(outfilename, 'w') as outfile:
        with open(inputfilename) as infile:
            for line in infile:
                fields = line.strip().split('\t')

                if line.startswith('#'):
                    outfile.write('\t'.join([header] + fields[8:]) + '\n')
                    continue

                # Skip blank lines (e.g. a trailing newline at end of file)
                # instead of failing with IndexError on the missing columns.
                if not line.strip():
                    continue

                gene = fields[0]
                ID = fields[1]
                chrom = fields[2]
                TSS = int(fields[3])
                strand = fields[4]
                closest = int(fields[6])

                # NOTE(review): the absolute distance is tested against BOTH
                # minimums regardless of its sign, i.e. this is equivalent to
                # abs(closest) < max(minUp, minDown). Confirm whether the sign
                # of `closest` was meant to select which minimum applies.
                distance = abs(closest)
                if distance < minUp or distance < minDown:
                    continue

                samples = fields[8:]
                values = [float(sample) for sample in samples]

                row = [gene, ID, chrom, str(TSS - 200), str(TSS + 200),
                       str(TSS), strand, str(closest)]
                # One count per threshold: how many values reach it.
                for FPKM in FPKMList:
                    row.append(str(sum(1 for value in values if value >= FPKM)))
                row.extend(samples)
                outfile.write('\t'.join(row) + '\n')
   
# Run the command-line entry point only when this file is executed as a
# script, so that importing the module does not parse sys.argv or exit.
if __name__ == '__main__':
    run()
