##################################
#                                #
# Last modified 10/07/2011       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
from sets import Set

def _load_ids_to_keep(fpkm, minFPKM, doFPKMlo):
    """Return the set of tracking IDs in the FPKM table that pass the filter.

    fpkm     -- path to a tab-separated table whose header row starts with
                'tracking_id' and contains the columns 'status', 'FPKM' and
                'FPKM_conf_lo'.
    minFPKM  -- minimum value (inclusive) a row must reach to be kept.
    doFPKMlo -- if true, compare the 'FPKM_conf_lo' column against minFPKM
                instead of the 'FPKM' column.

    Rows whose status is not 'OK' are always discarded.
    Raises ValueError if a required column is missing from the header row.
    """
    keep = set()
    with open(fpkm) as handle:
        for line in handle:
            fields = line.strip().split('\t')
            if line.startswith('tracking_id'):
                # Header row: resolve column positions by name so the
                # column order in the table does not matter.
                status_col = fields.index('status')
                fpkm_col = fields.index('FPKM')
                fpkm_lo_col = fields.index('FPKM_conf_lo')
                continue
            if fields[status_col] != 'OK':
                continue
            if doFPKMlo:
                value = float(fields[fpkm_lo_col])
            else:
                value = float(fields[fpkm_col])
            if value >= minFPKM:
                # The first column is the tracking ID that is later matched
                # against the transcript_id attribute of the GTF.
                keep.add(fields[0])
    return keep


def _filter_gtf(gtf, outfilename, keep):
    """Copy to outfilename the GTF lines whose transcript_id is in keep.

    Lines starting with '#' are copied through unchanged.  Every other line
    must carry a 'transcript_id "...";' attribute in its ninth tab-separated
    field; a line without one raises IndexError.
    """
    # Both handles are managed by 'with' so they are closed even if a
    # malformed line aborts the filtering part-way through.
    with open(outfilename, 'w') as outfile:
        with open(gtf) as handle:
            for line in handle:
                if line.startswith('#'):
                    outfile.write(line)
                    continue
                fields = line.strip().split('\t')
                transcriptID = fields[8].split('transcript_id "')[1].split('";')[0]
                if transcriptID in keep:
                    outfile.write(line)


def run():
    """Filter a GTF file down to the transcripts that pass an FPKM cutoff.

    Reads its arguments from sys.argv:
        gtf fpkm_tracking minFPKM outfilename [-FPKM_lo]

    Transcripts are kept when their row in the fpkm_tracking table has
    status 'OK' and an FPKM (or FPKM_conf_lo, when -FPKM_lo is given) of at
    least minFPKM.  Prints the usage message and exits with status 1 when
    too few arguments are supplied.
    """
    # Four positional arguments are required (sys.argv[4] is the output
    # file), so sys.argv must hold at least five entries.
    if len(sys.argv) < 5:
        print('usage: python %s gtf fpkm_tracking minFPKM outfilename [-FPKM_lo]' % sys.argv[0])
        sys.exit(1)

    gtf = sys.argv[1]
    fpkm = sys.argv[2]
    minFPKM = float(sys.argv[3])
    outfilename = sys.argv[4]

    doFPKMlo = '-FPKM_lo' in sys.argv

    # The ID set is built before the output file is opened, so a failure
    # while reading the FPKM table does not truncate an existing output.
    keep = _load_ids_to_keep(fpkm, minFPKM, doFPKMlo)
    _filter_gtf(gtf, outfilename, keep)
        
# Script entry point: runs the filter using the arguments in sys.argv.
# NOTE: there is no __main__ guard, so this call also executes when the
# file is imported as a module.
run()

