##################################
#                                #
# Last modified 08/29/2010       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys

def _count_datasets_above_cutoff(fields, cutoff):
    """Count the dataset fields whose lower FPKM bound reaches the cutoff.

    Only fields starting with 'q' are examined; '-' placeholders and any
    other text are skipped. For an examined field, the fifth
    '|'-separated value (index 4) is parsed as a float and compared
    against ``cutoff`` with ``>=``.

    fields -- iterable of per-dataset column strings from one line
    cutoff -- minimum value (float) for a dataset to be counted

    Raises IndexError / ValueError if a 'q' field has fewer than five
    '|'-separated parts or a non-numeric fifth part.
    """
    present = 0
    for field in fields:
        if not field.startswith('q'):
            continue
        LoConfFPKM = float(field.split('|')[4])
        if LoConfFPKM >= cutoff:
            present += 1
    return present


def run():
    """Filter a tab-separated tracking file by per-dataset FPKM lower bound.

    Command-line arguments (read from sys.argv):
        1  tracking        -- path of the input tracking file
        2  FPKM_lo_cutoff  -- float threshold
        3  numDatasets     -- minimum number of datasets that must reach
                              the threshold for a line to be kept
        4  outfilename     -- path of the output file (overwritten)

    Columns 5 onward of each input line are treated as per-dataset
    fields. A line is copied unchanged to the output when at least
    numDatasets of those fields pass the threshold. A progress counter
    is printed every 100000 input lines.

    Prints usage and exits with status 1 when arguments are missing.
    """
    # All four positional arguments are required, so argv must hold at
    # least five entries (script name + 4); otherwise sys.argv[4] below
    # would raise IndexError instead of showing the usage text.
    if len(sys.argv) < 5:
        print('usage: python %s tracking FPKM_lo_cutoff numDatasets outfilename' % sys.argv[0])
        # NOTE(review): the two lines below describe a -datasetsInCellLine
        # option that this script never parses; they look like leftovers
        # from another script. Kept verbatim pending confirmation.
        print('\t-datasetsInCellLine file format: <name> <tab> <number of datasets>')
        print('\tNote: the name has to be unique and it has to be present in the tracking file annotation>')
        sys.exit(1)

    tracking = sys.argv[1]
    cutoff = float(sys.argv[2])
    minNumDatasets = int(sys.argv[3])
    outputfilename = sys.argv[4]

    # Open the input first so a missing/unreadable tracking file does not
    # leave behind an empty output file; both handles are closed on any
    # exit path.
    with open(tracking) as infile:
        with open(outputfilename, 'w') as outfile:
            for lineno, line in enumerate(infile, 1):
                if lineno % 100000 == 0:
                    print(lineno)
                fields = line.strip().split('\t')
                # The first four columns are skipped; only the remaining
                # per-dataset columns are evaluated.
                present = _count_datasets_above_cutoff(fields[4:], cutoff)
                if present >= minNumDatasets:
                    outfile.write(line)

# Script entry point: executed unconditionally at module level, so it runs
# both when the file is launched as a script and when it is imported.
run()

