##################################
#                                #
# Last modified 11/17/2009       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys

# Optional speed-up: enable psyco when it is installed. The script does not
# otherwise use it, so a missing module is silently ignored. Only ImportError
# is caught so that Ctrl-C or real errors are not swallowed.
try:
    import psyco
    psyco.full()
except ImportError:
    pass

def _relabelLine(line, GeneName):
    """Return `line` with everything before its 'chr' column replaced by GeneName.

    The search for 'chr' starts after the first tab (when there is one) so
    that a gene name which itself contains 'chr' is not mistaken for the
    chromosome column. Everything from that 'chr' to the end of the line is
    kept, even if 'chr' occurs again later in the line.

    Raises IndexError when no 'chr' is found.
    """
    searchStart = line.find('\t') + 1
    chrPos = line.find('chr', searchStart)
    if chrPos < 0:
        raise IndexError("no 'chr' field found in line: %r" % line)
    return GeneName + '\t' + line[chrPos:]


def run():
    """Collapse the lines of a tab-separated file to one line per gene.

    Command line (read from sys.argv):
        inputfilename  tab-separated input file
        fieldID        0-based column index holding the numeric score
        outfilename    file to write
        -uniqueOnly    optional flag, see below

    Lines starting with '#' are copied to the top of the output; lines
    starting with 'noname' and blank lines are skipped. The gene name is the
    first field with anything from '_alt' onwards removed.

    Default mode: for each gene the line with the highest score in column
    fieldID is written (the later line wins ties), with the text before its
    'chr' column replaced by the gene name.

    With -uniqueOnly: only genes that occur on exactly one line are written,
    and that line is written unchanged; fieldID is not parsed as a score.

    Genes are written in order of first appearance in the input. The usage
    message also mentions '-add', but this function does not act on it.

    Exits with status 1 after printing the usage message when fewer than
    three positional arguments are given.
    """
    # argv[0] is the script name, so three required arguments need len >= 4.
    if len(sys.argv) < 4:
        print('usage: python %s inputfilename fieldID outfilename [-uniqueOnly] [-add]' % sys.argv[0])
        sys.exit(1)

    inputfilename = sys.argv[1]
    fieldID = int(sys.argv[2])
    outputfilename = sys.argv[3]
    doUniqueOnly = '-uniqueOnly' in sys.argv

    headerLines = []
    geneOrder = []  # gene names in order of first appearance
    geneDict = {}   # gene name -> list of lines, or of (line, score) pairs
    with open(inputfilename) as infile:
        for line in infile:
            if line.startswith('#'):
                headerLines.append(line)
                continue
            if line.startswith('noname') or not line.strip():
                continue
            fields = line.strip().split('\t')
            GeneName = fields[0].split('_alt')[0]
            if GeneName not in geneDict:
                geneDict[GeneName] = []
                geneOrder.append(GeneName)
            if doUniqueOnly:
                geneDict[GeneName].append(line)
            else:
                geneDict[GeneName].append((line, float(fields[fieldID])))

    with open(outputfilename, 'w') as outfile:
        outfile.writelines(headerLines)
        for GeneName in geneOrder:
            entries = geneDict[GeneName]
            if doUniqueOnly:
                if len(entries) == 1:
                    outfile.write(entries[0])
                continue
            # Seed with the first entry rather than 0.0 so that genes whose
            # scores are all negative still get their best line selected.
            bestLine, bestScore = entries[0]
            for (line, score) in entries[1:]:
                if score >= bestScore:
                    bestLine, bestScore = line, score
            outfile.write(_relabelLine(bestLine, GeneName))

# Script entry point: run() is called unconditionally at module level, so it
# executes whenever this file is run (and also if it is imported).
run()


