##################################
#                                #
# Last modified 5/6/2009         # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string

try:
	import psyco
	psyco.full()
except:
	pass

def run():

    if len(sys.argv) < 4:
        print 'usage: python %s listofgenes genefieldID ERANGEgetallgenesfile outfilename [-htscheck ERANGEhtsfilename ]' % sys.argv[0]
        sys.exit(1)

    inputfilename = sys.argv[1]
    genefieldID = int(sys.argv[2])
    getallgenesfilename = sys.argv[3]
    outputfilename = sys.argv[4]
    doHtscheck=False
    if '-nohtscheck' in sys.argv:
        doHtscheck=True
        ERANGEfilename = sys.argv[sys.argv.index(-nohtscheck)+1]


    outfile = open(outputfilename, 'w')

    listoflines = open(inputfilename)
    lineslist = listoflines.readlines()
    listofgenes=[]
    for line in lineslist:
        fields=line.split('\n')[0].split('\t')
        listofgenes.append(fields[genefieldID].split(' ')[0])

    print 'finished parsing gene list'

 
    if doHtscheck:
        listofsites=[]
        listoflines = open(getallgenesfilename)
        lineslist = listoflines.readlines()
        for line in lineslist:
            fields=line.split('\n')[0].split('\t')
            listofsites.append(fields[0]+'\t'+fields[1]+'\t'+fields[2])

        print 'finished parsing getalgenes list'

        listoflines = open(ERANGEfilename)
        lineslist = listoflines.readlines()
        i=len(lineslist)
        for line in lineslist:
            if line[0]=='#':
                continue
            print i
            i=i-1
            linefields = line.split('\n')[0].split('\t')
            for site in listofsites:
                sitefields = site.split('\t')
                if sitefields[0].split(' ')[0] in listofgenes and linefields[1]==sitefields[1] and linefields[2]==sitefields[2]:    
                    outfile.write(line)
                    continue
    else:
        listofsites=[]
        listoflines = open(getallgenesfilename)
        lineslist = listoflines.readlines()
        for line in lineslist:
            fields=line.split('\n')[0].split('\t')
            if fields[0].split(' ')[0] in listofgenes:
                outline = line.split(' ')[2]
                outfile.write(outline)
                
    outfile.close()

run()

