##################################
#                                #
# Last modified 5/6/2009         # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
from sets import Set

# psyco is an optional accelerator: use it when it is installed, otherwise
# carry on without it.  Only a failed import is ignored, so Ctrl-C and
# genuine errors are no longer silently swallowed.
try:
    import psyco
    psyco.full()
except ImportError:
    pass

def run():
    """Write, for every gene in a gene list, the sites recorded for it.

    Reads three file names from ``sys.argv``:

    1. gene list file -- tab-delimited; the gene name is the first
       space-delimited token of the first column, normalised with
       ``str.capitalize()``.
    2. genes+sites file -- tab-delimited; column 2 holds the gene name and
       column 10 the site.  Names are compared as-is (case-sensitively)
       against the capitalised names from the gene list.  Lines with fewer
       than 10 columns (e.g. blank lines) are ignored.
    3. output file -- receives one line per line of the gene list, in the
       same order (duplicates included), formatted as
       ``gene<TAB>number of sites<TAB>sites`` where every site is followed
       by ``', '``.

    Prints a usage message and exits with status 1 when fewer than three
    file names are supplied.
    """
    # Three file names are required, so argv must hold at least four entries.
    if len(sys.argv) < 4:
        print('usage: python %s listofgenesfilename genes+sites-filename outfilename' % sys.argv[0])
        sys.exit(1)

    inputfilename, ERANGEfilename, outputfilename = sys.argv[1:4]

    # Read the gene list once, keeping file order (and duplicates) for output.
    with open(inputfilename) as genefile:
        genes_in_order = [
            line.rstrip('\n').split('\t')[0].split(' ')[0].capitalize()
            for line in genefile
        ]

    # The dict doubles as the membership test, giving O(1) lookups per line.
    sites_by_gene = dict((gene, []) for gene in genes_in_order)

    with open(ERANGEfilename) as sitesfile:
        for line in sitesfile:
            fields = line.rstrip('\n').split('\t')
            # Blank or truncated lines have no column 10; skip them instead
            # of failing with an IndexError.
            if len(fields) < 10:
                continue
            if fields[1] in sites_by_gene:
                sites_by_gene[fields[1]].append(fields[9])

    with open(outputfilename, 'w') as outfile:
        for gene in genes_in_order:
            sites = sites_by_gene[gene]
            # Every site keeps its trailing ', ' to preserve the established
            # output format.
            site_text = ''.join([site + ', ' for site in sites])
            outfile.write('%s\t%d\t%s\n' % (gene, len(sites), site_text))

# Script entry point.  There is no __main__ guard, so this also executes
# when the module is imported.
run()
