##################################
#                                #
# Last modified 5/6/2009         # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
from sets import Set

def run():

    if len(sys.argv) < 4:
        print 'usage: python %s  expressionfilename boundgenesfilename listofgenes outfilename [-doSet]' % sys.argv[0]
        sys.exit(1)

    expressiondatafilename = sys.argv[1]
    boundgenesfilename = sys.argv[2]
    listofgenesfilename = sys.argv[3]
    outfilename = sys.argv[4]
    doSet=False
    if '-doSet' in sys.argv:
        doSet=True

    outfile = open(outfilename, 'w')

    bindingdatafile = open(boundgenesfilename)
    bindingdatalist = bindingdatafile.readlines()
    bindingdata = {}
    for line in bindingdatalist:
        fields = line.split('\n')[0].split('\t')[0].split(' ')
        if fields[0][0:3]=='Igk':
            continue
        bindingdata[fields[0]]=[]
        bindingdata[fields[0]].append(line.split('\n')[0])

    listofgenesfile = open(listofgenesfilename)
    lineslist = listofgenesfile.readlines()
    genes = {}
    expressiondata = {}
    for line in lineslist:
        fields = line.split('\n')[0].split('\t')
        if fields[1] in bindingdata.keys():
            expressiondata[fields[1]]={}
            expressiondata[fields[1]]['chromosome']=fields[2]
            expressiondata[fields[1]]['leftPos']=int(fields[3])
            expressiondata[fields[1]]['rightPos']=int(fields[4])
            expressiondata[fields[1]]['orientation']=fields[5]

    expressiondatafile = open(expressiondatafilename)
    expressiondatalist = expressiondatafile.readlines()
    expressionline1 = expressiondatalist[0]
    expressiondatalist.remove(expressiondatalist[0])
    for line in expressiondatalist:
        fields = line.split('\n')[0].split('\t') 
        if fields[1][0:3]=='Igk':
            continue
        if fields[1] in bindingdata.keys():
            expressiondata[fields[1]]['expression']=line.split('\n')[0]
        else:
            continue

    outfile.write(expressionline1.split('\n')[0])
    outfile.write('\t')
    outfile.write('chromosome')
    outfile.write('\t')
    outfile.write('leftPos')
    outfile.write('\t')
    outfile.write('rightPos')
    outfile.write('\t')
    outfile.write('orientation')
    outfile.write('\t')
    outfile.write('DistancefromTSS')            
    outfile.write('\n')
    for k in bindingdata.keys():
        if doSet:
            outfile.write(expressiondata[k]['expression'])
            print 'Set'
            outfile.write(bindingdata[k][0])
            outfile.write('\n')            
        else:
            if outfile.write(expressiondata[k]['expression']) == '':
                continue
            for site in bindingdata[k]:
                sitePeak = (int(site.split('\t')[2])+int(site.split('\t')[3]))//2
                outfile.write(expressiondata[k]['expression'])
                print expressiondata[k]['expression']
                outfile.write('\t')            
                outfile.write(expressiondata[k]['chromosome'])
                print expressiondata[k]['chromosome']
                outfile.write('\t')
                outfile.write(str(expressiondata[k]['leftPos']))
                outfile.write('\t')
                outfile.write(str(expressiondata[k]['rightPos']))
                outfile.write('\t')
                outfile.write(expressiondata[k]['orientation'])
                outfile.write('\t')
                if expressiondata[k]['orientation']=='F':
                    outfile.write(str(sitePeak-expressiondata[k]['leftPos']))
                    outfile.write('\t')
                if expressiondata[k]['orientation']=='R':
                    outfile.write(str(sitePeak-expressiondata[k]['rightPos']))
                    outfile.write('\t')
                outfile.write('\t')
                outfile.write(site)     
                outfile.write('\n')            


run()
