##################################
#                                #
# Last modified 7/6/2009         # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string

def run():

    if len(sys.argv) < 4:
        print 'usage: python %s  inputfwigfilename genemodelsfile readlength outputfilename [-bed]' % sys.argv[0]
        sys.exit(1)
    
    cachePages = 2000000

    inputfilename = sys.argv[1]
    genemodels = sys.argv[2]
    readlength = int(sys.argv[3])
    outfilename = sys.argv[4]

    doBed=False
    if '-bed' in sys.argv:
        doBed=True

    outfile = open(outfilename, 'w')

    inputdatafile = open(genemodels)
    linelist = inputdatafile.readlines()
    genes={}
    inputdatafile = open(inputfilename)
    wiglineslist = inputdatafile.readlines()
    print 'start'
    if doBed:
        outfile.write('Chromosome\tStart\tStop\tRPKM\n')
        for line in linelist:
            fields=line.split('\n')[0].split('\t')
            chr=fields[0]
            start=int(fields[1])
            stop=int(fields[2])
            score=0.0
            for k in wiglineslist:
                wigfields=k.split('\n')[0].split('\t')[0].split(' ')
                if wigfields[0]!=chr:
                    continue
                else:
                    if int(wigfields[2])<start:
                        continue
                    if int(wigfields[2])>start and int(wigfields[1])<=stop:
                        score+=(min(int(wigfields[2]),stop)-max(int(wigfields[1]),start))*float(wigfields[3])
                    if int(wigfields[2])>stop:
                        print score
                        break
            outline='%s\t%s\t%s\t%s\n' % (chr, start, stop, score)
            outfile.write(outline)
    else:
        for line in linelist:
            fields=line.split('\n')[0].split('\t')
            genes[fields[0]]={}
            genes[fields[0]]['chromosome']=fields[1]
            genes[fields[0]]['start']=int(fields[3])
            genes[fields[0]]['stop']=int(fields[4])
            genes[fields[0]]['featurestarts']=fields[8].split(',')
            genes[fields[0]]['featureends']=fields[9].split(',')
            genes[fields[0]]['score']=0.0
            genes[fields[0]]['mRNALength']=0.0
            print fields[0]
            print genes[fields[0]]['chromosome']
            for i in range(len(genes[fields[0]]['featurestarts'])-1):
                print i
                genes[fields[0]]['mRNALength']+=int(genes[fields[0]]['featureends'][i])-int(genes[fields[0]]['featurestarts'][i])
                for k in wiglineslist:
                    wigfields=k.split('\n')[0].split('\t')[0].split(' ')
                    if wigfields[0]!=genes[fields[0]]['chromosome']:
                        continue
                    else:
                        if int(wigfields[2])<int(genes[fields[0]]['featurestarts'][i]):
                            continue
                        if int(wigfields[2])>int(genes[fields[0]]['featurestarts'][i]) and int(wigfields[1])<=int(genes[fields[0]]['featureends'][i]):
                            genes[fields[0]]['score']+=(min(int(wigfields[2]),int(genes[fields[0]]['featureends'][i]))-max(int(wigfields[1]),int(genes[fields[0]]['featurestarts'][i])))*float(wigfields[3])
                        if int(wigfields[2])>int(genes[fields[0]]['featureends'][i]):
                            print genes[fields[0]]['score']
                            break
 
        outfile.write('Gene_Name'+'\t'+'mRNA_Length'+'\t'+'RPKM'+'\n')
        for gene in genes.keys():                    
            outfile.write(gene+'\t'+str(genes[gene]['mRNALength'])+'\t'+str(genes[gene]['score']/(readlength*genes[gene]['mRNALength']/1000))+'\n')
   
run()
