##################################
#                                #
# Last modified 05/29/2013       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string

def run():

    if len(sys.argv) < 2:
        print 'usage: python %s list_of_GTF_files outputfile_prefix [-scores]' % sys.argv[0]
        print '\tNote: it is assumed that the GTF file contain exon-level with attributes fields as follows:'
        print '\t\tgene_ids "CUFF.111723,"; transcript_ids "CUFF.111723.1,"; RPKM1 "0.659235"; RPKM2 "0.443044"; iIDR "0.002";
        sys.exit(1)
    
    files = sys.argv[1]
    outfilename = sys.argv[2]

    GTFDict={}
    linelist = open(files)
    F = 0
    ChromosomesDict = {}
    for line in linelist:
        F+=1
        fields1 = line.strip().split('\t')
        file = fields1[0]
        lines = open(file)
        print file
        GTFDict[F]={}
        for line2 in lines:
            fields = line2.strip().split('\t')
            if fields[2] != 'exon':
                continue
            chr = fields[0]
            ChromosomesDict[chr]=''
            left = int(fields[3])
            right = int(fields[4])
            transcripts = fields[8].split('transcript_ids "')[1].split('";')[0].split(',')
            for transcript in transcripts:
                if transcript == '':
                    continue
                transcript =  transcript + '::' + str(F)
                RPKM1 = float(fields[8].split('RPKM1 "')[1].split('";')[0])
                if GTFDict[F].has_key(chr):
                    pass
                else:
                    GTFDict[F][chr] = {}
                if GTFDict[F][chr].has_key((transcript,FPKM)):
                    pass
                else:
                    GTFDict[F][chr][(transcript,FPKM)] = []
                GTFDict[F][chr][(transcript,FPKM)].append((left,right))

    outfileExons = open(outfilename + '.exons.wig', 'w')
    outfileIntrons = open(outfilename + '.introns.wig', 'w')
    chromosomes = ChromosomesDict.keys()
    chromosomes.sort()
    for chr in chromosomes:
        print chr
        ExonicCoverageDict={}
        IntronicCoverageDict={}
        for F in GTFDict.keys():
            ExonicCoverageDict2={}
            if GTFDict[F].has_key(chr):
                pass
            else:
                continue
            for (transcript,FPKM) in GTFDict[F][chr].keys():
                for (left,right) in GTFDict[F][chr][(transcript,FPKM)]:
                    for i in range(left,right):
                        if ExonicCoverageDict2.has_key(i):
                            pass
                        else:
                            ExonicCoverageDict2[i]=0
                        ExonicCoverageDict2[i]+=FPKM
            for i in ExonicCoverageDict2.keys():
                if ExonicCoverageDict.has_key(i):
                    if ExonicCoverageDict[i] < ExonicCoverageDict2[i]:
                        ExonicCoverageDict[i] = ExonicCoverageDict2[i]
                else:
                    ExonicCoverageDict[i] = ExonicCoverageDict2[i]
        print chr, 'exons parsed'
        ExonicCoverageDict2={}
        for F in GTFDict.keys():
            IntronicCoverageDict2={}
            if GTFDict[F].has_key(chr):
                pass
            else:
                continue
            for (transcript,FPKM) in GTFDict[F][chr].keys():
                GTFDict[F][chr][(transcript,FPKM)].sort()
                if len(GTFDict[F][chr][(transcript,FPKM)]) == 1:
                    continue
                for i in range(len(GTFDict[F][chr][(transcript,FPKM)])-1):
                    left = GTFDict[F][chr][(transcript,FPKM)][i][1]
                    right = GTFDict[F][chr][(transcript,FPKM)][i][0]
                    for i in range(left,right):
                        if ExonicCoverageDict.has_key(i):
                            continue
                        if IntronicCoverageDict2.has_key(i):
                            pass
                        else:
                            IntronicCoverageDict2[i]=0
                        IntronicCoverageDict2[i]+=FPKM
            for i in IntronicCoverageDict2.keys():
                if IntronicCoverageDict.has_key(i):
                    if IntronicCoverageDict[i] < IntronicCoverageDict2[i]:
                        IntronicCoverageDict[i] = IntronicCoverageDict2[i]
                else:
                    IntronicCoverageDict[i] = IntronicCoverageDict2[i]
        print chr, 'exons parsed, introns parsed'
        IntronicCoverageDict2={}
        positions = ExonicCoverageDict.keys()
        positions.sort()
        k=0
        for i in positions:
            k+=1
            if k % 5000000 == 0:
                print chr, 'exons', i
            outline = chr + '\t' + str(i) + '\t' + str(i+1) + '\t' + str(ExonicCoverageDict[i])
            outfileExons.write(outline + '\n')
        positions = IntronicCoverageDict.keys()
        positions.sort()
        k=0
        for i in positions:
            k+=1
            if k % 5000000 == 0:
                print chr, 'introns', i
            outline = chr + '\t' + str(i) + '\t' + str(i+1) + '\t' + str(IntronicCoverageDict[i])
            outfileIntrons.write(outline + '\n')


    outfileExons.close()
    outfileIntrons.close()
            
run()
