##################################
#                                #
# Last modified 2017/11/21       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import os
import math
from sets import Set

def TranslateAndSum(FeatureCoverageDictCDS,CDSCov,strand,CoverageDict):

    CDSCovkeys = CDSCov.keys()
    CDSCovkeys.sort()
    if strand == '-':
        CDSCovkeys.reverse()
    
    NumCDSPos = len(FeatureCoverageDictCDS.keys())

    ratio = len(CDSCovkeys)/(NumCDSPos + 0.0)

    k=0
    finalvector = []
    if ratio >= 1:
        while k < len(CDSCovkeys) - ratio:
            b = k + ratio
            counts=[]
            for i in range(int(k),int(b)):
                counts.append(CDSCov[CDSCovkeys[i]])
            finalvector.append(sum(counts)/(len(counts) + 0.0))
            k = b
    else:
        while k < len(CDSCovkeys) - ratio:
            b = k
            finalvector.append(CDSCov[CDSCovkeys[int(b)]])
            b += ratio
            k = b

    FCCDSkeys = FeatureCoverageDictCDS.keys()
    FCCDSkeys.sort()
    for i in range(len(finalvector)):
        FeatureCoverageDictCDS[FCCDSkeys[i]] += finalvector[i]

    return FeatureCoverageDictCDS

def AssignScores(chr,FeatureCoverageDict,GeneDict,radius,CoverageDict):
 
    for geneID in GeneDict[chr]:
        NoExons = True
        for transcriptID in GeneDict[chr][geneID].keys():
            if len(GeneDict[chr][geneID][transcriptID]['exons']) > 0:
                NoExons = False
        if NoExons:
            continue
        longestTranscript = ('',0)
        longestORF = ('',0)
        coordinates = []
        for transcriptID in GeneDict[chr][geneID].keys():
            TCDSL = 0
            if transcriptID == 'gene_name':
                continue
            GeneDict[chr][geneID][transcriptID]['exons'].sort()
            GeneDict[chr][geneID][transcriptID]['CDS'].sort()
            strand = GeneDict[chr][geneID][transcriptID]['exons'][0][3]
            for (chr,left,right,strand) in GeneDict[chr][geneID][transcriptID]['CDS']:
                coordinates.append(left)
                coordinates.append(right)
                TCDSL += (right-left)
            if TCDSL >= longestORF[1]:
                longestORF = (transcriptID,TCDSL)
        CDSCov = {}
        UTR5Cov = {}
        UTR3Cov = {}
        IntronCov = {}
        for (chr,left,right,strand) in GeneDict[chr][geneID][longestORF[0]]['CDS']:
            for i in range(left,right):
                if CoverageDict.has_key(i):
                    CDSCov[i] = CoverageDict[i]
                else:
                    CDSCov[i] = 0
        for IN in range(len(GeneDict[chr][geneID][longestORF[0]]['exons']) - 1):
            (chr,left1,right1,strand) = GeneDict[chr][geneID][longestORF[0]]['exons'][IN]
            (chr,left2,right2,strand) = GeneDict[chr][geneID][longestORF[0]]['exons'][IN+1]
            for i in range(right1,left2):
                if CoverageDict.has_key(i):
                    IntronCov[i] = CoverageDict[i]
                else:
                    IntronCov[i] = 0
        if strand == '+':
            TSS = GeneDict[chr][geneID][longestORF[0]]['exons'][0][1]
            TTS = GeneDict[chr][geneID][longestORF[0]]['exons'][-1][2]
            for i in range(TSS - radius,TSS):
                if CoverageDict.has_key(i):
                    FeatureCoverageDict['upstream'][i-TSS] += CoverageDict[i]
            for i in range(TTS + 1,TTS + radius + 1):
                if CoverageDict.has_key(i):
                    FeatureCoverageDict['downstream'][i-TTS] += CoverageDict[i]
            UTR5Ended = False
            for (chr,left,right,strand) in GeneDict[chr][geneID][longestORF[0]]['exons']:
                if UTR5Ended:
                    break
                for i in range(left,right):
                    if CDSCov.has_key(i):
                        UTR5Ended = True
                        break
                    else:
                        if CoverageDict.has_key(i):
                            UTR5Cov[i] = CoverageDict[i]
                        else:
                            UTR5Cov[i] = 0
            for (chr,left,right,strand) in GeneDict[chr][geneID][longestORF[0]]['exons']:
                for i in range(left,right):
                    if CDSCov.has_key(i) or UTR5Cov.has_key(i):
                        continue
                    else:
                        if CoverageDict.has_key(i):
                            UTR3Cov[i] = CoverageDict[i]
                        else:
                            UTR3Cov[i] = 0
        if strand == '-':
            TSS = GeneDict[chr][geneID][longestORF[0]]['exons'][-1][2]
            TTS = GeneDict[chr][geneID][longestORF[0]]['exons'][0][1]
            for i in range(TSS + 1,TSS + radius + 1):
                if CoverageDict.has_key(i):
                    FeatureCoverageDict['upstream'][TSS - i] += CoverageDict[i]
            for i in range(TTS - radius,TTS):
                if CoverageDict.has_key(i):
                    FeatureCoverageDict['downstream'][TTS - i] += CoverageDict[i]
            UTR3Ended = False
            for (chr,left,right,strand) in GeneDict[chr][geneID][longestORF[0]]['exons']:
                if UTR3Ended:
                    break
                for i in range(left,right):
                    if CDSCov.has_key(i):
                        UTR3Ended = True
                        break
                    else:
                        if CoverageDict.has_key(i):
                            UTR5Cov[i] = CoverageDict[i]
                        else:
                            UTR5Cov[i] = 0
            for (chr,left,right,strand) in GeneDict[chr][geneID][longestORF[0]]['exons']:
                for i in range(left,right):
                    if CDSCov.has_key(i) or UTR3Cov.has_key(i):
                        continue
                    else:
                        if CoverageDict.has_key(i):
                            UTR3Cov[i] = CoverageDict[i]
                        else:
                            UTR3Cov[i] = 0

        FeatureCoverageDict['upstreamN'] += 1
        FeatureCoverageDict['downstreamN'] += 1
        if len(CDSCov.keys()) > 0:
            FeatureCoverageDict['CDSN'] += 1
        if len(UTR5Cov.keys()) > 0:
            FeatureCoverageDict['UTR5N'] += 1
        if len(UTR3Cov.keys()) > 0:
            FeatureCoverageDict['UTR3N'] += 1
        if len(IntronCov.keys()) > 0:
            FeatureCoverageDict['intronN'] += 1

        FeatureCoverageDict['CDS'] = TranslateAndSum(FeatureCoverageDict['CDS'],CDSCov,strand,CoverageDict)
        if len(FeatureCoverageDict['UTR5'].keys()) == 0:
            FeatureCoverageDict['UTR5'] = {0:0}
        else:
            FeatureCoverageDict['UTR5'] = TranslateAndSum(FeatureCoverageDict['UTR5'],UTR5Cov,strand,CoverageDict)
        if len(FeatureCoverageDict['UTR3'].keys()) == 0:
            FeatureCoverageDict['UTR3'] = {0:0}
        else:
            FeatureCoverageDict['UTR3'] = TranslateAndSum(FeatureCoverageDict['UTR3'],UTR3Cov,strand,CoverageDict)
        if len(FeatureCoverageDict['intron']) == 0:
            pass
        else:
            FeatureCoverageDict['intron'] = TranslateAndSum(FeatureCoverageDict['intron'],IntronCov,strand,CoverageDict)

    return(FeatureCoverageDict)

def run():

    if len(sys.argv) < 7:
        print 'usage: python %s GTF radius_around_gene minGeneLength maxGeneLength wig outfile [-NCBIGFFFNAProk] [-UTRCDS]' % sys.argv[0]
        print '\tNote: the script will only work properly with sorted wiggle files, i.e. all entries for each chromosome are consecutive'
        print '\tNote: the wig file can be compressed'
        sys.exit(1)

    GTF = sys.argv[1]
    radius = int(sys.argv[2])
    minGeneLength = int(sys.argv[3])
    maxGeneLength = int(sys.argv[4])
    wig = sys.argv[5]
    outfilename = sys.argv[6]

    doNCBIGFFFNAProk = False
    if '-NCBIGFFFNAProk' in sys.argv:
        doNCBIGFFFNAProk = True

    doUTRCDS = False
    if '-UTRCDS' in sys.argv:
        doUTRCDS = True

    GeneDict = {}

    linelist = open(GTF)
    for line in linelist:
        if line.startswith('#') or line.strip() == '':
            continue
        fields = line.strip().split('\t')
        if doNCBIGFFFNAProk:
           if fields[2] != 'CDS':
               continue
        elif doUTRCDS:
#           if fields[2] != 'CDS' and fields[2] != '5UTR' and fields[2] != '3UTR' and fields[2] != 'stop_codon' and fields[2] != 'start_codon':
           if fields[2] != 'CDS' and fields[2] != '5UTR' and fields[2] != '3UTR' and fields[2] != 'stop_codon':
               continue
        else:
            if fields[2] != 'exon' and fields[2] != 'CDS':
                continue
        if doNCBIGFFFNAProk:
            geneID = fields[8].split('ID=')[1].split(';')[0]
            transcriptID = geneID
            geneName = fields[8].split(';product=')[1].split(';')[0]
            transcriptName = geneName
        else:
            geneID=fields[8].split('gene_id "')[1].split('"')[0]
            transcriptID=fields[8].split('transcript_id "')[1].split('"')[0]
            if 'gene_name "' in fields[8]:
                geneName=fields[8].split('gene_name "')[1].split('"')[0]
            else:
                geneName = geneID
        chr = fields[0]
        if GeneDict.has_key(chr):
            pass
        else:
            GeneDict[chr] = {}
        left = int(fields[3])
        right = int(fields[4])
        strand = fields[6]
        if GeneDict[chr].has_key(geneID):
            pass
        else:
            GeneDict[chr][geneID]={}
        if GeneDict[chr][geneID].has_key(transcriptID):
            pass
        else:
            GeneDict[chr][geneID][transcriptID] = {}
            GeneDict[chr][geneID][transcriptID]['UTRsCDSs'] = []
            GeneDict[chr][geneID][transcriptID]['exons'] = []
            GeneDict[chr][geneID][transcriptID]['CDS'] = []
        if doNCBIGFFFNAProk:
            if len(GeneDict[chr][geneID][transcriptID]['exons']) >= 1:
                continue
            else:
                GeneDict[chr][geneID][transcriptID]['exons'].append((chr,left,right,strand))
                GeneDict[chr][geneID][transcriptID]['CDS'].append((chr,left,right,strand))
        elif doUTRCDS:
#            if fields[2] == '5UTR' or fields[2] == '3UTR' or fields[2] == 'stop_codon' or fields[2] == 'start_codon':
            if fields[2] == '5UTR' or fields[2] == '3UTR' or fields[2] == 'stop_codon':
                GeneDict[chr][geneID][transcriptID]['UTRsCDSs'].append((chr,left,right,strand))
            if fields[2] == 'CDS':
                GeneDict[chr][geneID][transcriptID]['CDS'].append((chr,left,right,strand))
                GeneDict[chr][geneID][transcriptID]['UTRsCDSs'].append((chr,left,right,strand))
        else:
            if fields[2] == 'exon':
                GeneDict[chr][geneID][transcriptID]['exons'].append((chr,left,right,strand))
            if fields[2] == 'CDS':
                GeneDict[chr][geneID][transcriptID]['CDS'].append((chr,left,right,strand))
    outfile = open(outfilename,'w')

    if doUTRCDS:
        for chr in GeneDict.keys():
            for geneID in GeneDict[chr].keys():
                for transcriptID in GeneDict[chr][geneID].keys():
                    GeneDict[chr][geneID][transcriptID]['UTRsCDSs'].sort()
                    GeneDict[chr][geneID][transcriptID]['CDS'].sort()
                    if len(GeneDict[chr][geneID][transcriptID]['UTRsCDSs']) == 1:
                        GeneDict[chr][geneID][transcriptID]['exons'].append(GeneDict[chr][geneID][transcriptID]['UTRsCDSs'][0])
                    else:
                        i = 0
                        currLeft = GeneDict[chr][geneID][transcriptID]['UTRsCDSs'][0][1]
                        currRight = GeneDict[chr][geneID][transcriptID]['UTRsCDSs'][0][2]
                        for i in range(1,len(GeneDict[chr][geneID][transcriptID]['UTRsCDSs'])):
                            (chr,left,right,strand) = GeneDict[chr][geneID][transcriptID]['UTRsCDSs'][i]
                            if left - currRight == 1:
                                currRight = right
                            else:
                                GeneDict[chr][geneID][transcriptID]['exons'].append((chr,currLeft,currRight,strand))
                                currLeft = right
                                currRight = right
                        GeneDict[chr][geneID][transcriptID]['exons'].append((chr,currLeft,currRight,strand))
                    GeneDict[chr][geneID][transcriptID]['exons'] = list(Set(GeneDict[chr][geneID][transcriptID]['exons']))
                    GeneDict[chr][geneID][transcriptID]['exons'].sort()
    else:
        for chr in GeneDict.keys():
            for geneID in GeneDict[chr].keys():
                for transcriptID in GeneDict[chr][geneID].keys():
                    GeneDict[chr][geneID][transcriptID]['CDS'].sort()
                    GeneDict[chr][geneID][transcriptID]['exons'].sort()

    AverageGene = {}

    transcriptLengths = []
    ORFLengths = []
    IntronNumbers = []
    IntronLengthsPos = {}
    ExonLengthsPos = {}
    UTR5Lengths = []
    UTR3Lengths = []
    NGenes = 0.0
    for chr in GeneDict.keys():
        for geneID in GeneDict[chr].keys():
            NoExons = True
            for transcriptID in GeneDict[chr][geneID].keys():
                if len(GeneDict[chr][geneID][transcriptID]['exons']) > 0:
                    NoExons = False
            if NoExons:
                print 'no exons annotated for gene', geneID, 'skipping'
                continue
            NGenes += 1
            longestTranscript = ('',0)
            longestORF = ('',0)
            coordinates = []
            for transcriptID in GeneDict[chr][geneID].keys():
                TL = 0
                TCDSL = 0
                if transcriptID == 'gene_name':
                    continue
                GeneDict[chr][geneID][transcriptID]['exons'].sort()
                GeneDict[chr][geneID][transcriptID]['CDS'].sort()
                strand = GeneDict[chr][geneID][transcriptID]['exons'][0][3]
                for (chr,left,right,strand) in GeneDict[chr][geneID][transcriptID]['exons']:
                    coordinates.append(left)
                    coordinates.append(right)
                    TL += (right-left)
                if TL >= longestTranscript[1]:
                    longestTranscript = (transcriptID,TL)
                for (chr,left,right,strand) in GeneDict[chr][geneID][transcriptID]['CDS']:
                    coordinates.append(left)
                    coordinates.append(right)
                    TCDSL += (right-left)
                if TCDSL >= longestORF[1]:
                    longestORF = (transcriptID,TCDSL)
            if GeneDict[chr][geneID][longestTranscript[0]]['exons'][-1][2] - GeneDict[chr][geneID][longestTranscript[0]]['exons'][0][1] < minGeneLength:
                continue
                del GeneDict[chr][geneID] 
            if GeneDict[chr][geneID][longestTranscript[0]]['exons'][-1][2] - GeneDict[chr][geneID][longestTranscript[0]]['exons'][0][1] > maxGeneLength:
                continue
                del GeneDict[chr][geneID] 
            if longestORF[1] > 0:
                ORFLengths.append(longestORF[1])
            transcriptLengths.append(longestTranscript[1])
            NumInt = len(GeneDict[chr][geneID][longestTranscript[0]]['exons']) - 1 
            IntronNumbers.append(NumInt)
            if NumInt == 0:
                pass
            else:
                for i in range(NumInt):
                    (chr,left1,right1,strand) = GeneDict[chr][geneID][longestTranscript[0]]['exons'][i]
                    (chr,left2,right2,strand) = GeneDict[chr][geneID][longestTranscript[0]]['exons'][i+1]
                    IL = (left2 - right1)
                    if strand == '+':
                        if IntronLengthsPos.has_key(i + 1):
                            pass
                        else:
                            IntronLengthsPos[i + 1] = []
                        if IntronLengthsPos.has_key(0 - NumInt + i):
                            pass
                        else:
                            IntronLengthsPos[0 - NumInt + i] = []
                        IntronLengthsPos[i + 1].append(IL)
                        IntronLengthsPos[0 - NumInt + i].append(IL)
                    if strand == '-':
                        if IntronLengthsPos.has_key(NumInt - i):
                            pass
                        else:
                            IntronLengthsPos[NumInt - i] = []
                        if IntronLengthsPos.has_key(0 - i - 1):
                            pass
                        else:
                            IntronLengthsPos[0 - i - 1] = []
                        IntronLengthsPos[NumInt - i].append(IL)
                        IntronLengthsPos[0 - i - 1].append(IL)
            for i in range(NumInt + 1):
                (chr,left,right,strand) = GeneDict[chr][geneID][longestTranscript[0]]['exons'][i]
                EL = (right - left)
                if strand == '+':
                    if ExonLengthsPos.has_key(i + 1):
                        pass
                    else:
                        ExonLengthsPos[i+1] = []
                    if ExonLengthsPos.has_key(0 - (NumInt + 1) + i):
                        pass
                    else:
                        ExonLengthsPos[0 - (NumInt + 1) + i] = []
                    ExonLengthsPos[i + 1].append(EL)
                    ExonLengthsPos[0 - (NumInt + 1) + i].append(EL)
                if strand == '-':
                    if ExonLengthsPos.has_key((NumInt + 1) - i):
                        pass
                    else:
                        ExonLengthsPos[(NumInt + 1) - i] = []
                    if ExonLengthsPos.has_key(0 - i - 1):
                        pass
                    else:
                        ExonLengthsPos[0 - i - 1] = []
                    ExonLengthsPos[(NumInt + 1) - i].append(EL)
                    ExonLengthsPos[0 - i - 1].append(EL)
            if longestORF[1] > 0:
                leftUTR = 0
                rightUTR = 0
                leftCDScoordinate = GeneDict[chr][geneID][longestORF[0]]['CDS'][0][1]
                rightCDScoordinate = GeneDict[chr][geneID][longestORF[0]]['CDS'][-1][2]
                leftExoncoordinate = GeneDict[chr][geneID][longestORF[0]]['exons'][0][1]
                rightExoncoordinate = GeneDict[chr][geneID][longestORF[0]]['exons'][-1][2]
                if leftCDScoordinate == leftExoncoordinate:
                    pass
                else:
                    for (chr,left,right,strand) in GeneDict[chr][geneID][longestORF[0]]['exons']:
                        for i in range(left,right):
                            if i >= leftCDScoordinate:
                                break
                            else:
                                leftUTR += 1
                if rightCDScoordinate == rightExoncoordinate:
                    pass
                else:
                    for (chr,left,right,strand) in GeneDict[chr][geneID][longestORF[0]]['exons']:
                        for i in range(left,right):
                            if i <= rightCDScoordinate:
                                continue
                            else:
                                leftUTR += 1
                if strand == '+':
                    UTR5Lengths.append(leftUTR)
                    UTR3Lengths.append(rightUTR)
                if strand == '-':
                    UTR5Lengths.append(rightUTR)
                    UTR3Lengths.append(leftUTR)

    meanUTR5length = sum(UTR5Lengths)/(len(UTR5Lengths) + 0.0)
    meanUTR3length = sum(UTR3Lengths)/(len(UTR3Lengths) + 0.0)
    meanORFlength = sum(ORFLengths)/(len(ORFLengths) + 0.0)
    meanTlength = sum(transcriptLengths)/(len(transcriptLengths) + 0.0)
    meanIntronNumber = sum(IntronNumbers)/(len(IntronNumbers) + 0.0)
    outline = 'Mean Intron Number:\t' + str(meanIntronNumber)
    outfile.write(outline + '\n')
    meanIntronNumber = int(round(meanIntronNumber))

    MeanELDict = {}
    MeanILDict = {}

    for i in range(meanIntronNumber/2):
        MeanILDict[i+1] = sum(IntronLengthsPos[i+1])/(len(IntronLengthsPos[i+1]))
        MeanILDict[meanIntronNumber - i] = sum(IntronLengthsPos[-i-1])/(len(IntronLengthsPos[-i-1]))
    if meanIntronNumber % 2 == 1:
        MeanILDict[meanIntronNumber/2 + 1] = sum(IntronLengthsPos[meanIntronNumber/2 + 1])/(len(IntronLengthsPos[meanIntronNumber/2 + 1]))

    for i in range((meanIntronNumber + 1)/2):
        MeanELDict[i+1] = sum(ExonLengthsPos[i+1])/(len(ExonLengthsPos[i+1]))
        MeanELDict[meanIntronNumber + 1 - i] = sum(ExonLengthsPos[-i-1])/(len(ExonLengthsPos[-i-1]))
    if (meanIntronNumber + 1) % 2 == 1:
        MeanELDict[(meanIntronNumber + 1)/2 + 1] = sum(ExonLengthsPos[(meanIntronNumber + 1)/2 + 1])/(len(ExonLengthsPos[(meanIntronNumber + 1)/2 + 1]))

    UTR5EndExon = 0
    UTR3EndExon = meanIntronNumber + 1
    PE = 0
    for i in range(1,meanIntronNumber + 2):
        PE += MeanELDict[i]
        if PE > meanUTR5length:
            UTR5EndExon = i
            break
    PE = 0
    for i in range(1,meanIntronNumber + 2):
        PE += MeanELDict[meanIntronNumber + 2 - i]
        if PE > meanUTR3length:
            UTR3ndExon = meanIntronNumber + 2 - i
            UTR3ndExonPos = MeanELDict[meanIntronNumber + 2 - i] - (meanUTR3length - (PE - MeanELDict[meanIntronNumber + 2 - i]))
            break

    print 'meanIntronNumber', meanIntronNumber
    print 'MeanELDict', MeanELDict
    print 'MeanILDict', MeanILDict
    print 'meanUTR5length', meanUTR5length
    print 'meanUTR3length', meanUTR3length
    print 'UTR end point locations per exon', UTR5EndExon, UTR3ndExon

    PT = 0 
    PE = 0
    in5UTR = True
    in3UTR = False
    for i in range(meanIntronNumber + 1):
        i+=1
        if in5UTR:
            if i == UTR5EndExon:
                in5UTR = False
                outline = '5UTR' + '\t' + str(meanUTR5length - PE)
                CDSleftover = (PE + MeanELDict[i] - meanUTR5length)
                PT += (meanUTR5length - PE)
                PE += (meanUTR5length - PE)
                outline = outline + '\t' + str(PT)
                outfile.write(outline + '\n')
                PT += CDSleftover
                PE += CDSleftover
                outline = 'CDS' + '\t' + str(CDSleftover) + '\t' + str(PT)
                outfile.write(outline + '\n')
            else:
                PT += MeanELDict[i]
                PE += MeanELDict[i]
                outline = '5UTR' + '\t' + str(MeanELDict[i]) + '\t' + str(PT)
                outfile.write(outline + '\n')
        elif i == UTR3ndExon:
            in3UTR = True
            PE += UTR3ndExonPos
            PT += UTR3ndExonPos
            outline = 'CDS' + '\t' + str(UTR3ndExonPos) + '\t' + str(PT)
            outfile.write(outline + '\n')
            PE += (MeanELDict[i] - UTR3ndExonPos)
            PT += (MeanELDict[i] - UTR3ndExonPos)
            outline = '3UTR' + '\t' + str(MeanELDict[i] - UTR3ndExonPos) + '\t' + str(PT)
            outfile.write(outline + '\n')
        elif i > UTR3ndExon and in3UTR:
            PE += MeanELDict[i]
            PT += MeanELDict[i]
            outline = '3UTR' + '\t' + str(MeanELDict[i]) + '\t' + str(PT)
            outfile.write(outline + '\n')
        else:
            PE += MeanELDict[i]
            PT += MeanELDict[i]
            outline = 'CDS' + '\t' + str(MeanELDict[i]) + '\t' + str(PT)
            outfile.write(outline + '\n')
        if i < meanIntronNumber + 1:
            PT += MeanILDict[i]
            outline = 'intron' + '\t' + str(MeanILDict[i]) + '\t' + str(PT)
            outfile.write(outline + '\n')

    if meanIntronNumber == 0:
        outline = '3UTR' + '\t' + str(MeanELDict[i] - UTR3ndExonPos) + '\t' + str(PT)
        outfile.write(outline + '\n')
        
    
    FeatureCoverageDict = {}
    FeatureCoverageDict['upstream'] = {}
    FeatureCoverageDict['downstream'] = {}
    FeatureCoverageDict['UTR5'] = {}
    FeatureCoverageDict['CDS'] = {}
    FeatureCoverageDict['UTR3'] = {}
    FeatureCoverageDict['intron'] = {}
    FeatureCoverageDict['upstreamN'] = 0.0
    FeatureCoverageDict['downstreamN'] = 0.0
    FeatureCoverageDict['UTR5N'] = 0.0
    FeatureCoverageDict['CDSN'] = 0.0
    FeatureCoverageDict['UTR3N'] = 0.0
    FeatureCoverageDict['intronN'] = 0.0

    for i in range(-radius,0):
        FeatureCoverageDict['upstream'][i] = 0
    for i in range(0,radius + 1):
        FeatureCoverageDict['downstream'][i] = 0
    for i in range(int(round(meanUTR5length))):
        FeatureCoverageDict['UTR5'][i] = 0
    for i in range(int(round(meanUTR3length))):
        FeatureCoverageDict['UTR3'][i] = 0
    for i in range(int(round(PE - meanUTR3length - meanUTR5length))):
        FeatureCoverageDict['CDS'][i] = 0
    for i in range(int(round(PT - PE))):
        FeatureCoverageDict['intron'][i] = 0

    if wig.endswith('.bz2'):
        cmd = 'bzip2 -cd ' + wig
    elif wig.endswith('.gz'):
        cmd = 'gunzip -c ' + wig
    elif wig.endswith('.zip'):
        cmd = 'unzip -p ' + wig
    else:
        cmd = 'cat ' + wig
    p = os.popen(cmd, "r")
    line = 'line'
    currentChr = ''
    while line != '':
        line = p.readline().strip()
        if line == '':
            break
        if line.startswith('#') or line.strip() == '':
            continue
        fields = line.strip().split('\t')
        chr = fields[0]
        left = int(fields[1])
        right = int(fields[2])
        score = float(fields[3])
        if chr != currentChr:
            if currentChr == '':
                pass
            else:
                if GeneDict.has_key(currentChr):
                    FeatureCoverageDict = AssignScores(currentChr,FeatureCoverageDict,GeneDict,radius,CoverageDict)
                else:
                    pass
            CoverageDict = {}
            currentChr = chr
        if fields[3] == 'nan':
            continue
        for i in range(left,right):
            CoverageDict[i] = score

    if GeneDict.has_key(currentChr):
        FeatureCoverageDict = AssignScores(currentChr,FeatureCoverageDict,GeneDict,radius,CoverageDict)
    else:
        pass

    for i in range(-radius,0):
#        outline = str(i) + '\t' + str(FeatureCoverageDict['upstream'][i]/NGenes) + '\t' + 'upstream'
        outline = str(i) + '\t' + str(FeatureCoverageDict['upstream'][i]/FeatureCoverageDict['upstreamN']) + '\t' + 'upstream'
        outfile.write(outline + '\n')

#    print max(FeatureCoverageDict['UTR5'].keys())
#    print max(FeatureCoverageDict['UTR3'].keys())
#    print max(FeatureCoverageDict['CDS'].keys())

    print 'feature sizes:'
    print 'NGenes', NGenes
    print 'UTR5', (len(UTR5Lengths) + 0.0)
    print 'UTR3', (len(UTR3Lengths) + 0.0)
    for i in range(meanIntronNumber + 1):
        i+=1
        print i, len(ExonLengthsPos[i]), len(IntronLengthsPos[i])

    PUTR5 = 0 
    PUTR3 = 0 
    PT = 0
    PE = 0
    PCDS = 0
    in5UTR = True
    in3UTR = False
    for i in range(meanIntronNumber + 1):
        i+=1
        if in5UTR:
            if i == UTR5EndExon:
                in5UTR = False
                outline = '5UTR' + '\t' + str(meanUTR5length - PE)
                CDSleftover = (PE + MeanELDict[i] - meanUTR5length)
                for p in range(int(PE),int(meanUTR5length)-1):
                    outline = str(PT + p-PE) + '\t' + str(FeatureCoverageDict['UTR5'][p]/FeatureCoverageDict['UTR5N']) + '\t' + 'UTR5'
                    outfile.write(outline + '\n')
                if meanUTR5length >= 1:
                    for p in range(int(meanUTR5length)-1,int(meanUTR5length)):
                        outline = str(PT + p-PE) + '\t' + str(FeatureCoverageDict['UTR5'][p-1]/FeatureCoverageDict['UTR5N']) + '\t' + 'UTR5'
                        outfile.write(outline + '\n')
                PT += (meanUTR5length - PE)
                PE += (meanUTR5length - PE)
                for p in range(int(PE),int(PE + CDSleftover)):
                    outline = str(PT + p-PE) + '\t' + str(FeatureCoverageDict['CDS'][min(max(FeatureCoverageDict['CDS'].keys()),int(p - meanUTR5length))]/FeatureCoverageDict['CDSN']) + '\t' + 'CDS'
                    outfile.write(outline + '\n')
                PT += CDSleftover
                PE += CDSleftover
            else:
                for p in range(int(PE),int(PE + MeanELDict[i])-1):
                    outline = str(PT + p-PE) + '\t' + str(FeatureCoverageDict['UTR5'][p]/FeatureCoverageDict['UTR5N']) + '\t' + 'UTR5'
                    outfile.write(outline + '\n')
                for p in range(int(PE + MeanELDict[i])-1,int(PE + MeanELDict[i])):
                    outline = str(PT + p-PE) + '\t' + str(FeatureCoverageDict['UTR5'][p-1]/FeatureCoverageDict['UTR5N']) + '\t' + 'UTR5'
                    outfile.write(outline + '\n')
                PT += MeanELDict[i]
                PE += MeanELDict[i]
        elif i == UTR3ndExon:
            in3UTR = True
            for p in range(int(PE),int(len(FeatureCoverageDict['CDS'].keys()) + meanUTR5length)):
                outline = str(PT + p-PE - 1) + '\t' + str(FeatureCoverageDict['CDS'][int(p - meanUTR5length)]/FeatureCoverageDict['CDSN']) + '\t' + 'CDS'
                outfile.write(outline + '\n')
            PE += UTR3ndExonPos
            PT += UTR3ndExonPos
            for p in range(int(PE),int(PE - UTR3ndExonPos + MeanELDict[i])-1):
#                outline = str(PT + p-PE) + '\t' + str(FeatureCoverageDict['UTR3'][int(p - PE)]/NGenes) + '\t' + 'UTR3'
#                outline = str(PT + p-PE) + '\t' + str(FeatureCoverageDict['UTR3'][int(p - PE)]/(len(UTR3Lengths) + 0.0)) + '\t' + 'UTR3'
                outline = str(PT + p-PE) + '\t' + str(FeatureCoverageDict['UTR3'][int(p - PE)]/FeatureCoverageDict['UTR3N']) + '\t' + 'UTR3'
                outfile.write(outline + '\n')
            for p in range(int(PE - UTR3ndExonPos + MeanELDict[i]),int(PE - UTR3ndExonPos + MeanELDict[i])-1):
                outline = str(PT + p-PE) + '\t' + str(FeatureCoverageDict['UTR3'][int(p - PE-1)]/FeatureCoverageDict['UTR3N']) + '\t' + 'UTR3'
                outfile.write(outline + '\n')
            PE += (MeanELDict[i] - UTR3ndExonPos)
            PT += (MeanELDict[i] - UTR3ndExonPos)
        elif i > UTR3ndExon and in3UTR:
            PE += MeanELDict[i]
            PT += MeanELDict[i]
            for p in range(int(PE),int(PE + MeanELDict[i])-1):
#                outline = str(PT + p-PE) + '\t' + str(FeatureCoverageDict['UTR3'][int(p - len(FeatureCoverageDict['CDS'].keys()) - meanUTR5length)]/NGenes) + '\t' + 'UTR3'
#                outline = str(PT + p-PE) + '\t' + str(FeatureCoverageDict['UTR3'][int(p - len(FeatureCoverageDict['CDS'].keys()) - meanUTR5length)]/(len(UTR3Lengths) + 0.0)) + '\t' + 'UTR3'
                outline = str(PT + p-PE) + '\t' + str(FeatureCoverageDict['UTR3'][int(p - len(FeatureCoverageDict['CDS'].keys()) - meanUTR5length)]/FeatureCoverageDict['UTR3N']) + '\t' + 'UTR3'
                outfile.write(outline + '\n')
            for p in range(int(PE + MeanELDict[i])-1,int(PE + MeanELDict[i])):
                outline = str(PT + p-PE) + '\t' + str(FeatureCoverageDict['UTR3'][int(p - len(FeatureCoverageDict['CDS'].keys()) - meanUTR5length)-1]/FeatureCoverageDict['UTR3N']) + '\t' + 'UTR3'
                outfile.write(outline + '\n')
        else:
            for p in range(int(PE),int(PE + MeanELDict[i])):
#                outline = str(PT + p-PE) + '\t' + str(FeatureCoverageDict['CDS'][int(p - meanUTR5length)]/NGenes) + '\t' + 'CDS'
#                outline = str(PT + p-PE) + '\t' + str(FeatureCoverageDict['CDS'][int(p - meanUTR5length)]/(len(ExonLengthsPos[1]) + 0.0)) + '\t' + 'CDS'
                outline = str(PT + p-PE) + '\t' + str(FeatureCoverageDict['CDS'][int(p - meanUTR5length)]/FeatureCoverageDict['CDSN']) + '\t' + 'CDS'
                outfile.write(outline + '\n')
            PE += MeanELDict[i]
            PT += MeanELDict[i]
        if i <= meanIntronNumber:
            for p in range(int(PT),int(PT + MeanILDict[i])-1):
#                outline = str(p - 1) + '\t' + str(FeatureCoverageDict['intron'][int(p - PE)]/NGenes) + '\t' + 'intron'
#                outline = str(p - 1) + '\t' + str(FeatureCoverageDict['intron'][int(p - PE)]/(len(IntronLengthsPos[i]) + 0.0)) + '\t' + 'intron'
                outline = str(p - 1) + '\t' + str(FeatureCoverageDict['intron'][int(p - PE)]/FeatureCoverageDict['intronN']) + '\t' + 'intron'
                outfile.write(outline + '\n')
            for p in range(int(PT + MeanILDict[i])-1,int(PT + MeanILDict[i])):
                outline = str(p - 1) + '\t' + str(FeatureCoverageDict['intron'][int(p - PE)-1]/FeatureCoverageDict['intronN']) + '\t' + 'intron'
                outfile.write(outline + '\n')
            PT += MeanILDict[i]

    for i in range(1,radius):
#        outline = str(int(PT) + i) + '\t' + str(FeatureCoverageDict['downstream'][i]/NGenes) + '\t' + 'downstream'
        outline = str(int(PT) + i) + '\t' + str(FeatureCoverageDict['downstream'][i]/FeatureCoverageDict['downstreamN']) + '\t' + 'downstream'
        outfile.write(outline + '\n')

    outfile.close()

run()

