##################################
#                                #
# Last modified 05/18/2011       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys

def run():

    if len(sys.argv) < 3:
        print 'usage: python %s list_of_bedRPKM_rawreadCount_files expected_chrCounts outifle' % sys.argv[0]
        print '     list_of_bedRPKM_rawreadCount_files format: label <tab> bedRPKM_file'
        print '     bedRPKM_rawreadCount_file format: chr <tab> 0 <tab> chrLength <tab> <reads>'
        print '     bedRPKM_rawreadCount_file format: produced by running bedRPKM.py with the following options: -rawreadNumber -cache 50000000 -nomulti'
        sys.exit(1)

    input = sys.argv[1]
    expected_counts = sys.argv[2]
    outputfilename = sys.argv[3]

    outfile = open(outputfilename, 'w')

    ChrDict={}
    lineslist = open(expected_counts)
    TotalLength=0.0
    for line in lineslist:
        if line.startswith('#'):
            continue
        fields=line.strip().split('\t')
        chr=fields[0]
        counts=int(fields[1])
        if ChrDict.has_key(chr):
            pass
        else:
            ChrDict[chr]={}
        TotalLength+=counts
        ChrDict[chr]['EffectiveLength']=counts

    for chr in ChrDict:
        ChrDict[chr]['ExpectedReadFraction']=ChrDict[chr]['EffectiveLength']/(TotalLength)

    keys=ChrDict.keys()
    keys.sort()
    outline='#label'
    for chr in keys:
        outline = outline + '\t' + chr
    outfile.write(outline+'\n')

    lineslist = open(input)
    for line in lineslist:
        SampleChrDict={}
        TotalReads=0
        fields=line.strip().split('\t')
        label=fields[0]
        file=fields[1]
        lines=open(file)
        lines=list(lines)
        if len(lines) == 0:
            continue
        for line2 in lines:
            fields2=line2.strip().split('\t')
            chr=fields2[0]
            if ChrDict.has_key(chr):
                pass
            else:
                continue
            reads=int(float(fields2[3]))
            TotalReads+=reads
            SampleChrDict[chr]=reads
        outline=label
        for chr in keys:
            expectedReads = ChrDict[chr]['ExpectedReadFraction']*TotalReads
            try:
                score= (SampleChrDict[chr]/expectedReads)-1
            except:
                print 'problem with', label, chr, SampleChrDict[chr], expectedReads
                scroe=0
            outline = outline + '\t' + str(score)
        outfile.write(outline+'\n')

    outfile.close()

run()

