##################################
#                                #
# Last modified 2018/08/06       #
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
import random
from sets import Set
import time

def _load_counts(config):
    """Read the config file and every counts file it lists.

    Each non-blank, non-comment config line has five tab-separated fields:
    library type ('plasmid' or 'exp'), label, 0-based guide column,
    0-based counts column, and the path of the counts file.

    Returns a tuple (guides, totals):
      guides[sgRNA][library_type][label] -> raw count from the file
      totals[library_type][label]        -> sum of raw counts for that label

    Raises KeyError if a library type other than 'plasmid'/'exp' is used.
    """
    totals = {'plasmid': {}, 'exp': {}}
    guides = {}

    with open(config) as config_file:
        for config_line in config_file:
            # Blank and '#' lines carry no library definition.
            if not config_line.strip() or config_line.startswith('#'):
                continue
            cfields = config_line.strip().split('\t')
            library_type = cfields[0]
            label = cfields[1]
            guide_column = int(cfields[2])
            count_column = int(cfields[3])
            filename = cfields[4]

            with open(filename) as counts_file:
                for line in counts_file:
                    if not line.strip() or line.startswith('#'):
                        continue
                    fields = line.strip().split('\t')
                    sgRNA = fields[guide_column]
                    counts = int(fields[count_column])
                    if sgRNA not in guides:
                        guides[sgRNA] = {'plasmid': {}, 'exp': {}}
                    guides[sgRNA][library_type][label] = counts
                    label_totals = totals[library_type]
                    label_totals[label] = label_totals.get(label, 0) + counts

    return guides, totals


def _pseudo_cpm(raw_count, library_total):
    """Return counts-per-million, using a pseudocount of 1 for absent guides.

    raw_count may be None (guide not present in the file) or 0; both are
    replaced by 1 so that the downstream log2 ratio is always defined.
    """
    if not raw_count:
        raw_count = 1
    return raw_count / (library_total / 1e6)


def run():
    """Write per-guide CPM values and log2 fold changes versus the plasmid.

    Command line: ``python <script> config output``.

    The output is a tab-separated table with a ``#sgRNA`` header line, the
    plasmid CPM, and for every 'exp' label its CPM and log2(exp CPM /
    plasmid CPM). Guides that are missing from a library, or have a count of
    zero in it, are given a pseudocount of 1 before normalisation. When the
    config lists several plasmid labels only the first one encountered by
    dict iteration is used as the reference.

    Exits with status 1 if fewer than two arguments are given or if guides
    were read but no plasmid library was listed in the config.
    """
    # Both the config path and the output path are required.
    if len(sys.argv) < 3:
        print('usage: python %s config output' % sys.argv[0])
        print('\tconfig format:')
        print('\ttype("plasmid"/"exp")\texp_label\t\tguide_field_ID\tcounts_field_ID\tfilename')
        sys.exit(1)

    config = sys.argv[1]
    outfilename = sys.argv[2]

    guides, totals = _load_counts(config)

    plasmid_labels = list(totals['plasmid'])
    exp_labels = list(totals['exp'])
    if guides and not plasmid_labels:
        sys.exit('error: config lists no "plasmid" library with counts')
    plabel = plasmid_labels[0] if plasmid_labels else None

    header = ['#sgRNA', 'plasmid_counts']
    for label in exp_labels:
        header.append(label + '_counts')
        header.append(label + '_lFC')

    with open(outfilename, 'w') as outfile:
        outfile.write('\t'.join(header) + '\n')
        for sgRNA in guides:
            per_guide = guides[sgRNA]
            plasmid_cpm = _pseudo_cpm(per_guide['plasmid'].get(plabel),
                                      totals['plasmid'][plabel])
            row = [sgRNA, str(plasmid_cpm)]
            for label in exp_labels:
                label_cpm = _pseudo_cpm(per_guide['exp'].get(label),
                                        totals['exp'][label])
                row.append(str(label_cpm))
                row.append(str(math.log(label_cpm / plasmid_cpm, 2)))
            outfile.write('\t'.join(row) + '\n')

# Only run when executed as a script, so importing this module does not
# parse sys.argv or exit the importing process.
if __name__ == '__main__':
    run()
