##################################
#                                #
# Last modified 08/10/2010       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
from sets import Set

def _usage_exit():
    """Print the command-line usage text and terminate with exit status 1."""
    print('usage: python %s  datafilename fieldIDs ExpressionValues outfilename -top num1,num2,num3..numN' % sys.argv[0])
    print('\tfieldIDs and ExpressionValues should be comma separated')
    print('\tif using the -top option, the first number referse to the top Num1 values, the second refers to the top Num2, from which the first top Num1 will be excluded, etc.')
    sys.exit(1)


def _read_columns(datafilename, IDfields):
    """Collect the selected columns of a tab-separated file as floats.

    Lines starting with '#' are header lines: the text found at each index
    in ``IDfields`` becomes the label of that column and (re)starts an empty
    value list for it.  Every other non-blank line contributes one value per
    selected column.  A progress counter is printed every 100000 lines.

    Returns a dict mapping column label -> list of float values.
    Raises KeyError if a data line precedes the header line, and ValueError
    if a selected field is not a number.
    """
    LabelDict = {}
    DataDict = {}
    # The context manager guarantees the input handle is closed.
    with open(datafilename) as datafile:
        for i, line in enumerate(datafile, 1):
            if i % 100000 == 0:
                print(i)
            stripped = line.strip()
            if not stripped:
                # Blank lines (e.g. at the end of the file) carry no data.
                continue
            fields = stripped.split('\t')
            if line.startswith('#'):
                for ID in IDfields:
                    LabelDict[ID] = fields[ID]
                    DataDict[fields[ID]] = []
                continue
            for ID in IDfields:
                DataDict[LabelDict[ID]].append(float(fields[ID]))
    return DataDict


def _sum_by_rank(values, binList, sums):
    """Add to ``sums`` the total of each rank window of ``values``.

    ``values`` is ranked from largest to smallest; for consecutive entries
    ``previous``/``top`` of the ascending ``binList`` the values ranked
    ``previous`` .. ``top - 1`` are summed into ``sums[top]``.
    """
    ranked = sorted(values, reverse=True)
    print(ranked[0:20])
    previous = 0
    for top in binList:
        print('%s %s' % (previous, top))
        for num in ranked[previous:top]:
            sums[top] += num
        previous = top


def _sum_by_threshold(values, binList, sums):
    """Add each value to the bin of the largest threshold it reaches.

    ``binList`` must be sorted ascending.  A value ``v`` goes to the largest
    threshold ``t`` with ``v >= t``; values below every threshold are
    ignored.
    """
    descending = binList[::-1]
    for num in values:
        # Scanning from the top and using >= means a value exactly equal to
        # the largest threshold is counted instead of being dropped.
        for threshold in descending:
            if num >= threshold:
                sums[threshold] += num
                break


def run():
    """Sum the values of selected columns per expression bin and write a table.

    Command line (read from ``sys.argv``)::

        datafilename fieldIDs ExpressionValues outfilename [-top n1,n2,...]

    * ``fieldIDs``: comma-separated 0-based column indexes to process.
    * ``ExpressionValues``: comma-separated thresholds; 0 is always added.
      Each value is summed into the bin of the largest threshold it reaches.
    * ``-top``: replaces the thresholds with rank cut-offs; the values of
      each column are ranked from largest to smallest and summed per window
      between consecutive cut-offs.

    The output file has a header row ('#' followed by the bins) and one row
    per column label with the summed values, all tab-separated.  Usage is
    printed and the process exits with status 1 when arguments are missing.
    """
    # Four positional arguments are required (sys.argv[1] .. sys.argv[4]).
    if len(sys.argv) < 5:
        _usage_exit()

    datafilename = sys.argv[1]
    IDfields = [int(ID) for ID in sys.argv[2].split(',')]
    binList = [float(value) for value in sys.argv[3].split(',')]
    binList.append(0)
    outfilename = sys.argv[4]

    doTop = '-top' in sys.argv
    if doTop:
        top_pos = sys.argv.index('-top') + 1
        if top_pos >= len(sys.argv):
            # '-top' was given without its list of cut-offs.
            _usage_exit()
        binList = [int(value) for value in sys.argv[top_pos].split(',')]

    # Unique bins in ascending order.
    binList = sorted(set(binList))

    DataDict = _read_columns(datafilename, IDfields)
    IDkeys = sorted(DataDict)

    HistDict = {}
    for ID in IDkeys:
        print(ID)
        HistDict[ID] = dict.fromkeys(binList, 0)
        if doTop:
            _sum_by_rank(DataDict[ID], binList, HistDict[ID])
        else:
            _sum_by_threshold(DataDict[ID], binList, HistDict[ID])

    # The output is opened only once the results exist, so a failure while
    # reading the input does not leave a truncated output file behind.
    with open(outfilename, 'w') as outfile:
        outfile.write('#' + ''.join('\t' + str(bin) for bin in binList) + '\n')
        for ID in IDkeys:
            row = ''.join('\t' + str(HistDict[ID][bin]) for bin in binList)
            outfile.write(ID + row + '\n')
        
# Script entry point: run() is invoked unconditionally at module level, so it
# executes whenever this file is run (and also if the module is imported).
run()

