##################################
#                                #
# Last modified 2017/09/11       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
from sets import Set

def run():
    """Split a tab-delimited file into one output file per value bin.

    Command-line arguments (read from ``sys.argv``):
        1. inputfilename -- tab-delimited text file to split
        2. fieldID       -- zero-based index of the column holding the value
        3. value_bins    -- comma-separated bin edges; 0 is always added
        4. outfileprefix -- each bin is written to ``<outfileprefix>.<edge>``

    Lines starting with '#' are copied to every output file.  Every other
    line is written to the bin with the largest edge that is <= its value;
    values below the smallest edge go to the lowest bin and values at or
    above the largest edge go to the highest bin.

    Exits with status 1 after printing usage when arguments are missing.
    Raises ValueError when a value is not a number (including NaN).
    """
    # Imported locally so this function does not need a new file-level import.
    from bisect import bisect_right

    # Four user arguments are required.  The previous "< 4" check let a
    # missing outfileprefix through and crashed with IndexError below.
    if len(sys.argv) < 5:
        print('usage: python %s inputfilename fieldID value_bins outfileprefix' % sys.argv[0])
        print('       value bins should be comma separated')
        sys.exit(1)

    inputfilename = sys.argv[1]
    fieldID = int(sys.argv[2])
    bins = sys.argv[3].split(',')
    outfileprefix = sys.argv[4]

    print(bins)

    # 0 is appended as an int (not 0.0) so the implicit zero bin keeps its
    # historical file name "<prefix>.0"; an explicit "0" on the command line
    # still wins and yields "<prefix>.0.0", exactly as before.
    edges = [float(edge) for edge in bins]
    edges.append(0)
    edges = sorted(set(edges))

    handles = []
    try:
        # Output files are opened before the input, matching the old order.
        for edge in edges:
            handles.append(open(outfileprefix + '.' + str(edge), 'w'))

        with open(inputfilename) as input_stream:
            for line_number, line in enumerate(input_stream, 1):
                if line_number % 1000000 == 0:
                    print('%d lines processed' % line_number)

                # Header/comment lines are replicated into every bin file.
                if line.startswith('#'):
                    for handle in handles:
                        handle.write(line)
                    continue

                # Only strip the line terminator: a full strip() would drop
                # a leading tab and shift every column when the first field
                # is empty.
                fields = line.rstrip('\r\n').split('\t')
                value = float(fields[fieldID])

                # NaN compares false against every edge, so it belongs to
                # no bin; fail with a clear message rather than misfile it.
                if value != value:
                    raise ValueError(
                        'line %d: value in field %d is NaN and cannot be binned'
                        % (line_number, fieldID))

                # bisect_right - 1 is the last edge <= value; -1 means the
                # value lies below every edge and is clamped to the lowest bin.
                index = max(bisect_right(edges, value) - 1, 0)
                handles[index].write(line)
    finally:
        # Close whatever was opened, even if parsing failed part-way.
        for handle in handles:
            handle.close()
        
# Run only when executed as a script, so that importing this module does not
# try to parse the importing program's command line.
if __name__ == '__main__':
    run()

