##################################
#                                #
# Last modified 2025/04/17       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import gzip
from sets import Set

def run():
    """Split a tab-delimited file into N equally sized, rank-ordered files.

    Command-line arguments (read from ``sys.argv``):
        inputfilename  -- tab-delimited text file; opened with ``gzip`` when
                          the name ends in '.gz'
        fieldID        -- index into the tab-split fields of each line
                          (0 is the first column) holding the numeric value
        N_quantiles    -- number of output files to produce
        outfileprefix  -- outputs are named <outfileprefix>.Q1 ... .Q<N>

    Lines starting with '#' are skipped and are not written to any output.
    The remaining lines are sorted by descending value, so .Q1 receives the
    highest values. Every output gets exactly ``total // N`` lines; the
    ``total % N`` lowest-ranked lines left over are not written anywhere.

    Exits with status 1 (after printing a message) when too few arguments
    are given or N_quantiles is not positive. Raises ValueError when the
    selected field is not a number and IndexError when a line has too few
    fields.
    """
    # Four arguments are required and outfileprefix is sys.argv[4], so argv
    # must hold at least five entries.
    if len(sys.argv) < 5:
        print('usage: python %s inputfilename fieldID N_quantiles outfileprefix' % sys.argv[0])
        sys.exit(1)

    inputfilename = sys.argv[1]
    fieldID = int(sys.argv[2])
    N = int(sys.argv[3])
    outfileprefix = sys.argv[4]

    # Zero would divide by zero below; a negative count would silently
    # produce no output at all.
    if N < 1:
        print('N_quantiles must be a positive integer')
        sys.exit(1)

    if inputfilename.endswith('.gz'):
        input_stream = gzip.open(inputfilename)
    else:
        input_stream = open(inputfilename)

    ranked = []
    try:
        # The progress counter includes skipped '#' lines.
        for line_number, line in enumerate(input_stream, 1):
            if line_number % 1000000 == 0:
                print('%d lines processed' % line_number)
            if line.startswith('#'):
                continue
            fields = line.strip().split('\t')
            # Keep the untouched line so it can be written back verbatim.
            ranked.append((float(fields[fieldID]), line))
    finally:
        input_stream.close()

    # Descending by value; equal values fall back to comparing the line text.
    ranked.sort()
    ranked.reverse()

    step = len(ranked) // N

    for Q in range(N):
        start = Q * step
        end = start + step
        outfilename = outfileprefix + '.Q' + str(Q + 1)
        with open(outfilename, 'w') as outfile:
            print('%s %d %d' % (outfilename, start, end))
            for _, line in ranked[start:end]:
                outfile.write(line)

# Script entry point: run() is invoked unconditionally at module level
# (there is no __main__ guard), so it also executes if this file is imported.
run()

