##################################
#                                #
# Last modified 2022/10/18       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import os
import string
import subprocess
import sys

def run():
    """Copy the files named in a listing into numbered chunk directories.

    Command line (read from ``sys.argv``)::

        list_of_files fieldID chunksize outfileprefix [-list_of_files prefix]

    * ``list_of_files`` -- tab-delimited listing; it is streamed through
      ``bzip2 -cd`` (``.bz2``), ``zcat`` (``.gz``/``.bgz``) or ``cat``.
      Lines starting with ``#`` are skipped.
    * ``fieldID`` -- 0-based column holding the file name.
    * ``chunksize`` -- number of files per output directory (must be >= 1).
    * ``outfileprefix`` -- directories are named ``outfileprefix + str(n)``.
    * ``-list_of_files prefix`` -- optional string prepended to every file
      name taken from the listing.

    The file at position ``i`` of the listing is copied with ``cp`` into
    directory ``outfileprefix + str(i // chunksize)``.  The total file count
    and ``count // chunksize`` are printed first, then the chunk index each
    time a new chunk is started.

    NOTE: ``count // chunksize + 1`` directories are created, so the last one
    stays empty when the file count is an exact multiple of ``chunksize``.

    Exits with status 1 if too few arguments are given or ``chunksize`` is
    not positive.  Raises ``ValueError`` if ``fieldID``/``chunksize`` are not
    integers and ``IndexError`` if a line has no column ``fieldID``.
    """
    # Four positional arguments are required, i.e. sys.argv[1] .. sys.argv[4].
    if len(sys.argv) < 5:
        # A single parenthesised string prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print('usage: python %s list_of_files fieldID chunksize outfileprefix [-list_of_files prefix]' % sys.argv[0])
        sys.exit(1)

    inputfilename = sys.argv[1]
    fieldID = int(sys.argv[2])
    chunksize = int(sys.argv[3])
    outfileprefix = sys.argv[4]

    if chunksize < 1:
        sys.stderr.write('chunksize must be a positive integer\n')
        sys.exit(1)

    # Column separator of the listing (fixed to TAB).
    separator = '\t'

    name_prefix = ''
    if '-list_of_files' in sys.argv:
        name_prefix = sys.argv[sys.argv.index('-list_of_files') + 1]

    # Argument lists (no shell) keep paths containing spaces or shell
    # metacharacters intact.
    if inputfilename.endswith('.bz2'):
        reader_cmd = ['bzip2', '-cd', inputfilename]
    elif inputfilename.endswith(('.gz', '.bgz')):
        reader_cmd = ['zcat', inputfilename]
    else:
        reader_cmd = ['cat', inputfilename]

    file_list = []
    reader = subprocess.Popen(reader_cmd, stdout=subprocess.PIPE,
                              universal_newlines=True)
    try:
        for line in reader.stdout:
            if line.startswith('#'):
                continue
            fields = line.strip().split(separator)
            file_list.append(name_prefix + fields[fieldID])
    finally:
        # Always release the pipe and reap the child process.
        reader.stdout.close()
        reader.wait()

    # Floor division gives the same integer on Python 2 and Python 3.
    chunks = len(file_list) // chunksize
    print('%d %d' % (len(file_list), chunks))

    for n in range(chunks + 1):
        subprocess.call(['mkdir', outfileprefix + str(n)])

    for position, filename in enumerate(file_list):
        chunk = position // chunksize
        # Failures of cp are reported by cp itself; processing continues.
        subprocess.call(['cp', filename, outfileprefix + str(chunk)])
        if position % chunksize == 0:
            print(chunk)

        
# Run only when executed as a script, so that importing this module does not
# parse sys.argv, copy files or exit the interpreter.
if __name__ == '__main__':
    run()

