##################################
#                                #
# Last modified 2017/09/03       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
import os

def _source_command(path):
    """Return the argv list of the external command that streams *path*.

    The command is chosen from the file extension: ``bzip2 -cd`` for
    ``.bz2``, ``zcat`` for ``.gz`` and plain ``cat`` for anything else.
    """
    if path.endswith('.bz2'):
        return ['bzip2', '-cd', path]
    if path.endswith('.gz'):
        return ['zcat', path]
    return ['cat', path]


def _iter_data_lines(path):
    """Yield every line of *path* that does not start with '#'."""
    # Local import: the rest of the file does not import subprocess.
    import subprocess

    # Passing an argument list (no shell) keeps file names that contain
    # spaces or shell metacharacters from being interpreted by /bin/sh.
    proc = subprocess.Popen(_source_command(path), stdout=subprocess.PIPE,
                            universal_newlines=True)
    try:
        for line in proc.stdout:
            if not line.startswith('#'):
                yield line
    finally:
        # Always release the pipe and reap the child process.
        proc.stdout.close()
        proc.wait()


def _int_option(flag):
    """Return the positive integer that follows *flag* in sys.argv.

    Returns None when *flag* is absent.  Exits with status 1 when the flag
    has no value after it or the value is not strictly positive; a
    non-integer value raises ValueError from int().
    """
    if flag not in sys.argv:
        return None
    pos = sys.argv.index(flag) + 1
    if pos >= len(sys.argv):
        sys.stderr.write('error: %s requires a value\n' % flag)
        sys.exit(1)
    value = int(sys.argv[pos])
    if value <= 0:
        # A zero or negative step would never advance past the end of the
        # data (or would divide by zero for -totalSteps).
        sys.stderr.write('error: %s must be a positive integer\n' % flag)
        sys.exit(1)
    return value


def run():
    """Write a sampled cumulative distribution of one column of a file.

    Command line (read from sys.argv):
        input        tab-delimited file; '.bz2' and '.gz' names are
                     decompressed through bzip2 / zcat
        fieldID      0-based index of the column to read
        outfilename  path of the file to write
        -step N        emit every N-th sorted value (default 1)
        -totalSteps N  derive the step as (number of non-comment input
                       lines) / N; overrides -step when both are given

    Lines starting with '#' are ignored.  Lines that are too short, whose
    field is not a number, or whose field is NaN are skipped.  The output
    has one line per sampled value: the value, a tab, and the sampling
    position divided by the number of values.

    Exits with status 1 after printing the usage text when fewer than
    three positional arguments are supplied.
    """
    # Three positional arguments are mandatory, so argv needs >= 4 entries.
    if len(sys.argv) < 4:
        # A parenthesised single-argument print works on Python 2 and 3.
        print('usage: python %s input fieldID outfilename [-step N] [-totalSteps N]' % sys.argv[0])
        sys.exit(1)

    in_path = sys.argv[1]
    fieldID = int(sys.argv[2])
    outfilename = sys.argv[3]

    step = _int_option('-step')
    if step is None:
        step = 1
    totalSteps = _int_option('-totalSteps')

    values = []
    line_count = 0  # non-comment lines, whether or not they parse
    for line in _iter_data_lines(in_path):
        line_count += 1
        fields = line.strip().split('\t')
        try:
            value = float(fields[fieldID])
        except (IndexError, ValueError):
            # Short line or non-numeric field: skip it.
            continue
        # NaN in any spelling has no place in a sorted distribution.
        if not math.isnan(value):
            values.append(value)

    if totalSteps is not None:
        # Counted during the single pass above instead of re-reading the
        # input; float division so fractional steps are allowed.
        step = float(line_count) / totalSteps

    values.sort()
    count = float(len(values))

    with open(outfilename, 'w') as outfile:
        pos = 0
        while pos < len(values):
            # pos may be fractional; truncate it to index the sorted list.
            outfile.write(str(values[int(pos)]) + '\t' + str(pos / count) + '\n')
            pos += step
   
# Run only when executed as a script, so that importing this file does not
# parse the importer's sys.argv or call sys.exit().
if __name__ == '__main__':
    run()
