##################################
#                                #
# Last modified 10/31/2014       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
import numpy

def _parseFieldSpec(spec, allowRange=False):
    """Turn a command-line field specification into sorted 0-based indices.

    spec is a comma-separated list of integers ('0,3,5').  When allowRange
    is true and spec contains ':', it is read as 'start:end' and expands to
    every index from start to end, both ends included.
    """
    if allowRange and ':' in spec:
        bounds = spec.split(':')
        return list(range(int(bounds[0]), int(bounds[1]) + 1))
    return sorted(int(f) for f in spec.split(','))


def _optionValue(argv, flag):
    """Return the float that follows flag in argv, or None if flag is absent."""
    if flag not in argv:
        return None
    return float(argv[argv.index(flag) + 1])


def run():
    """Row-normalize selected columns of a tab-separated file.

    Command line (read from sys.argv):
        input labelfields valuefields outputfilename
        [-minMax Value] [-maxMax Value] [-normToMean]

    For every data line the label columns are copied through and each value
    column X is replaced by (X - mean)/stdv, where mean and stdv are taken
    over the value columns of that line.  With -normToMean the divisor is
    the mean instead of the standard deviation.  Lines whose largest value
    is below -minMax or above -maxMax are dropped.

    Lines starting with '#' or 'tracking_id' are treated as headers: the
    selected columns are written out prefixed with '#'; header lines that
    are too short to contain every selected column are skipped.

    Exits with status 1 after printing usage when fewer than four
    positional arguments are supplied.
    """
    # sys.argv[4] (the output file name) is mandatory, so the script name
    # plus four arguments are required.
    if len(sys.argv) < 5:
        print('usage: python %s input labelfields valuefields outputfilename [-minMax Value] [-maxMax Value] [-normToMean]' % sys.argv[0])
        print('\tvaluefields format: either comma separated, or start:end (including start and end, 0-based)')
        print('\tthe -minMax and -maxMax options will filter out all lines the maximum value of which (within the specified fields) does not match the required values')
        sys.exit(1)

    inputFileName = sys.argv[1]
    outfilename = sys.argv[4]

    labelFields = _parseFieldSpec(sys.argv[2])
    print(labelFields)

    valueFields = _parseFieldSpec(sys.argv[3], allowRange=True)

    doNTM = '-normToMean' in sys.argv
    if doNTM:
        print('will normalize to the mean instead of the stdv')

    print(valueFields)

    # None means "no filtering on this bound".
    minValue = _optionValue(sys.argv, '-minMax')
    maxValue = _optionValue(sys.argv, '-maxMax')

    # Largest column index a line must provide for all selected fields.
    lastNeeded = max(labelFields + valueFields)

    with open(inputFileName) as linelist:
        with open(outfilename, 'w') as outfile:
            for line in linelist:
                fields = line.replace('\x00', '').strip().split('\t')

                if line.startswith('#') or line.startswith('tracking_id'):
                    # Index lastNeeded only exists when there are more
                    # than lastNeeded columns; shorter comment lines are
                    # not column headers and are skipped.
                    if len(fields) <= lastNeeded:
                        continue
                    header = [fields[ID] for ID in labelFields + valueFields]
                    outfile.write(('#' + '\t'.join(header)).strip() + '\n')
                    continue

                labels = [fields[ID] for ID in labelFields]
                values = [float(fields[ID]) for ID in valueFields]

                if minValue is not None and max(values) < minValue:
                    continue
                if maxValue is not None and max(values) > maxValue:
                    continue

                arr = numpy.array(values)
                arrMean = numpy.mean(arr)
                if doNTM:
                    divisor = arrMean
                else:
                    divisor = numpy.std(arr)
                # A zero divisor (constant row, or zero mean with
                # -normToMean) is not trapped: numpy emits nan/inf for
                # those entries and they are written out as such.
                normalized = (arr - arrMean) / divisor

                columns = labels + [str(x) for x in normalized]
                outfile.write('\t'.join(columns).strip() + '\n')
   
# Only execute when invoked as a script, so that importing this module
# (for reuse or testing of run()) does not trigger command-line processing.
if __name__ == '__main__':
    run()
