##################################
#                                #
# Last modified 12/18/2011       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import random
import copy
import time
from sets import Set

def _parse_datafields(spec):
    """Expand a datafields specification into a list of 0-based column indexes.

    `spec` is a comma-separated list whose items are either a single integer
    ('4') or an inclusive range written with a dash ('1-15').  Order and
    duplicates are preserved; empty items (e.g. from a trailing comma) are
    skipped.

    Raises ValueError when an item is not an integer or a two-sided range.
    """
    columns = []
    for token in spec.split(','):
        if not token:
            # Tolerate a trailing or doubled comma instead of failing on int('').
            continue
        if '-' in token:
            bounds = token.split('-')
            if len(bounds) > 2:
                raise ValueError('improper datafields format: %r' % token)
            # The upper bound of a range is inclusive.
            columns.extend(range(int(bounds[0]), int(bounds[1]) + 1))
        else:
            columns.append(int(token))
    return columns


def _load_rows(path, columns):
    """Read the tab-separated file at `path` and return one value list per row.

    Lines starting with '#' and blank lines are skipped.  For every remaining
    line the cells at the indexes in `columns` are extracted, in that order;
    a cell containing 'FAIL' becomes 0, any other cell is converted with
    float().
    """
    rows = []
    with open(path) as handle:
        for line in handle:
            if line.startswith('#'):
                continue
            # Only drop the line terminator: stripping tabs as well would
            # shift every column left whenever the first cell is empty.
            text = line.rstrip('\r\n')
            if not text.strip():
                continue
            cells = text.split('\t')
            rows.append([0 if 'FAIL' in cells[i] else float(cells[i])
                         for i in columns])
    return rows


def _saturation_counts(rows, order, min_value):
    """Return the cumulative saturation curve for one ordering of the fields.

    `order` lists positions into each row.  Element k of the returned list
    (k = 0 .. len(order)) is the number of rows holding a value >= min_value
    in at least one of the first k positions of `order`; element 0 is
    always 0.
    """
    first_hit = [0] * (len(order) + 1)
    for row in rows:
        for step, position in enumerate(order):
            if row[position] >= min_value:
                # A row is counted once, at the first step that reaches it.
                first_hit[step + 1] += 1
                break
    counts = [0]
    for step in range(1, len(order) + 1):
        counts.append(counts[-1] + first_hit[step])
    return counts


def run():
    """Command-line entry point: compute saturation curves and write them out.

    Positional arguments (read from sys.argv): inputfilename, datafields,
    iterations, minValue, outputfilename.  With the optional '-fixedorder'
    flag a single curve is computed using the datafields in the order given
    (written under row label 1); otherwise the de-duplicated datafields are
    shuffled `iterations` times and one curve is written per iteration (row
    labels 0 .. iterations-1).

    The output file has a '#'-prefixed header line with the step numbers
    1 .. number of fields, followed by one tab-separated line per curve.
    It is opened only after the input has been read successfully.

    Exits with status 1 when too few arguments are given or when the
    datafields specification is malformed.
    """
    # Five positional arguments are read below, so argv needs six entries.
    if len(sys.argv) < 6:
        print('usage: python %s inputfilename datafields iterations minValue outputfilename [-fixedorder]' % sys.argv[0])
        print('       Note: datafields are to be comma-separated, for example: 1,2,3,4,5, or if a large range is wanted, by a dash, for example: 1-15,17-20, (this will include the last number and is 0-based)')
        print('       if you used the -fixedorder option, make sure the order the datafileds are listed is the same as the order they should appear in the outputfile')
        print('       Note2: if a FAIL status is in a field instead of a number, 0 will be assigned')
        sys.exit(1)

    input_path = sys.argv[1]
    datafields = sys.argv[2]
    iterations = int(sys.argv[3])
    min_value = float(sys.argv[4])
    outfilename = sys.argv[5]

    do_fixed_order = '-fixedorder' in sys.argv
    if do_fixed_order:
        print('will not permute but output the saturation in the order the datafields are supplied')

    try:
        fields_list = _parse_datafields(datafields)
    except ValueError:
        print('improper datafields format, exiting')
        sys.exit(1)

    if not do_fixed_order:
        # Order is irrelevant when permuting; drop duplicates and sort so the
        # column list is deterministic.
        fields_list = sorted(set(fields_list))

    print('%d fields:' % len(fields_list))
    print(fields_list)

    rows = _load_rows(input_path, fields_list)

    # Maps a row label to its cumulative counts (indexable by step number).
    saturation = {}
    if do_fixed_order:
        counts = _saturation_counts(rows, range(len(fields_list)), min_value)
        for step in range(1, len(counts)):
            print('%s %s' % (step, counts[step]))
        print(dict(enumerate(counts)))
        saturation[1] = counts
    else:
        for iteration in range(iterations):
            started = time.time()
            print('iteration %d' % iteration)
            order = list(range(len(fields_list)))
            random.shuffle(order)
            saturation[iteration] = _saturation_counts(rows, order, min_value)
            print('%s seconds' % (time.time() - started))

    steps = range(1, len(fields_list) + 1)
    with open(outfilename, 'w') as outfile:
        outfile.write('#' + ''.join('\t' + str(step) for step in steps) + '\n')
        for label in sorted(saturation):
            curve = saturation[label]
            outfile.write(str(label)
                          + ''.join('\t' + str(curve[step]) for step in steps)
                          + '\n')
   
# Script entry point: this call is unguarded, so run() executes whenever the
# module is loaded (including on import), reading its arguments from sys.argv.
run()
