##################################
#                                #
# Last modified 2016/08/27       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
# import scipy.stats
# import numpy
import math
import random
from sets import Set
import time

def run():

    if len(sys.argv) < 4:
        print 'usage: python %s DirectRNASeqSimulation-output parameters threshold outfile' % sys.argv[0]
        print '\tAssumed input format:'
        print '\t#gene\toriginal_FPKM\toriginal_TPM\tSampled_Reads\tSampled_TPM\tWithin_Range?'
        print '	parameters format: TPM1,TPM2,TPM3...TPMN (genes with values less than the first one will be omitted)'
        print '	threshold: maximum fractional difference from final value, for example 0.05)'
        sys.exit(1)

    inputfilename = sys.argv[1]
    parameters = sys.argv[2]
    threshold = float(sys.argv[3])
    outputfilename = sys.argv[4]
    ParamList=[]
    ParamList.append(0.0)
    NomoDict={}
    fields=parameters.split(',')
    for p in fields:
        ParamList.append(float(p))
        NomoDict[float(p)] = {}
        NomoDict[float(p)][0] = 0
        NomoDict[float(p)][1] = 0

    ParamList = list(Set(ParamList))
    ParamList.sort()

    print ParamList

    lineslist = open(inputfilename)
    for line in lineslist:
        fields=line.strip().split('\t')
        if line[0]=='#':
            continue
        TPM = float(fields[2])
        if TPM >= max(ParamList): 
            param=max(ParamList)
        else:
            for p in ParamList:
                if TPM >= p and TPM < ParamList[ParamList.index(p)+1]:
                    param=p
                    break
        WithinRange = fields[5]
        if WithinRange == '0':
            NomoDict[param][0] += 1
        if WithinRange == '1':
            NomoDict[param][1] += 1
            
    ParamList.sort()

    print NomoDict
    print ParamList

    outfile = open(outputfilename, 'w')

    outline = '#TPM\tNumber_entries\tWithin_Range\tOutside_Range\tFraction_within_range'
    outfile.write(outline + '\n')

    for param in ParamList:
        outline = str(param) + '\t' + str(NomoDict[param][1] + NomoDict[param][0])
        outline = outline + '\t' + str(NomoDict[param][1]) + '\t' + str(NomoDict[param][0])
        if NomoDict[param][1] + NomoDict[param][0] > 0:
            f = NomoDict[param][1]/(NomoDict[param][1] + NomoDict[param][0] + 0.0)
        else:
            f = 'nan'
        outline = outline + '\t' + str(f)
        outfile.write(outline+'\n')

    outfile.close()

run()