##################################
#                                #
# Last modified 09/07/2010       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import math

try:
	import psyco
	psyco.full()
except:
	pass

def run():

    if len(sys.argv) < 2:
        print 'usage: python %s inputfilename outputfilename [-first N]' % sys.argv[0]
        sys.exit(1)

    inputfilenames = sys.argv[1]
    outputfilename = sys.argv[2]
    doFirst=False
    if '-first' in sys.argv:
        doFirst=True
        first=int(sys.argv[sys.argv.index('-first')+1])

    outfile = open(outputfilename, 'w')

    Number=0.0
    Sum=0.0
    lineslist = open(inputfilenames)
    CountsDict={}
    i=0
    for line in lineslist:
        if line.startswith('#'):
            continue
        i+=1
        if doFirst and i > first:
            print 'skipping'
            break
        outfile.write(line)
        if line.startswith('length'):
            continue
        fields=line.strip().split('\t')
        if len(fields)<2:
            continue
        insert=int(float(fields[0]))
        counts=int(fields[1])
        Number+=counts
        Sum+=(insert*counts)
        CountsDict[insert]=counts

    mean=Sum/Number
    stdDevSum=0
    for insert in CountsDict.keys():
        stdDevSum+=(CountsDict[insert]*(insert-mean)*(insert-mean))
    stdDev=math.sqrt(stdDevSum/Number)
    
    outline='#mean = ' + str(mean)
    outfile.write(outline+'\n')
    outline='#st. dev = ' + str(stdDev)
    outfile.write(outline+'\n')

    outfile.close()

run()

