##################################
#                                #
# Last modified 10/26/2011       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
from sets import Set

def run():
    """Report how often each listed value occurs in one column of a table.

    Command-line arguments (read from ``sys.argv``):

    1. input        -- tab-delimited file to tally, one record per line.
    2. fieldID      -- integer index of the column to inspect (used directly
                       as a Python list index on the split line).
    3. values_file  -- tab-delimited file; the first column of each line is a
                       value to report individually.
    4. outfilename  -- path of the report to write.

    Records whose value is not listed in ``values_file`` are pooled under the
    key ``'others'``.  The report starts with the header ``#\\tfraction`` and
    then holds one ``value\\tfraction`` line per key, in sorted key order;
    every report line is also echoed to stdout.  When the input holds no
    records at all, every fraction is reported as ``0.0``.

    Exits with status 1 after printing a usage message if fewer than four
    arguments are supplied.

    Raises:
        ValueError: if ``fieldID`` is not an integer literal.
        IndexError: if an input line has no column at index ``fieldID``.
    """
    # Four positional arguments are required, so argv (which also holds the
    # script name) must contain at least five entries.
    if len(sys.argv) < 5:
        print('usage: python %s input fieldID values_file outfilename ' % sys.argv[0])
        sys.exit(1)

    junctions = sys.argv[1]
    fieldID = int(sys.argv[2])
    values = sys.argv[3]
    outfilename = sys.argv[4]

    # Every counter is a float so the fractions below are always computed
    # with true division and formatted uniformly.
    CountsDict = {'others': 0.0}
    with open(values) as values_file:
        for line in values_file:
            # strip() removes surrounding whitespace, tabs included, before
            # the line is split into columns.
            fields = line.strip().split('\t')
            CountsDict[fields[0]] = 0.0

    with open(junctions) as junctions_file:
        for line in junctions_file:
            fields = line.strip().split('\t')
            N = fields[fieldID]
            if N in CountsDict:
                CountsDict[N] += 1.0
            else:
                CountsDict['others'] += 1.0

    TotalCounts = sum(CountsDict.values())

    with open(outfilename, 'w') as outfile:
        outfile.write('#\tfraction\n')
        for N in sorted(CountsDict):
            # An empty input has no records to divide by; report zeros
            # instead of failing with ZeroDivisionError.
            if TotalCounts:
                fraction = CountsDict[N] / TotalCounts
            else:
                fraction = 0.0
            outline = N + '\t' + str(fraction)
            print(outline)
            outfile.write(outline + '\n')
        
# Entry point: run() is called unconditionally (there is no __main__ guard),
# so it executes, and reads sys.argv, whenever this file is run or imported.
run()

