##################################
#                                #
# Last modified 09/09/2011       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math

def run():
    """Sum per-label interval lengths from a tab-delimited annotation file.

    Arguments are read from ``sys.argv``:

    1. repeatMask     -- path to the tab-delimited input file. Lines that
                         start with '#' and blank lines are skipped.
    2. chrFiledID     -- 0-based column index. The two columns that follow
                         it are parsed as integers and the value
                         (column+2) - (column+1) is added to the running
                         total (presumably end minus start coordinate --
                         confirm against the input format).
    3. labelFields    -- comma-separated 0-based column indices whose values
                         form the grouping key. The indices are sorted
                         ascending before use.
    4. outputfilename -- path of the file to write.

    The output has one line per distinct label combination: the labels
    joined by '::', a tab, and the summed length.

    Prints a usage message and exits with status 1 when fewer than the four
    required arguments are supplied.
    """
    # Four positional arguments are required, i.e. sys.argv must hold five
    # entries including the script name.
    if len(sys.argv) < 5:
        print('usage: python %s repeatMask chrFiledID labelFields outputfilename' % sys.argv[0])
        print('       Label fields some combination of the three levels of annotation')
        sys.exit(1)

    in_path = sys.argv[1]
    chr_field = int(sys.argv[2])
    label_fields = sorted(int(idx) for idx in sys.argv[3].split(','))
    out_path = sys.argv[4]

    # Maps a tuple of label values to the accumulated length.
    totals = {}

    with open(in_path) as infile:
        for line_no, line in enumerate(infile, 1):
            if line_no % 1000000 == 0:
                print('%d lines processed' % line_no)
            if line.startswith('#'):
                continue
            stripped = line.strip()
            if not stripped:
                # A blank line carries no record; skip it instead of
                # failing on the column lookup below.
                continue
            columns = stripped.split('\t')
            key = tuple(columns[idx] for idx in label_fields)
            length = int(columns[chr_field + 2]) - int(columns[chr_field + 1])
            totals[key] = totals.get(key, 0) + length

    with open(out_path, 'w') as outfile:
        for key in totals:
            outfile.write('::'.join(key) + '\t' + str(totals[key]) + '\n')
   
# Run only when executed as a script, so that importing this module does not
# parse sys.argv or exit the interpreter.
if __name__ == '__main__':
    run()
