##################################
#                                #
# Last modified 01/06/2013       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math

def _sum_feature_lengths(lines, chrFieldID, IDfieldIDs):
    """Accumulate the total interval length for every feature ID.

    Args:
        lines: iterable of tab-delimited text lines.
        chrFieldID: 0-based index of the chromosome column; the interval
            start and end are read from the two columns that follow it.
        IDfieldIDs: list of 0-based column indices whose values together
            form the feature ID.

    Returns:
        dict mapping a tuple of ID column values to the sum of
        (end - start) over all lines carrying that ID.

    Raises:
        IndexError: if a data line has fewer columns than required.
        ValueError: if a start/end column is not an integer.
    """
    totals = {}
    processed = 0
    for line in lines:
        # Lines starting with '#' are treated as comments/headers.
        if line.startswith('#'):
            continue
        # Strip the line terminator (including a '\r' left by CRLF files so
        # it cannot leak into the last column).
        record = line.rstrip('\r\n')
        # Empty lines carry no interval; skip them instead of crashing.
        if not record:
            continue
        processed += 1
        if processed % 100000 == 0:
            print(str(processed / 1000000.) + 'M lines processed')
        fields = record.split('\t')
        left = int(fields[chrFieldID + 1])
        right = int(fields[chrFieldID + 2])
        key = tuple([fields[ID] for ID in IDfieldIDs])
        totals[key] = totals.get(key, 0) + (right - left)
    return totals


def run():
    """Sum interval lengths per feature ID and write them to a file.

    Command-line arguments (read from sys.argv):
        1. bedfilename    - tab-delimited input file
        2. chrFieldID     - 0-based index of the chromosome column
        3. IDfield(s)     - comma-separated 0-based indices of the ID columns
        4. outputfilename - file to write the per-feature totals to

    The output has one line per distinct feature ID, sorted by ID, holding
    the ID column values (in ascending column-index order) followed by the
    summed length, all tab-separated.

    Prints the usage text and exits with status 1 when fewer than four
    arguments are supplied.
    """
    # Four arguments are required (sys.argv[4] is the output file name), so
    # the script name plus arguments must total at least five entries.
    if len(sys.argv) < 5:
        print('usage: python %s bedfilename chrFieldID IDfield(s) outputfilename' % sys.argv[0])
        print('IDfields comma separated')
        sys.exit(1)

    bedfile = sys.argv[1]
    chrFieldID = int(sys.argv[2])
    # ID columns are always used in ascending index order, regardless of the
    # order given on the command line.
    IDfieldIDs = sorted(int(ID) for ID in sys.argv[3].split(','))
    outfilename = sys.argv[4]

    print(outfilename)

    with open(bedfile) as infile:
        FeatureDict = _sum_feature_lengths(infile, chrFieldID, IDfieldIDs)

    with open(outfilename, 'w') as outfile:
        for FeatureID in sorted(FeatureDict):
            columns = FeatureID + (str(FeatureDict[FeatureID]),)
            outfile.write('\t'.join(columns) + '\n')
   
# Script entry point: run() is invoked unconditionally at module level, so it
# executes both when the file is run directly and when it is imported.
run()
