##################################
#                                #
# Last modified 09/57/2011       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
from sets import Set

def _construct_class(line):
    """Return the construct class named by a tab-delimited line.

    The class is the first field with its last '_'-separated token removed
    (e.g. 'enh_A_12' -> 'enh_A').  A first field without any '_' yields ''.
    """
    name = line.strip().split('\t')[0]
    return name.rpartition('_')[0]


def _data_lines(handle):
    """Yield the lines of handle that are neither blank nor '#' comments."""
    for line in handle:
        # Only a '#' in the very first column marks a comment line.
        if line.startswith('#') or not line.strip():
            continue
        yield line


def run():
    """Tabulate, per construct class, how many constructs overlap each label.

    Command line (read from sys.argv):
        constructs            tab-delimited file whose first field is a
                              construct name of the form <class>_<suffix>
        list-of-intersect1    one 'label <tab> file' entry per line; each
                              listed file has construct names in field one
        outputfilename        destination of the resulting table

    The output has a header line followed by one line per construct class
    (sorted): class, number of constructs of that class in the constructs
    file, then for every label (sorted) the number of lines of that label's
    file belonging to the class.  Blank lines and lines starting with '#'
    in the constructs and intersect files are ignored; blank lines in the
    list file are ignored.

    Exits with status 1 after printing usage when arguments are missing.
    Raises KeyError if an intersect file names a class that does not occur
    in the constructs file.
    """
    # Three positional arguments are mandatory, so argv needs four entries;
    # checking for fewer would let sys.argv[3] raise IndexError below.
    if len(sys.argv) < 4:
        print('usage: python %s constructs list-of-intersect1-files '
              'outputfilename' % sys.argv[0])
        print('       list of intersect1 files format: label <tab> file')
        sys.exit(1)

    bed = sys.argv[1]
    list_of_files = sys.argv[2]
    outfilename = sys.argv[3]

    regionDict = {}     # class -> number of constructs of that class
    overlapDict = {}    # class -> {label: number of overlapping lines}

    with open(bed) as constructs:
        for line in _data_lines(constructs):
            prefix = _construct_class(line)
            regionDict[prefix] = regionDict.get(prefix, 0) + 1
            overlapDict.setdefault(prefix, {})

    labels = []
    with open(list_of_files) as listing:
        for entry in listing:
            if not entry.strip():
                # Tolerate blank (e.g. trailing) lines in the list file.
                continue
            fields = entry.strip().split('\t')
            label = fields[0]
            labels.append(label)
            with open(fields[1]) as intersect:
                for line in _data_lines(intersect):
                    counts = overlapDict[_construct_class(line)]
                    counts[label] = counts.get(label, 0) + 1

    labels.sort()

    # The output is opened only after all inputs were parsed successfully,
    # so a failure above does not leave a truncated result file behind.
    with open(outfilename, 'w') as outfile:
        header = ['#construct_class', 'number'] + labels
        outfile.write('\t'.join(header) + '\n')
        for prefix in sorted(regionDict):
            row = [prefix, str(regionDict[prefix])]
            row.extend(str(overlapDict[prefix].get(label, 0))
                       for label in labels)
            outfile.write('\t'.join(row) + '\n')
   
# Run only when executed as a script, so that importing this module does not
# trigger argument parsing and file I/O.
if __name__ == '__main__':
    run()
