##################################
#                                #
# Last modified 04/27/2011       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string

def run():

    if len(sys.argv) < 2:
        print 'usage: python %s input outputfilename' % sys.argv[0]
        print '       input file in bowtie format'
        print '       only unique reads are considered for building clusters'
        sys.exit(1)
    
    input = sys.argv[1]
    outfilename = sys.argv[2]

    coverageDict={}
    cumSumCoverage=0.0
    i=0
    linelist = open(bowtie)
    for line in linelist:
        i+=1
        if i % 1000000 == 0:
            print str(i/1000000)+'M reads processed'
        if line.startswith('@'):
            continue
        if line.startswith('#'):
            continue
        fields=line.strip().split('\t')
        if fields[6]!=0:
            conitnue
        chr=fields[2]
        start=int(fields[3])
        end=start+len(fields[4])
        if coverageDict.has_key(chr):
            pass
        else:
            coverageDict[chr]={}
        for j in range(start,end):
            if coverageDict[chr].has_key(j):
                pass
            else:
                coverageDict[chr][j]=''

    outfile = open(outfilename, 'w')

    chromosomes=coverageDict.keys()
    chromosomes.sort()
    for chr in chromosomes:
        print chr
        posKeys=coverageDict[chr].keys()
        posKeys.sort()
        initial=(posKeys[0],coverageDict[chr][posKeys[0]])
        previous=(posKeys[0],coverageDict[chr][posKeys[0]])
        written=['']
        for i in posKeys[1:len(posKeys)]:
            if previous[0]+1 == i and previous[1]==coverageDict[chr][i]:
                 previous=(i,coverageDict[chr][i])
            else:
                 if written[0]==initial[0]:
                     print written, initial, previous
                 if doStranded and strand == '-':
                     outline=chr+'\t'+str(initial[0])+'\t'+str(previous[0]+1)+'\t-'+str(initial[1])
                 else:
                     outline=chr+'\t'+str(initial[0])+'\t'+str(previous[0]+1)+'\t'+str(initial[1])
                 written=(initial[0],previous[0]+1)
                 outfile.write(outline+'\n')
                 initial=(i,coverageDict[chr][i])
                 previous=(i,coverageDict[chr][i])

    outfile.close()
            
run()
