##################################
#                                #
# Last modified 2024/01/10       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import gzip

def _read_lines(path):
    """Yield the lines of *path* as text, closing the file when done.

    Files whose name ends in '.gz' are read through gzip.
    """
    opener = gzip.open if path.endswith('.gz') else open
    handle = opener(path)
    try:
        for line in handle:
            if not isinstance(line, str):
                # gzip hands back bytes on Python 3; plain str on Python 2.
                line = line.decode('utf-8')
            yield line
    finally:
        handle.close()


def _write_interval(outfile, chrom, start, end, chunk_size=None):
    """Write the interval start..end on *chrom* as tab-separated lines.

    Empty or inverted intervals (start >= end) produce no output.  When
    *chunk_size* is given the interval is split into consecutive pieces of
    at most that length; the last piece is clipped to *end*.
    """
    if start >= end:
        return
    if chunk_size is None:
        outfile.write('%s\t%s\t%s\n' % (chrom, start, end))
        return
    pos = start
    # Strict '<' so that an interval whose length is an exact multiple of
    # chunk_size does not get a trailing zero-length (end, end) piece.
    while pos < end:
        outfile.write('%s\t%s\t%s\n' % (chrom, pos, min(pos + chunk_size, end)))
        pos += chunk_size


def run():
    """Write the parts of each chromosome NOT covered by the input regions.

    Command line (read from sys.argv):
        bed chrFieldID leftFieldID rightFieldID chrom.sizes outputfilename
        [-chunks size] [-noFirstOrLast]

    The three field IDs are 0-based column indexes into the tab-separated
    regions file (lines starting with '#' are skipped; a '.gz' file is read
    through gzip).  chrom.sizes is tab-separated: name, then length.

    For every chromosome listed in chrom.sizes, in sorted name order, the
    output gets one line 'chrom<TAB>start<TAB>end' for:
      * the stretch from 1 to the first region's left coordinate,
      * every gap between consecutive (sorted) regions,
      * the stretch from the last covered position to the chromosome length.
    Chromosomes without any region are written whole (1 .. length).
    Overlapping, nested or abutting regions leave no gap between them.
    Regions on chromosomes absent from chrom.sizes are ignored.

    -chunks size     split every output interval into pieces of at most
                     'size' (must be a positive integer).
    -noFirstOrLast   omit the leading and trailing stretch of each
                     chromosome, and chromosomes without regions.

    Exits with status 1 on a bad command line.
    """
    # Six positional arguments are required, so argv needs at least 7 items.
    if len(sys.argv) < 7:
        print('usage: python %s bed chrFieldID leftFieldID rightFieldID chrom.sizes outputfilename [-chunks size] [-noFirstOrLast]' % sys.argv[0])
        print('\tNote: field IDs are 0-based column indexes; -noFirstOrLast omits the stretches before the first and after the last region of each chromosome, as well as chromosomes without regions')
        sys.exit(1)

    chunk_size = None
    if '-chunks' in sys.argv:
        try:
            chunk_size = int(sys.argv[sys.argv.index('-chunks') + 1])
        except (IndexError, ValueError):
            sys.exit('-chunks requires an integer size argument')
        if chunk_size <= 0:
            # A non-positive step would never advance past the interval end.
            sys.exit('-chunks size must be a positive integer')

    regions_file = sys.argv[1]
    chrom_field = int(sys.argv[2])
    left_field = int(sys.argv[3])
    right_field = int(sys.argv[4])
    chrom_sizes_file = sys.argv[5]
    out_filename = sys.argv[6]

    skip_ends = False
    if '-noFirstOrLast' in sys.argv:
        print('will omit the ends of contigs')
        skip_ends = True

    # chromosome name -> length
    chrom_sizes = {}
    for line in _read_lines(chrom_sizes_file):
        if not line.strip():
            continue
        fields = line.strip().split('\t')
        chrom_sizes[fields[0]] = int(fields[1])

    # chromosome name -> list of (left, right)
    regions = {}
    for line in _read_lines(regions_file):
        if line.startswith('#') or not line.strip():
            continue
        fields = line.strip().split('\t')
        interval = (int(fields[left_field]), int(fields[right_field]))
        regions.setdefault(fields[chrom_field], []).append(interval)

    with open(out_filename, 'w') as outfile:
        for chrom in sorted(chrom_sizes):
            chrom_end = chrom_sizes[chrom]
            intervals = sorted(regions.get(chrom, ()))

            if not intervals:
                if not skip_ends:
                    _write_interval(outfile, chrom, 1, chrom_end, chunk_size)
                continue

            if not skip_ends:
                _write_interval(outfile, chrom, 1, intervals[0][0], chunk_size)

            # Track the furthest right coordinate seen so far, so that a
            # region nested inside an earlier one cannot re-open a gap.
            covered_to = intervals[0][1]
            for left, right in intervals[1:]:
                _write_interval(outfile, chrom, covered_to, left, chunk_size)
                covered_to = max(covered_to, right)

            if not skip_ends:
                _write_interval(outfile, chrom, covered_to, chrom_end, chunk_size)
            
# Only run the command-line tool when executed as a script, so that
# importing this module does not parse sys.argv or exit the interpreter.
if __name__ == '__main__':
    run()
