##################################
#                                #
# Last modified 2017/06/01       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
import os
from sets import Set

def run():

    if len(sys.argv) < 8:
        print 'usage: python %s input chrFieldID leftFieldID rightFieldID chr left_boundary right_boundary outfilename [-NumericChromosomes]'
        print '\tthe script can read .gz, .bz2 and .zip files'
        print '\tthe script will output both fully contained entries and entries overlapping the desired interval'
        print '\tuse the [-NumericChromosomes] options if chromosomes do not have "chr" in their names (such as in vcf files)'
        sys.exit(1)

    doNumChr=False
    if '-NumericChromosomes' in sys.argv:
        doNumChr=True

    datafilename = sys.argv[1]
    chrFieldID = int(sys.argv[2])
    leftFieldID = int(sys.argv[3])
    rightFieldID = int(sys.argv[4])
    chrWanted = sys.argv[5]
    leftBound = int(sys.argv[6])
    rightBound = int(sys.argv[7])
    outfilename = sys.argv[8]

    print chrWanted, leftBound, rightBound
    print 'wanted:', chrWanted, leftBound, rightBound

    outfile = open(outfilename, 'w')

    if datafilename.endswith('.bz2'):
        cmd1 = 'bzip2 -cd ' + datafilename
    elif datafilename.endswith('.gz'):
        cmd1 = 'gunzip -c ' + datafilename
    elif datafilename.endswith('.zip'):
        cmd1 = 'unzip -p ' + datafilename
    else:
        cmd1 = 'cat ' + datafilename
    p = os.popen(cmd1, "r")

    line = 'line'
    i = 0
    while line != '':
        line = p.readline().strip()
        if line == '':
            break
        if line.startswith('#'):
            outfile.write(line + '\n')
            continue
        i+=1
        if i % 5000000 == 0:
            print str(i/1000000) + 'M lines processed'
        fields = line.strip().split('\t')
        chr = fields[chrFieldID]
        if doNumChr:
            chr = 'chr' + chr
        if chr != chrWanted:
            continue
        left = int(fields[leftFieldID])
        right = int(fields[rightFieldID])
        if (left >= leftBound and  left <= rightBound) or (right >= leftBound and right <= rightBound):
            outfile.write(line + '\n')

    outfile.close()
        
run()

