##################################
#                                #
# Last modified 03/24/2013       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string

def run():

    if len(sys.argv) < 4:
        print 'usage: python %s VCF regions_file chromField2 outfilenameprefix ' % sys.argv[0]
        sys.exit(1)

    VCF = sys.argv[1]
    regions = sys.argv[2]
    outfile = sys.argv[4]
    chromFieldID = int(sys.argv[3])

    RegionCoverageDict={}

    listoflines = open(regions)
    i=0
    for line in listoflines:
        i+=1
        if line.startswith('#') or line.startswith('track type'):
            continue
        if len(line.strip())==0:
            continue
        fields=line.strip().split('\t')
        chr=fields[chromFieldID]
        if RegionCoverageDict.has_key(chr):
            pass
        else:
            RegionCoverageDict[chr]={}
        start = int(fields[chromFieldID + 1])
        end = int(fields[chromFieldID + 2])
        for j in range(start,end):
            RegionCoverageDict[chr][j]=i
        if end == start:
            RegionCoverageDict[chr][start]=i

    outfile = open(outfile,'w')

    listoflines = open(VCF)
    k=0
    for line in listoflines:
        k+=1
        if k % 100000 == 0:
            print k, 'lines in file2 processed'
        if line.startswith('#') or line.startswith('track type'):
            continue
        fields=line.strip().split('\t')
        chr='chr'+fields[0]
        pos = int(fields[1])
        if RegionCoverageDict.has_key(chr):
            if RegionCoverageDict[chr].has_key(pos):
                outfile.write(line)

    outfile.close()

run()
