##################################
#                                #
# Last modified 7/13/2009         # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
from sets import Set

try:
    import psyco
    psyco.full()
except:
    print 'psyco not running'

from commoncode import *

def run():

    if len(sys.argv) < 6:
        print 'usage: python %s file1 chromField1 file2 chromField2 outfilenameprefix centerDistance' % sys.argv[0]
        sys.exit(1)

    file1 = sys.argv[1]
    chromField1 = int(sys.argv[2])
    file2 = sys.argv[3]
    chromField2 = int(sys.argv[4])
    outfileprefix = sys.argv[5]
    centerdistance = int(sys.argv[6])

    listoflines = open(file1)
    lineslist = listoflines.readlines()
    chrlist=[]
    for line in lineslist:
        if line[0]=='#':
            continue
        fields=line.split('\n')[0].split('\t')
        if fields[chromField1+1].isalpha():
            print 'skipping', line.strip()
            continue
        chr=fields[chromField1]
        chrlist.append(chr)
    listoflines = open(file2)
    lineslist = listoflines.readlines()
    for line in lineslist:
        if line[0]=='#':
            continue
        fields=line.split('\n')[0].split('\t')
        if fields[chromField1+1].isalpha():
            print 'skipping', line
            continue
        chr=fields[chromField2]
        chrlist.append(chr)
    chrlist=list(Set(chrlist))
    file1Dict={}
    file2Dict={}
    intersection_regions={}
    intersection1={}
    intersection2={}
    outersection1={}
    outersection2={}
    for chr in chrlist:
        file1Dict[chr]={}
        file2Dict[chr]={}
        intersection_regions[chr]={}
        intersection1[chr]={}
        intersection2[chr]={}
        outersection1[chr]={}
        outersection2[chr]={}

    listoflines = open(file1)
    lineslist = listoflines.readlines()
    i=0;
    for line in lineslist:
        if line[0]=='#':
            continue
        fields=line.split('\n')[0].split('\t')
        if fields[chromField1+1].isalpha():
            print 'skipping', line
            continue
        chr=fields[chromField1]
        file1Dict[chr][i]={}
        file1Dict[chr][i]['start']=int(fields[chromField1+1])
        file1Dict[chr][i]['stop']=int(fields[chromField1+2])
        file1Dict[chr][i]['intersected']=0
        i+=1

    listoflines = open(file2)
    lineslist = listoflines.readlines()
    i=0;
    for line in lineslist:
        if line[0]=='#':
            continue
        fields=line.split('\n')[0].split('\t')
        if fields[chromField1+1].isalpha():
            print 'skipping', line
            continue
        chr=fields[chromField2]
        file2Dict[chr][i]={}
        file2Dict[chr][i]['start']=int(fields[chromField2+1])
        file2Dict[chr][i]['stop']=int(fields[chromField2+2])
        file2Dict[chr][i]['intersected']=0
        i+=1

    intersect=0
    for chr in chrlist:
        print chr
        for i in file1Dict[chr].keys():
            middle1=(file1Dict[chr][i]['start']+file1Dict[chr][i]['stop'])/2.0
            for j in file2Dict[chr].keys():
                middle2=(file2Dict[chr][j]['start']+file2Dict[chr][j]['stop'])/2.0
                if math.fabs(middle1-middle2)<centerdistance:
                    intersection_regions[chr][intersect]={}
                    file1Dict[chr][i]['intersected']=1
                    file2Dict[chr][j]['intersected']=1
                    intersection_regions[chr][intersect]['start1']=file1Dict[chr][i]['start']
                    intersection_regions[chr][intersect]['start2']=file2Dict[chr][j]['start']
                    intersection_regions[chr][intersect]['stop1']=file1Dict[chr][i]['stop']
                    intersection_regions[chr][intersect]['stop2']=file2Dict[chr][j]['stop']
                    intersection_regions[chr][intersect]['shift']=middle1-middle2
                    intersect+=1
                    break

    outersect=0
    intersect=0
    for chr in chrlist:
        for i in file1Dict[chr].keys():
            if file1Dict[chr][i]['intersected']==0:
                outersection1[chr][outersect]={}
                outersection1[chr][outersect]['start']=file1Dict[chr][i]['start']
                outersection1[chr][outersect]['stop']=file1Dict[chr][i]['stop']
                outersect+=1
            if file1Dict[chr][i]['intersected']==1:
                intersection1[chr][intersect]={}
                intersection1[chr][intersect]['start']=file1Dict[chr][i]['start']
                intersection1[chr][intersect]['stop']=file1Dict[chr][i]['stop']
                intersect+=1

    outersect=0
    intersect=0
    for chr in chrlist:
        for i in file2Dict[chr].keys():
            if file2Dict[chr][i]['intersected']==0:
                outersection2[chr][outersect]={}
                outersection2[chr][outersect]['start']=file2Dict[chr][i]['start']
                outersection2[chr][outersect]['stop']=file2Dict[chr][i]['stop']
                outersect+=1
            if file2Dict[chr][i]['intersected']==1:
                intersection2[chr][intersect]={}
                intersection2[chr][intersect]['start']=file2Dict[chr][i]['start']
                intersection2[chr][intersect]['stop']=file2Dict[chr][i]['stop']
                intersect+=1

    outfilename=outfileprefix+'-intersect_regions.txt'
    outfile = open(outfilename, 'w')
    for chr in intersection_regions.keys():
        for i in intersection_regions[chr].keys():
            line=chr+'\t'+str(intersection_regions[chr][i]['start1'])+'\t'+str(intersection_regions[chr][i]['stop1'])+chr+'\t'+str(intersection_regions[chr][i]['start2'])+'\t'+str(intersection_regions[chr][i]['stop2'])+'\t'+str(intersection_regions[chr][i]['shift'])+'\n'
            outfile.write(line)
    outfile.close()

    outfilename=outfileprefix+'-intersection1'
    outfile = open(outfilename, 'w')
    for chr in intersection1.keys():
        for i in intersection1[chr].keys():
            line=chr+'\t'+str(intersection1[chr][i]['start'])+'\t'+str(intersection1[chr][i]['stop'])+'\n'
            outfile.write(line)
    outfile.close()

    outfilename=outfileprefix+'-intersection2'
    outfile = open(outfilename, 'w')
    for chr in intersection2.keys():
        for i in intersection2[chr].keys():
            line=chr+'\t'+str(intersection2[chr][i]['start'])+'\t'+str(intersection2[chr][i]['stop'])+'\n'
            outfile.write(line)
    outfile.close()

    outfilename=outfileprefix+'-outersection1'
    outfile = open(outfilename, 'w')
    for chr in outersection1.keys():
        for i in outersection1[chr].keys():
            line=chr+'\t'+str(outersection1[chr][i]['start'])+'\t'+str(outersection1[chr][i]['stop'])+'\n'
            outfile.write(line)
    outfile.close()

    outfilename=outfileprefix+'-outersection2'
    outfile = open(outfilename, 'w')
    for chr in outersection2.keys():
        for i in outersection2[chr].keys():
            line=chr+'\t'+str(outersection2[chr][i]['start'])+'\t'+str(outersection2[chr][i]['stop'])+'\n'
            outfile.write(line)
    outfile.close()

run()
