##################################
#                                #
# Last modified 04/06/2016       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import os
from sets import Set

def OptimalCoordinates(coordinates,MaxLegnth):
    """Return the (start, end) window spanning the most coordinates within a limit.

    ``coordinates`` is sorted in place.  Candidate windows start at an
    even index ``i`` and end at an index ``j`` that is reached from the last
    index in steps of two, so for an even-length list (left/right values
    appended in pairs) a window always runs from an even to an odd index.

    Among the windows with ``coordinates[j] - coordinates[i] <= MaxLegnth``
    the one covering the most list positions (largest ``j - i``) wins; ties
    go to the larger ``j``.

    Parameters:
        coordinates -- list of numeric positions; reordered by this call.
        MaxLegnth   -- maximum allowed ``end - start`` of the returned window.

    Returns:
        Tuple ``(coordinates[i], coordinates[j])`` of the winning window.  If
        no window fits within ``MaxLegnth`` the full extent
        ``(coordinates[0], coordinates[-1])`` is returned unchanged.

    Raises:
        IndexError -- if ``coordinates`` is empty.
    """
    coordinates.sort()
    lastIndex = len(coordinates) - 1

    best = None
    for i in range(0, len(coordinates), 2):
        j = lastIndex
        # Shrink the window from the right; stop before j passes i so that
        # negative indices can never wrap around to the end of the list.
        while j > i and coordinates[j] - coordinates[i] > MaxLegnth:
            j = j - 2
        if j <= i:
            # No window starting at i fits within the limit.
            continue
        candidate = (j - i, j, i)
        if best is None or candidate > best:
            best = candidate

    if best is None:
        # Nothing fits: leave the extent untrimmed rather than failing.
        return (coordinates[0], coordinates[-1])

    (span, end, start) = best
    return (coordinates[start], coordinates[end])

def run():
    """Collapse a table of orthologous blocks into one link line per block.

    Command line (read from ``sys.argv``)::

        OB outfilename [-chrLabels S1 L1 S2 L2] [-maxDiff factor]

    Input is a tab-separated file; lines starting with ``#`` and blank lines
    are skipped.  The columns read are: 0 = block id of the form ``OB<int>``,
    1 = first chromosome, 2/3 = integer coordinates on it, 6 = second
    chromosome, 7/8 = integer coordinates on it, or ``-`` in column 7 when
    the row has no match.  Rows without a match are ignored.

    With ``-chrLabels`` every occurrence of ``S1`` in the first chromosome
    name is replaced by ``L1`` and of ``S2`` in the second by ``L2``.

    For every (block, first chromosome) pair one line is written:
    chromosome, min and max of its coordinates, the second chromosome that
    contributed the most coordinates, the min and max of those coordinates,
    and the literal ``thickness=1.0``.  With ``-maxDiff`` a second-genome
    span longer than ``factor`` times the first-genome span is replaced by
    the result of ``OptimalCoordinates``.

    Two trailing ``#`` lines list the chromosome names used and the total
    number of matched rows counted (also printed to standard output).
    """
    # Both positional arguments are required, so at least three argv entries.
    if len(sys.argv) < 3:
        print('usage: python %s OB outfilename [-chrLabels string_to_replace_1 new_string_1 string_to_replace_2 new_string_2] [-maxDiff factor(float)]' % sys.argv[0])
        sys.exit(1)

    OBlist = sys.argv[1]
    outfilename = sys.argv[2]

    doLabels = '-chrLabels' in sys.argv
    if doLabels:
        labelPos = sys.argv.index('-chrLabels')
        S1 = sys.argv[labelPos + 1]
        L1 = sys.argv[labelPos + 2]
        S2 = sys.argv[labelPos + 3]
        L2 = sys.argv[labelPos + 4]

    doMD = '-maxDiff' in sys.argv
    if doMD:
        MD = float(sys.argv[sys.argv.index('-maxDiff') + 1])

    # (OB number, chr1) -> {'pos1': [coords on chr1],
    #                       'pos2': {chr2: [coords on chr2]}}
    OBDict = {}

    with open(OBlist) as linelist:
        for line in linelist:
            if line.startswith('#') or not line.strip():
                continue
            fields = line.strip().split('\t')
            if fields[7] == '-':
                # Unmatched row: contributes nothing, and must not create an
                # empty entry that would have no coordinates to summarise.
                continue
            OB = int(fields[0].split('OB')[1])
            chr1 = fields[1]
            chr2 = fields[6]
            if doLabels:
                chr1 = chr1.replace(S1,L1)
                chr2 = chr2.replace(S2,L2)
            left1 = int(fields[2])
            right1 = int(fields[3])
            left2 = int(fields[7])
            right2 = int(fields[8])

            entry = OBDict.setdefault((OB,chr1), {'pos1': [], 'pos2': {}})
            entry['pos1'].extend((left1, right1))
            entry['pos2'].setdefault(chr2, []).extend((left2, right2))

    ChrList1 = set()
    ChrList2 = set()
    TotalMaxCount = 0

    with open(outfilename, 'w') as outfile:
        for (OB,chr1) in sorted(OBDict):
            entry = OBDict[(OB,chr1)]
            start1 = min(entry['pos1'])
            end1 = max(entry['pos1'])

            # Second chromosome with the most coordinates; on a tie the first
            # one met while iterating the dictionary is kept.
            pos2 = entry['pos2']
            maxCountChr = max(pos2, key=lambda name: len(pos2[name]))
            matched = pos2[maxCountChr]
            TotalMaxCount += len(matched)

            M1 = min(matched)
            M2 = max(matched)
            if doMD:
                TL = end1 - start1
                if (M2 - M1) > MD*TL:
                    (M1,M2) = OptimalCoordinates(matched,MD*TL)

            ChrList1.add(chr1)
            ChrList2.add(maxCountChr)
            columns = [chr1, str(start1), str(end1),
                       maxCountChr, str(M1), str(M2), 'thickness=1.0']
            outfile.write('\t'.join(columns) + '\n')

        # Every name is followed by ';'; the slice drops the final character.
        names = sorted(ChrList1) + sorted(ChrList2)
        outline = '# ' + ''.join([name + ';' for name in names])
        outfile.write(outline[0:-1] + '\n')

        # Each matched row added two coordinates, hence the division by two.
        outline = '# Total number of orthologous genes in synteny: ' + '\t' + str(TotalMaxCount/2.)
        print(outline)
        outfile.write(outline + '\n')

# Run only when executed as a script, so importing this module has no side effects.
if __name__ == '__main__':
    run()
