##################################
#                                #
# Last modified 9/2/2009         # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
from sets import Set
from cistematic.core import Genome
from cistematic.core.geneinfo import geneinfoDB

try:
    import psyco
    psyco.full()
except:
    print 'psyco not running'

from commoncode import *

def run():

    if len(sys.argv) < 4:
        print 'usage: python %s RNAFARfilename CAGEPlusClustersfile CAGEMinusClustersfile outputfilename' % sys.argv[0]
        sys.exit(1)

    RNAFAR = sys.argv[1]
    CAGEPlus = sys.argv[2]
    CAGEMinus= sys.argv[3]
    outfilename = sys.argv[4]

    outfile = open(outfilename, 'w')

    listoflines = open(RNAFAR)
    lineslist = listoflines.readlines()
    RNAFARDict={}
    for line in lineslist:
        if line[0]=='#':
            continue
        fields=line.split('\n')[0].split('\t')
        chr=fields[1]
        if RNAFARDict.has_key(chr):
            RNAFARDict[chr].append((fields[0],int(fields[2]),int(fields[3]),float(fields[4])))
        else:
            RNAFARDict[chr]=[]
            RNAFARDict[chr].append((fields[0],int(fields[2]),int(fields[3]),float(fields[4])))
    CAGEDictPlus={}
    CAGEDictMinus={}
    listoflines = open(CAGEPlus)
    lineslist = listoflines.readlines()
    lineslist.remove(lineslist[0])
    for line in lineslist:
        if line[0]=='#':
            continue
        fields=line.split('\n')[0].split('\t')
        chr=fields[0]
        if CAGEDictPlus.has_key(chr):
            CAGEDictPlus[chr].append((int(fields[1]),float(fields[3])))
        else:
            CAGEDictPlus[chr]=[]
            CAGEDictPlus[chr].append((int(fields[1]),float(fields[3])))
    listoflines = open(CAGEMinus)
    lineslist = listoflines.readlines()
    lineslist.remove(lineslist[0])
    for line in lineslist:
        if line[0]=='#':
            continue
        fields=line.split('\n')[0].split('\t')
        chr=fields[0]
        if CAGEDictMinus.has_key(chr):
            CAGEDictMinus[chr].append((int(fields[2]),float(fields[3])))
        else:
            CAGEDictMinus[chr]=[]
            CAGEDictMinus[chr].append((int(fields[2]),float(fields[3])))
    outfile.write('RNAFAR\tchr\tstart\tstop\tRPKM\tClosestLeftCAGEtag\tCAGERPM\tClosestRightCAGEtag\tCAGERPM\n')
    for chr in RNAFARDict:
        print chr
        for (RNAFAR,start,stop,RPKM) in RNAFARDict[chr]:
            leftdistance=1000000000000000
            rightdistance=1000000000000000
            for (tag,levels) in CAGEDictPlus[chr]:
                if math.fabs(tag-start)<leftdistance:
                    leftdistance=math.fabs(tag-start)
                    leftCAGE=(tag,levels)
            for (tag,levels) in CAGEDictMinus[chr]:
                if math.fabs(tag-stop)<rightdistance:
                    rightdistance=math.fabs(tag-stop)
                    rightCAGE=(tag,levels)
            outline=RNAFAR+'\t'+chr+'\t'+str(start)+'\t'+str(stop)+'\t'+str(RPKM)+'\t'+str(leftCAGE[0]-start)+'\t'+str(leftCAGE[1])+'\t'+str(stop-rightCAGE[0])+'\t'+str(rightCAGE[1])+'\n'
            outfile.write(outline)
                
    outfile.close()

run()
