##################################
#                                #
# Last modified 7/22/2009         # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
from cistematic.core import Genome
from cistematic.core.geneinfo import geneinfoDB
from commoncode import *


def run():

    if len(sys.argv) < 6:
        print 'usage: python %s genome plusreadsrdsfilename minusrdsrdsfilename windowlength minRPKM outputfilename [-nomulti] [-cache size] ' % sys.argv[0]

        sys.exit(1)
    
    genome = sys.argv[1]
    hitfilenamePlus = sys.argv[2]
    hitfilenameMinus = sys.argv[3]
    window=int(sys.argv[4])
    minRPKM=float(sys.argv[5])
    outfilename = sys.argv[6]

    outfile = open(outfilename, 'w')

    cachePages = -1
    doCache = False
    if '-cache' in sys.argv:
        doCache = True
        cachePages =  int(sys.argv[sys.argv.index('-cache') + 1])

    doMulti = True
    if '-nomulti' in sys.argv:
        doMulti = False

    hg = Genome(genome)
    hitRDSplus = readDataset(hitfilenamePlus, verbose = True, cache=True)
    hitRDSminus = readDataset(hitfilenameMinus, verbose = True, cache=True)

    if cachePages > hitRDSplus.getDefaultCacheSize():
        hitRDSplus.setDBcache(cachePages)
    if cachePages > hitRDSminus.getDefaultCacheSize():
        hitRDSminus.setDBcache(cachePages)

    metadata = hitRDSplus.getMetadata()
    readlen = int(metadata['readsize'])
    dataType = metadata['dataType']
    readlenRange = range(readlen)

    metadata = hitRDSminus.getMetadata()
    readlen = int(metadata['readsize'])
    dataType = metadata['dataType']
    readlenRange = range(readlen)

    normalizeBy = (len(hitRDSplus) + len(hitRDSminus))/((window/1000.)*1000000.)
    print 'normalizing factor:', normalizeBy 

    chromlist=hg.allChromNames()
    candidateAntisenseRegions={}
    outfile.write('#chr\tstart\tstop\tplusRPKM\tminusRPKM\tPlus%\tMinus%\n')
    for chromosome in chromlist:
        if 'rand' in chromosome:
            continue
        length=len(hg.getChromosomeSequence(chromosome))
#        candidateAntisenseRegions[chr]=[]
        i=0
        chr='chr'+chromosome
        print chr, length, 'bp'
        for i in range(0,length-int(window/2.),window):
            if i % int(window/2.) != 0:
                continue
            if i % 20000000 == 0:
                print i
            rmin=i 
            rmax=i+window
            plusRPKM=hitRDSplus.getCounts(chrom=chr, rmin=rmin, rmax=rmax, uniqs=True, multi=doMulti, splices=True, reportCombined=True)/normalizeBy 
            if plusRPKM < minRPKM:
                 i+=window/2
                 continue
            minusRPKM=hitRDSminus.getCounts(chrom=chr, rmin=rmin, rmax=rmax, uniqs=True, multi=doMulti, splices=True, reportCombined=True)/normalizeBy 
            if minusRPKM > minRPKM:
#                candidateAntisenseRegions[chr].append((rmin,rmax,plusRPKM,minusRPKM))
                 outline = '%s\t%s\t%s\t%s\t%s\t%s\t%s\t' % (chr, rmin, rmax, plusRPKM, minusRPKM, plusRPKM/(plusRPKM + minusRPKM), minusRPKM/(plusRPKM + minusRPKM))
                 outfile.write(outline+'\n')
            i+=int(window/2)

    outfile.close()
   
run()
