##################################
#                                #
# Last modified 7/13/2009         # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
from cistematic.core import Genome
from cistematic.core.geneinfo import geneinfoDB
from commoncode import *


def Poisson(lam,k):

    kfactorial=1.0
    for i in range(1,int(k)):
        kfactorial=kfactorial*i
#    print lam, k, math.exp(-lam), math.pow(lam,k)
    poisson=math.exp(-lam)*math.pow(lam,k)/kfactorial

    return poisson


def run():

    if len(sys.argv) < 3:
        print 'usage: python %s genome NPSinputfilename outputfilename [-pvalue threshold] [ModificationName::rdsfilename] [-cache size] ' % sys.argv[0]

        sys.exit(1)
    
    genome = sys.argv[1]
    hitfilename = sys.argv[2]
    outfilename = sys.argv[3]

    outfile = open(outfilename, 'w')

    cachePages = -1
    doCache = False
    if '-cache' in sys.argv:
        doCache = True
        cachePages =  int(sys.argv[sys.argv.index('-cache') + 1])
    pvalueThreshold=0.0001
    if '-pvalue' in sys.argv:
        doCache = True
        pvalueThreshold =  float(sys.argv[sys.argv.index('-pvalue') + 1])

    modificationDict={}
    for argument in sys.argv:
        if '::' in argument:
            modificationDict[argument.split('::')[0]]=argument.split('::')[1]
            print argument.split('::')[0], '-->', argument.split('::')[1]

    genes = {}
    hg = Genome(genome)
    idb = geneinfoDB()

    chromlist=hg.allChromNames()
    nucleosomeDict={}
    genomelength=0
    for chromosome in chromlist:
        chromosome=str(chromosome)
        if 'rand' in chromosome:
            chr='chr'+chromosome.split('rand')[0]+'_rand'
        else:
            chr='chr'+chromosome
        nucleosomeDict[chr]={}
        genomelength=genomelength+len(hg.getChromosomeSequence(chromosome))
    print 'chromosomes:', nucleosomeDict.keys()
    print 'genome length = ', genomelength

    listoflines = open(hitfilename)
    lineslist = listoflines.readlines()
    for line in lineslist:
        if line[0]=='#':
            continue
        fields=line.split('\n')[0].split('\t')
        if fields[1].isalpha():
            print 'skipping', line.strip()
            continue
        start=int(fields[1])
        stop=int(fields[2])
        nucleosomeDict[fields[0]][start]={}
        nucleosomeDict[fields[0]][start]['start']=start
        nucleosomeDict[fields[0]][start]['stop']=stop
    
    outline = 'chr\tstart\tstop'
    for modification in modificationDict.keys():
        outline=outline+'\t'+modification
    outline=outline+'\n'
    outfile.write(outline)

    for modification in modificationDict.keys():
        hitRDS = readDataset(modificationDict[modification], verbose = True, cache=doCache)
        #sqlite default_cache_size is 2000 pages
        if cachePages > hitRDS.getDefaultCacheSize():
            hitRDS.setDBcache(cachePages)
        metadata = hitRDS.getMetadata()
        readlen = int(metadata['readsize'])
        dataType = metadata['dataType']
        readlenRange = range(readlen)
        readnumber = len(hitRDS)
        for chr in nucleosomeDict.keys():
            print chr
            for start in nucleosomeDict[chr].keys():
                length=float(nucleosomeDict[chr][start]['stop']-nucleosomeDict[chr][start]['start'])
                lam=(length/genomelength)*readnumber
                k=hitRDS.getCounts(chrom=chr, rmin=nucleosomeDict[chr][start]['start'], rmax=nucleosomeDict[chr][start]['stop'], uniqs=True, multi=True, splices=False, reportCombined=True)
                pvalue=Poisson(lam,k)
                if pvalue<pvalueThreshold:
                    nucleosomeDict[chr][start][modification]=pvalue
                else:
                    nucleosomeDict[chr][start][modification]=''

    list1=nucleosomeDict.keys()
    list1.sort()
    for chr in list1:
        list2=nucleosomeDict[chr].keys()
        list2.sort()
        for start in nucleosomeDict[chr].keys():
            outline=chr+'\t'+str(start)+'\t'+str(nucleosomeDict[chr][start]['stop'])
            for modification in modificationDict.keys():
                outline=outline+'\t'+str(nucleosomeDict[chr][start][modification])
            print outline
            outfile.write(outline + '\n')
    outfile.close()
   
run()
