##################################
#                                #
# Last modified 2019/01/24       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import numpy as np
from scipy.stats import entropy
from scipy.stats import fisher_exact
from scipy.stats import beta
from scipy.stats import binom
from sklearn.metrics import normalized_mutual_info_score as NMIS
import random
import os
import math
from sets import Set

import sys
import os
import string
import gzip
from distutils.core import setup
from Cython.Build import cythonize
import pyximport; pyximport.install()
import SingleMoleculeCorrelation_NMI_matrix
from SingleMoleculeCorrelation_NMI_matrix import SingleMoleculeCorrelation_NMI_matrix_run


# def run():
# 
#     if len(sys.argv) < 10:
#         print 'usage: python %s methylation_reads_all.tsv region.bed chrFieldID leftField rightFieldID minCoverage windowsize stepsize tabix_location outfileprefix [-subsample N] [-expectedMaxDist bp] [-label fieldID]' % sys.argv[0]
#         print '\Note: the script assumes Tombo 1.3 probabilities, and a tabix indexed reads file'
#         print '\Note: the [-subsample] option will sample the reads in all comparisons down to the minCoverage level; the N parameter indicates how many such subsamplings should be averaged for the final value'
#         print '\Note: the [-expectedMaxDist] option will change the initial window over which the required minimum number of reads is to be search for; default: 2kb'
#         sys.exit(1)

reads = sys.argv[1]
peaks = sys.argv[2]
chrFieldID = int(sys.argv[3])
leftFieldID = int(sys.argv[4])
rightFieldID = int(sys.argv[5])
minCov = int(sys.argv[6])
window = int(sys.argv[7])
step = int(sys.argv[8])
tabix = sys.argv[9]
outprefix = sys.argv[10]

alph = 10
bet = 10
PSS = 100

SS = 1
doSS = False
if '-subsample' in sys.argv:
    SS = int(sys.argv[sys.argv.index('-subsample') + 1])
    doSS = True
    print 'will subsample all comparisons down to', minCov, 'reads'
    print 'will take the average outcome of', SS, 'subsamplings'

doLabel = False
if '-label' in sys.argv:
    labelFieldID = int(sys.argv[sys.argv.index('-label') + 1])
    doLabel = True

EMD = 2000
if '-expectedMaxDist' in sys.argv:
    EMD = int(sys.argv[sys.argv.index('-expectedMaxDist') + 1])
    print 'will use an expected maximum distance of', EMD

# result = guidesMismatches(fasta,guides,MM,outfilename)

SingleMoleculeCorrelation_NMI_matrix_run(reads,peaks,chrFieldID,leftFieldID,rightFieldID,minCov,window,step,tabix,outprefix,alph,bet,PSS,SS,doSS,doLabel,EMD)
