ó
Ý1`Zc           @   sê   d  d l  m Z d  d l m Z d  d l m Z d  d l m Z d  d l m	 Z	 d  d l m
 Z
 d  d l m Z d  d l Z d  d l Z d  d l Z d	 f  d
 „  ƒ  YZ d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d S(   iÿÿÿÿ(   t   ReadContainer(   t
   itemgetter(   t   stats(   t   clock(   t	   factorial(   t   log(   t   randintNt   tPointsc           B   s}   e  Z d  „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z	 d „  Z
 d	 „  Z d
 „  Z d „  Z d „  Z RS(   c         C   sy   | |  _  | |  _ | |  _ | |  _ | |  _ i  |  _ d |  _ i  |  _ i  |  _ i  |  _	 d |  _
 d |  _ g  |  _ d  S(   Ni    (   t   wt   l_limitt   u_limitt   mindistt   sR   t   num_tPointst
   numwindowst   read_counts_plust   read_counts_minust   numtestst	   umi_countt   transitions(   t   selfR   R	   R
   R   R   (    (    s\   /oak/stanford/groups/akundaje/marinovg/programs/PeakXus/src/analyzeTransitions6_symmetric.pyt   __init__   s    												c         C   s   |  j  S(   N(   R   (   R   (    (    s\   /oak/stanford/groups/akundaje/marinovg/programs/PeakXus/src/analyzeTransitions6_symmetric.pyt   getUMIcount/   s    c         C   s   |  j  S(   N(   R   (   R   (    (    s\   /oak/stanford/groups/akundaje/marinovg/programs/PeakXus/src/analyzeTransitions6_symmetric.pyt   getNumTests0   s    c         C   s™  | j  | d |  j | ƒ |  j | <| j  | d |  j | ƒ |  j | <t j d ƒ |  j | <d } x2t d t |  j | ƒ d d ƒ D]} t	 |  j | | | d	 !ƒ t	 |  j | | | d	 !ƒ } | |  j | d k  rá q„ n  | |  j | | d f d k rä| |  j | | d f k r‘| |  j | | d f <| d	 |  j | | d f <| |  j | | d f <|  j | |  j | d  d  … d f j
 ƒ  d  d  d
 … |  j | <t j |  j | d  d  … d f | k ƒ d d } q‘q„ | |  j | d <| d	 |  j | d <| |  j | d <|  j | |  j | d  d  … d f j
 ƒ  d  d  d
 … |  j | <t j |  j | d  d  … d f | k ƒ d d } q„ Wd  S(   Ns   ++s   --ié  i   i8ÿÿÿi    id   i   ie   iÿÿÿÿi   (   ié  i   (   iÿÿÿÿiÿÿÿÿ(   iÿÿÿÿi    (   iÿÿÿÿi   (   iÿÿÿÿi   (   t   getAllReadHistoR   R   R   t   npt   zerosR   t   ranget   lent   sumt   argsortt   where(   R   t   chromt   readst   allpairst   nproct   latest_it   it   current(    (    s\   /oak/stanford/groups/akundaje/marinovg/programs/PeakXus/src/analyzeTransitions6_symmetric.pyt   calcTransitions_counts1   s(    ""*: !B:Bc         C   sp  d } | j  | ƒ \ } } | j | d d d ƒ }	 t g  t d t |	 ƒ ƒ D]& }
 |	 |
 d k rL |
 |	 |
 f ^ qL ƒ } | d k r¬ | j | d |  j | ƒ |  j | <n" | j | d |  j | ƒ |  j | <| d k rÿ | j | d |  j | ƒ |  j | <n" | j | d |  j | ƒ |  j | <t	 j
 |  j | ƒ t	 j
 |  j | ƒ |  _ d t |  j ƒ d Gt |	 ƒ |  j | <i  } x  | D] } | d | | d <qƒWt | j ƒ  ƒ } t	 j t | ƒ d d	 d d |  j d |  j d f ƒ |  j | <x;t d t | ƒ ƒ D]$} | | }
 |
 | |  j |  j d k r=Pn  |
 |  j |  j k  rYqn  | |
 d k roqn  | | | d d k  rqn  |  j j |
 ƒ | |  j d |  j d t |  j | ƒ k r)t	 j |  j | t	 j t | ƒ d
 d	 d d |  j d |  j d f ƒ f ƒ |  j | <n  |  j | |
 |  j d |
 !j ƒ  d k  rZqn  | | d |
 d }
 | d k rg  t |
 |  j d |
 d ƒ D]# } |  j | | d k r›| ^ q›} g  t |
 d |
 |  j d ƒ D]# }
 |  j | |
 d k rã|
 ^ qã} ní t	 j |  j | |
 |  j d |
 !d  d  d … t |  j | |
 |  j d |
 !ƒ k ƒ d } t	 j |  j | |
 |
 |  j d !t |  j | |
 |
 |  j d !ƒ k ƒ d } | d } | d } |
 | d } | |
 } | g } | g } x0| D](}
 x| D]} | |  j | | d f <|
 |  j | | d f <d |
 | d |  j |  j | | d f <|  j | | |  j |
 |  j !|  j | | d	 d	 |
 | d |  j … f <|  j | | |  j |
 |  j !|  j | | d	 |
 | d |  j d	 d |
 | d |  j … f <| d 7} qWqWqW|  j | d  | … d  d  … f |  j | <|  j | 7_ d  S(   Ni    t   -i   s   ++s   --s
   UMI count=s    | i   i   i   iÿÿÿÿ(   t   getChromSizet   getReadHistot   setR   R   R   R   R   R   R   R   R   t   strR   t   sortedt   keysR   R	   R   R   R   t   appendt   concatenatet   maxR   R   (   R   R    R!   R"   R#   t   allreadst   tpoint_countt   Nt   nt   read_countsR%   t
   counts_sett   nonzerosR   R.   t   kt   jt   all_jt   all_it   max_indices_plust   max_indices_minust   aux_jt   aux_i(    (    s\   /oak/stanford/groups/akundaje/marinovg/programs/PeakXus/src/analyzeTransitions6_symmetric.pyt   calcTransitionsU   sn    H %" %"- D
    / ]+ HK^Q


		*Lc*c         C   s­  t  j d | ƒ } g  t d t |  j | ƒ ƒ D]Ñ } | j t d |  j | | d d t |  j | | d f d ƒ … f |  j | | d t |  j | | d f d ƒ d d t |  j | | d f d ƒ … f | |  j | | d f d | | f ƒ^ q/ }	 g  |	 D] }
 |
 j ƒ  ^ q}	 x t d t |	 ƒ ƒ D]‰} |	 | d t	 k rod |  j | | d f <q;t |  j | | d f ƒ } t |  j | | d f ƒ } t | | ƒ d } | d k r†|	 | d |  j | | d f <|	 | d t
 j |  j | | | d !ƒ t
 j |  j | | | d !ƒ t
 j |  j | | | d !ƒ t
 j |  j | | | d !ƒ |  j | | d f <q;|	 | d |  j | | d f <|	 | d |  j | | d f <q;W| j ƒ  | j ƒ  | j ƒ  d	 Gt |  j | ƒ Gt |  j | ƒ |  _ |  j | t
 j |  j | d  d  … d f d k ƒ d d  d  … f |  j | <d
 Gt |  j | ƒ Gt |  j | ƒ d k r©| d k r©|  j | ƒ n  d  S(   Nt	   processesi    t   argsi   i   iÿÿÿÿi   iþÿÿÿs   num t-points before testing: s    num significant t-points: (   t   mpt   PoolR   R   R   t   apply_asynct   backgroundModelPositionst   intt   gett   TrueR   R   R   R   t   closet	   terminatet   joinR   R   t
   delOverlap(   R   R    t   p_thresholdt   yatesR#   t   pseudot   allowoverlapt   poolR9   t   rest   pt   current_startt   current_endt   current_middle(    (    s\   /oak/stanford/groups/akundaje/marinovg/programs/PeakXus/src/analyzeTransitions6_symmetric.pyt   testBackgroundÇ   s2    ô ž#


N% c      	   C   s¸  x±|  j  D]¦} x*t d t |  j  | ƒ ƒ D]} |  j  | | d f d k rV q- n  t t t |  j  | | d f ƒ t |  j  | | d f ƒ d ƒ ƒ } |  j  | | d f } |  j  | | d f } | | d } |  j  | | d f } | }	 | }
 g  } xý |
 d k r|
 d 8}
 |  j  | |
 d f d k r>qn  t |  j  | |
 d f ƒ } t |  j  | |
 d f ƒ } | | d } |  j  | |
 d f } t | j t t | | d ƒ ƒ ƒ ƒ d k rý| | k rí| j |	 ƒ |
 }	 qþ| j |
 ƒ qPqW| d k r- x( | D] } d |  j  | | d f <qWq- q- W| d k rš|  j  | t j |  j  | d  d  … d f d k ƒ d d  d  … f |  j  | <n  d Gt |  j  | ƒ GHq
 Wd  S(   Ni   iþÿÿÿiÿÿÿÿi    i   s'   , num t-points after removing overlap: (	   R   R   R   R+   RH   t   intersectionR/   R   R   (   R   RR   t   cR%   t   current_siteRV   RW   RX   t	   current_St   smallest_p_indexR:   t   deletedt   candidate_startt   candidate_endt   candidate_middlet   candidate_St   d(    (    s\   /oak/stanford/groups/akundaje/marinovg/programs/PeakXus/src/analyzeTransitions6_symmetric.pyRN   ÷   sB    # G
 .	 " Qc         C   s   |  j  | S(   N(   R   (   R   R    (    (    s\   /oak/stanford/groups/akundaje/marinovg/programs/PeakXus/src/analyzeTransitions6_symmetric.pyt   getTRegions&  s    c         C   s   |  j  | d  d  … |  j f S(   N(   R   t   P(   R   R    (    (    s\   /oak/stanford/groups/akundaje/marinovg/programs/PeakXus/src/analyzeTransitions6_symmetric.pyt   getTransitions(  s    c         C   s   |  j  | S(   N(   R   (   R   R    (    (    s\   /oak/stanford/groups/akundaje/marinovg/programs/PeakXus/src/analyzeTransitions6_symmetric.pyt   getNumwindows*  s    c         C   s   |  j  S(   N(   R   (   R   R    (    (    s\   /oak/stanford/groups/akundaje/marinovg/programs/PeakXus/src/analyzeTransitions6_symmetric.pyt   getNumtPoints,  s    c         C   s   |  j  S(   N(   R   (   R   (    (    s\   /oak/stanford/groups/akundaje/marinovg/programs/PeakXus/src/analyzeTransitions6_symmetric.pyR   .  s    c         C   sU   t  | d ƒ @ } t j | d d ƒ} x! |  j D] } | j | g ƒ q1 WWd  QXd  S(   Nt   wbt	   delimiters   	(   t   opent   csvt   writerR   t   writerow(   R   t   filenamet   csvfileR   t   t(    (    s\   /oak/stanford/groups/akundaje/marinovg/programs/PeakXus/src/analyzeTransitions6_symmetric.pyt   saveTransitions0  s     (   t   __name__t
   __module__R   R   R   R'   RA   RY   RN   Re   Rg   Rh   Ri   Rs   (    (    (    s\   /oak/stanford/groups/akundaje/marinovg/programs/PeakXus/src/analyzeTransitions6_symmetric.pyR      s   				$	r	0	/					c         C   s8  g  } g  } xt  d t | ƒ ƒ D]õ } | | d k  r› | g  t  d t |  | ƒ ƒ D] }	 | ^ qU 7} | g  t  d t | | ƒ ƒ D] }	 | ^ q… 7} q" | g  t  d t | | ƒ ƒ D] }	 | t | d ƒ ^ q¸ 7} | g  t  d t |  | ƒ ƒ D] }	 | t | d ƒ ^ qö 7} q" W| d k  r•g  t  d t | d ƒ d ƒ D] } | ^ qE}
 t j t j | d |
 ƒd t j | d |
 ƒd g ƒ } n  | d k rÁt | d | d ƒ \ } } nM | d k rðt | d | d | ƒ \ } } n t | | | | | ƒ \ } } | | k r't | | f St	 | | f Sd  S(   Ni    i   i   t   bins(
   R   RH   R   t   arrayt	   histogramt	   chi2Yatest   gtestt   kstest_laplaceRJ   t   False(   t   observed_plust   observed_minusRO   R4   RP   RQ   t
   signal_post   bg_posR%   R:   Rv   t   contigRU   t   G(    (    s\   /oak/stanford/groups/akundaje/marinovg/programs/PeakXus/src/analyzeTransitions6_symmetric.pyRG   8  s&    03>B0>   #c         C   s	  t  |  ƒ d k  r. t  | ƒ d k  r. t d f St | ƒ } t j | ƒ } t j | ƒ } t j | ƒ } t j | ƒ } xz t d | ƒ D]i }	 |	 | d k  rÇ | |	 c |  |	 7<| |	 c | |	 7<q† | |	 c |  |	 7<| |	 c | |	 7<q† Wt  | ƒ t  | ƒ }
 t  | ƒ t  | ƒ } |
 | } g  } g  } x t d | ƒ D]ï}	 | d k r`| } n  | } | } | t  | ƒ t  | ƒ d k r–t d f S| } | } | d k rjd } x³ | | k  rft d | d ƒ } | | d k  r&| | d k rc| | c d 8<| | c d 7<| d 7} qcq·| | d k r·| | c d 8<| | c d 7<| d 7} q·q·Wn  xÇ t d | ƒ D]¶ } x­ t d | ƒ D]œ } | | k r¨qn  | g  t d t | | | | ƒ ƒ D] } t | | ƒ ^ qÍ7} | g  t d t | | | | ƒ ƒ D] } t | | ƒ ^ q7} qWqzWqEWt | ƒ d k  s\t | ƒ d k  rft d f Sd t	 t	 | ƒ t	 | ƒ g ƒ f } d } t
 t j | d | d | ƒd t | ƒ t j | d | d | ƒd t | ƒ ƒ } | | k rût | f St | f Sd  S(   Ni   g      ð?i    i   i
   Rv   R   (   R   RJ   RH   R   R   R   R   t   absR   R1   Rz   Rx   t   floatR|   (   R}   R~   RO   R4   t   N_avgt   reads_towards_plust   reads_away_plust   reads_towards_minust   reads_away_minusR%   t   x_towardt   x_awayt
   mean_movedt   distances_signalt   distances_bgt   r1t   reads_signal_plust   reads_signal_minust   reads_bg_plust   reads_bg_minust   movedt   r2R:   R9   R5   t	   bin_ranget   nbinsRU   (    (    s\   /oak/stanford/groups/akundaje/marinovg/programs/PeakXus/src/analyzeTransitions6_symmetric.pyt   backgroundModelCombinede  sl    $ 

 	  
 BN$ 
$U
c      
   C   sÊ   | | } |  | }  t  |  ƒ d } d t g  t d t  |  ƒ ƒ D]X } |  | d k rB | | d k rB t |  | ƒ t t |  | ƒ t | | ƒ ƒ ^ qB ƒ } d t j j | d | ƒ} | | f S(   Ni   i   i    t   df(   R   R   R   R„   R   R   t   chi2t   cdf(   t   Ot   Et   alphat   dofR%   R‚   RU   (    (    s\   /oak/stanford/groups/akundaje/marinovg/programs/PeakXus/src/analyzeTransitions6_symmetric.pyRz   ¼  s    ~c         C   s"   t  j |  | ƒ \ } } | | f S(   N(   R   t   ks_2samp(   Rœ   R   t   KSRU   (    (    s\   /oak/stanford/groups/akundaje/marinovg/programs/PeakXus/src/analyzeTransitions6_symmetric.pyt   kstestÎ  s    c         C   sH  t  d t | d ƒ d ƒ } t j |  d | ƒd } t j | d | ƒd } | | t |  ƒ | t | ƒ } | | t | ƒ | t | ƒ } t j t |  ƒ | t | ƒ ƒ } t j t | ƒ | t | ƒ ƒ }	 t j | ƒ }
 t j | ƒ } t t j | | ƒ ƒ } t	 | |	 | ƒ } | | k r:| | f Sd | f Sd  S(   Ni    i   i   Rv   g      ð?(
   R   RH   R   Rx   R   t   rintt   cumsumR1   Rƒ   t   get_KS_2samp_critical(   Rœ   R   Rž   t   p_thresR4   Rv   t   O_histt   E_histt   nOt   nEt   O_cumt   E_cumt   max_difft   D(    (    s\   /oak/stanford/groups/akundaje/marinovg/programs/PeakXus/src/analyzeTransitions6_symmetric.pyR{   Ø  s    	####
c         C   s_  i9 d d f d d f 6d d f d d f 6d d f d d f 6d d f d d f 6d d f d d f 6d d f d	 d
 f 6d d f d	 d f 6d d f d	 d f 6d d d f d	 d f 6d d d f d	 d f 6d d d f d	 d f 6d d d f d	 d f 6d d d f d	 d f 6d d f d d f 6d d f d d
 f 6d d d f d d f 6d d d f d d f 6d d d f d d f 6d d d d f d d f 6d d d d f d d f 6d d d d f d d f 6d d d d f d d f 6d d d f d
 d f 6d d d f d
 d f 6d d d d f d
 d f 6d d d d f d
 d f 6d d d d f d
 d f 6d d d d f d
 d f 6d d d  d f d
 d f 6d d d f d d f 6d d! d d! f d d f 6d d d" d f d d f 6d d# d d# f d d f 6d d d d f d d f 6d# d$ d  d$ f d d f 6d d% d d% f d d f 6d! d& d! d& f d d f 6d d' d( d' f d d f 6d& d) d! d) f d d f 6d* d+ d( d+ f d d f 6d, d- d d- f d d f 6d d. d* d. f d d f 6d' d/ d d/ f d d f 6d d% d( d% f d d f 6d d0 d d0 f d d f 6d/ d1 d* d1 f d d f 6d2 d3 d d3 f d d f 6d) d4 d# d4 f d d f 6d+ d5 d* d5 f d d f 6d+ d6 d, d6 f d d f 6d7 d8 d) d8 f d d f 6d0 d9 d+ d9 f d d f 6d- d: d d: f d d f 6d0 d; d$ d; f d d f 6d1 d< d- d< f d d f 6d= d> d% d> f d d f 6d. d? d3 d? f d d f 6} i d@ dA 6dB dC 6} |  | k  rÂ| } |  } | }  n  | dD k  r×dD } n  |  d k r	| | t  j |  | |  | ƒ } nR | |  f | k r$d } n7 | dA k rG| | |  f dE } n | | |  f dD } | S(F   Ng      $@g      ð?i   i   i	   i
   i   i   i   i   i   i   g      5@g      8@g      ;@g      >@g     €@@g      B@i   g      4@g      <@g      @@g      D@g      F@g      H@g     €A@g     €F@g      I@g     €K@g     €C@g      N@g     €E@g      E@g      A@g      K@g     €P@g      R@g     €H@g      L@g      G@g     €O@g     €J@g     €Q@g     €M@g     @S@g      U@g      P@g      T@g      V@g      Q@g      X@g     @T@g     €V@g     ÀX@g     ÀR@g      [@g      Y@g     €[@g      ^@g     @^@g     €U@g     €`@g      b@g®Gázú?g{®Gáz„?g®Gáz®û?gš™™™™™©?i   i    (   R   t   sqrt(   t   n1t   n2Rž   t   Ct   c_alphat   auxR®   (    (    s\   /oak/stanford/groups/akundaje/marinovg/programs/PeakXus/src/analyzeTransitions6_symmetric.pyR¥   ÷  s*    	ÿ ÿ ÿ ÿ ÿ ’	 	 & 	 c            s^   t  ˆ ƒ d } t ‡  ‡ f d †  t d t  ˆ ƒ ƒ Dƒ ƒ } d t j j | d | ƒ| f S(   Ni   c         3   s7   |  ]- } t  ˆ | ˆ  | ƒ d  d ˆ  | Vq d S(   g      à?i   N(   Rƒ   (   t   .0R%   (   R   Rœ   (    s\   /oak/stanford/groups/akundaje/marinovg/programs/PeakXus/src/analyzeTransitions6_symmetric.pys	   <genexpr>  s    i    R™   (   R   R   R   R   Rš   R›   (   Rœ   R   RŸ   t   X2(    (   R   Rœ   s\   /oak/stanford/groups/akundaje/marinovg/programs/PeakXus/src/analyzeTransitions6_symmetric.pyRy     s    .(   t   ReadContainer6R    t   operatorR   t   scipyR   t   timeR   t   mathR   R   t   randomR   t   multiprocessingRD   t   numpyR   Rm   R   RG   R˜   Rz   R¢   R{   R¥   Ry   (    (    (    s\   /oak/stanford/groups/akundaje/marinovg/programs/PeakXus/src/analyzeTransitions6_symmetric.pyt   <module>   s$   ÿ "	-	W		
		