ó
?îec           @   s   d  Z  d d l Z d d l m Z m Z d d l m Z d d l m Z d d l	 Z	 e	 j
 d i e j ƒ  d 6ƒ d d l m Z d d	 l m Z d d
 l m Z m Z d d l m Z d d l m Z m Z m Z d d l m Z m Z d d l m Z m Z d e f d „  ƒ  YZ d e f d „  ƒ  YZ  d e f d „  ƒ  YZ! d d d „  ƒ  YZ" d „  Z# d e f d „  ƒ  YZ$ d d d „  ƒ  YZ% d e f d „  ƒ  YZ& d S(    sv   
Script with classes and functions for nucleosome calling.

@author: Alicia Schep, Greenleaf Lab, Stanford University
iÿÿÿÿN(   t   optimizet   signal(   t   copy(   t   bisect_leftt
   setup_argst   include_dirs(   t   calculateCov(   t   OccupancyTrack(   t   Trackt   CoverageTrack(   t   Chunk(   t
   call_peakst   reduce_peakst   read_chrom_sizes_from_bam(   t   FragmentMat2Dt	   BiasMat2D(   t   InsertionBiasTrackt   PWMt   SignalTrackc           B   s    e  Z d  Z d „  Z d „  Z RS(   s   Class for getting V-plot signalc         C   s   t  j |  | | | d ƒ d  S(   NR   (   R   t   __init__(   t   selft   chromt   startt   end(    (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyR      s    c         C   s~   |  j  | j  | j } | d k  r2 t d ƒ ‚ n  t j | j | j | j | j  | | j | ƒ | j	 d d ƒd |  _
 d  S(   Ni    sK   Insufficient flanking region on                     mat to calculate signalt   modet   valid(   R   t   wt	   ExceptionR   t	   correlatet   gett   lowert   upperR   t   matt   vals(   R   R    t   vmatt   offset(    (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyt   calculateSignal   s    (   t   __name__t
   __module__t   __doc__R   R$   (    (    (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyR      s   	t   NormSignalTrackc           B   s    e  Z d  Z d „  Z d „  Z RS(   s)   Class for storing normalized signal trackc         C   s   t  j |  | | | d ƒ d  S(   Ns   normalized signal(   R   R   (   R   R   R   R   (    (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyR   (   s    c         C   s5   | j  |  j |  j ƒ | j  |  j |  j ƒ |  _ d  S(   N(   R   R   R   R!   (   R   t   rawt   bias(    (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyt   calculateNormSignal*   s    (   R%   R&   R'   R   R+   (    (    (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyR(   &   s   	t	   BiasTrackc           B   s    e  Z d  Z d „  Z d „  Z RS(   s6   Class for getting Bias Signal Track-- Background modelc         C   s   t  j |  | | | d ƒ d  S(   NR*   (   R   R   (   R   R   R   R   (    (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyR   /   s    c         C   s  |  j  | j  | j } | d k  r2 t d ƒ ‚ n  | |  _ | |  _ t |  j |  j  |  j ƒ |  _ |  j j	 |  j | j
 | j | j d d ƒ | j |  _ t j |  j j | j
 | j |  j j  | |  j j | ƒ | j d d ƒd |  _ |  j |  j |  j j |  _ d  S(   Ni    sK   Insufficient flanking region on                     mat to calculate signali   i   R   R   (   R   R   R   R"   t   bias_matR	   R   R   t   covt   calculateCoverageR   R   R!   t   nuc_covR   R   R   R    (   R   R    R"   R0   R#   (    (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyt   calculateBackgroundSignal1   s    		(   R%   R&   R'   R   R1   (    (    (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyR,   -   s   	t   SignalDistributionc           B   s>   e  Z d  Z d „  Z d „  Z d d „ Z d „  Z d „  Z RS(   s,   Class for determining distribution of signalc         C   sw   | |  _  | |  _ | |  _ | j | j | j | | j | | j d ƒ } | t j | ƒ |  _	 |  j	 j
 ƒ  |  _ d  S(   Ni   (   t   positiont   readsR"   R   R   R   R   t   npt   sumt   prob_matt   flattent   probs(   R   R3   R"   R-   R4   (    (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyR   F   s    			0c         C   s:   t  j j |  j |  j ƒ } t  j | |  j j j ƒ } | S(   N(	   R5   t   randomt   multinomialR4   R9   t   reshapeR"   R    t   shape(   R   t   sim_vectt   sim_mat(    (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyt   simulateReadsM   s    iè  c            s%   t  ‡  f d †  t | ƒ ƒ ˆ  _ d  S(   Nc            s   t  j ˆ  j ƒ  ˆ  j j ƒ S(   N(   R5   R6   R@   R"   R    (   t   x(   R   (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyt   <lambda>R   t    (   t   mapt   ranget   scores(   R   t   numiters(    (   R   sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyt   simulateDistQ   s    c         C   s:   t  j |  j j ƒ } t |  j | |  j ƒ } t  j | ƒ S(   N(   R5   t   ravelR"   R    R   R9   R4   t   sqrt(   R   t   flatvt   var(    (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyt   analStdS   s    c         C   s!   t  j |  j |  j j |  j ƒ S(   N(   R5   R6   R7   R"   R    R4   (   R   (    (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyt   analMeanW   s    (   R%   R&   R'   R   R@   RH   RM   RN   (    (    (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyR2   D   s   			c         C   sT   d t  j d t  j | ƒ t  j |  | d d | ƒ } | | t | ƒ } | S(   sB   compute values of normal pdf with given mean and sd at values in xg      ð?i   (   R5   RJ   t   pit   expt   max(   RA   t   vR   t   meant   norm(    (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyRT   \   s    !t
   Nucleosomec           B   sM   e  Z d  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z	 RS(   s7   Class for storing information about a single nucleosomec         C   sž   | j  |  _  | |  _ | d |  _ | j j d | ƒ |  _ | j j d | ƒ |  _ | j j d | ƒ |  _ | j j d | ƒ |  _ | j j d | ƒ |  _ d  S(   Ni   t   pos(	   R   R   R   t   nfr_covR   R0   t
   nuc_signalt   norm_signalt   smoothed(   R   RV   t   nuctrack(    (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyR   e   s    	c   	      C   sc  | j  j | j j | j j |  j | j j j |  j | j j j d ƒ } | j j | j j | j j |  j | j j j |  j | j j j d ƒ } | j	 j | j j | j j |  j | j j j |  j | j j j d ƒ } | j j j  | } | t
 j | ƒ } | t
 j | ƒ } t
 j t
 j | ƒ | ƒ } t
 j t
 j | ƒ | ƒ } | | |  _ d  S(   Ni   (   R    R   t   paramsR   R   R   R"   R   R-   t   bias_mat_prenormR5   R6   t   logt   lr(	   R   R[   R    t   null_matR-   t	   nuc_modelt
   null_modelt   nuc_likt   null_lik(    (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyt   getLRn   s    000c         C   sD   t  |  j | j j | j |  j ƒ } | j ƒ  } |  j | |  _ d  S(   N(	   R2   R   R\   R"   R-   R0   RM   RY   t   z(   R   R[   t   st   std(    (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyt	   getZScore{   s    c         C   s‡   yU | j  j d |  j ƒ |  _  | j j d |  j ƒ |  _ | j j d |  j ƒ |  _ Wn+ t j |  _  t j |  _ t j |  _ n Xd  S(   NRV   (   t   occR   R   t	   occ_lowert	   occ_upperR5   t   nan(   R   R[   (    (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyt   getOcc€   s    c            sd  d „  ‰  ‡  f d †  ‰ ‡ f d †  } |  j  | j  } | j } t | | ƒ } | d k r… | | j j d } | j j d f } nn | | | d | j j k  rÌ | | d } | | | d d f } n' | | j j d } | j j d f } | t | ƒ d k r$| | j j d d } n` | | d | | j j k  rl| | d } | | | d | f 7} n | | j j d d } | j j | | !}	 d |	 |	 d k  <t | ƒ d k rd d f d	 t |	 ƒ d
 f | d d | d d f f }
 | j j	 d t |	 ƒ d | d f } nðt | ƒ d k rëd d f d	 t |	 ƒ d
 f | d d | d d f d d f d	 t |	 ƒ d
 f | d d | d d f f }
 | j j	 d t |	 ƒ d | d | j j	 d t |	 ƒ d | d f } n#t | ƒ d k rd d f d	 t |	 ƒ d
 f | d d | d d f d d f d	 t |	 ƒ d
 f | d d | d d f d d f d	 t |	 ƒ d
 f | d d | d d f f	 }
 | j j	 d t |	 ƒ d | d | j j	 d t |	 ƒ d | d | j j	 d t |	 ƒ d | d f	 } n  | | |
 |	 ƒ } t
 j | d d ƒ |  _ | d d |  _ | d d | |  _ d  S(   Nc      	   S   s   t  |  ƒ } t j | ƒ } t  | ƒ d } xM t | ƒ D]? } | t |  | | d | d | d | d | d ƒ 7} q8 W| S(   s)   Add several normal distributions togetheri   i   i   (   t   lenR5   t   zerosRE   RT   (   RA   R\   t   lt   fitt   it   j(    (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyt   addNormsŠ   s    =c            s@   t  j d t | ƒ d t | ƒ ƒ } t ˆ  | |  ƒ | d ƒ S(   s5   error function for normal fit; to be used for fitNormi    i   i   (   R5   t   linspaceRo   R6   (   t   parst   yRA   (   Ru   (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyt   err_func’   s    %c      	      s1   | f } t  j ˆ  |  d | d | d d ƒ} | S(   s;   Fit a normal to the signal with lower and upperbounds to sdt   argst   boundst   methods   L-BFGS-B(   R    t   minimize(   t   guesst   boundt   sigt   at   res(   Ry   (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyt   fitNorm–   s    	$i    i   i   i   i2   gü©ñÒMbP?gš™™™™™ñ?i
   gÍÌÌÌÌÌì?RA   i   iÄ	  i   iÄ	  i   iÄ	  i   iÄ	  i   iÄ	  i   iÄ	  (   R   t   sorted_nuc_keysR   R\   t   nonredundant_sepRo   RZ   R!   RQ   t	   smooth_sdR5   RJ   t   fuzzt   weightt   fit_pos(   R   R[   Rƒ   t   indext   allnucsRA   t   leftt   meanst   rightR€   R{   t   guessesR‚   (    (   Ru   Ry   sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyt   getFuzz‰   sR    		;*5;!*55;!!*c         C   sj   d j  t t |  j |  j |  j |  j |  j |  j |  j	 |  j
 |  j |  j |  j |  j |  j g ƒ ƒ } | S(   Ns   	(   t   joinRD   t   strR   R   R   Rf   Rj   Rk   Rl   R_   RY   RX   R0   RW   R‡   (   R   t   out(    (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyt   asBedÃ   s    <c         C   s   | j  |  j ƒ  d ƒ d  S(   Ns   
(   t   writeR”   (   R   t   handle(    (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyR•   È   s    (
   R%   R&   R'   R   Re   Ri   Rn   R   R”   R•   (    (    (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyRU   c   s   							:	t   NucParametersc        	   B   s/   e  Z d  Z d e d d d d d d d „ Z RS(   s:   Class for storing parameters related to nucleosome callingi   ix   i   i    i   c         C   s¼   | |  _  | |  _ | j |  _ | j |  _ | j j d |  _ | |  _ | |  _ | |  _	 | |  _
 | |  _ |
 |  _ |	 |  _ | |  _ t j | ƒ |  _ t | ƒ |  _ | |  _ | |  _ d  S(   Ni   (   t   atacR"   R   R   R    R=   t   windowt   fragmentsizest	   min_readst   min_zt   min_lrR†   t   redundant_sepR…   t   fastaR   t   opent   pwmR   t   chrst   bamt	   occ_track(   R   R"   Rš   R£   RŸ   R¡   R¤   R˜   t   sdR…   Rž   Rœ   R   R›   (    (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyR   Î   s"    											N(   R%   R&   R'   t   Nonet   TrueR   (    (    (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyR—   Ì   s   	t   NucChunkc           B   sƒ   e  Z d  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z	 d „  Z
 d	 „  Z d
 „  Z d „  Z d „  Z d „  Z RS(   sI   Class for storing and determining collection of nucleosome positions
    c         C   s(   | j  |  _  | j |  _ | j |  _ d  S(   N(   R   R   R   (   R   t   chunk(    (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyR   é   s    c         C   s   | |  _  d  S(   N(   R\   (   R   t
   parameters(    (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyt
   initializeí   s    c         C   s’   t  |  j |  j t |  j j |  j j d d ƒ |  j t |  j j |  j j d d ƒ d |  j j d |  j j ƒ|  _	 |  j	 j
 |  j j ƒ d  S(   Ni   i   i    R˜   (   R   R   R   RQ   R\   R™   R   R   R˜   R    t   makeFragmentMatR£   (   R   (    (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyt   getFragmentMatï   s    0Hc         C   sM  t  |  j |  j |  j j |  j |  j j d |  j j ƒ |  _ t |  j |  j |  j j |  j j d |  j |  j j |  j j d d d t	 ƒ} |  j j
 d  k	 rÝ | j |  j j
 |  j j |  j j ƒ |  j j | ƒ n  t  |  j |  j |  j j |  j |  j j d |  j j ƒ |  _ t |  j j ƒ |  j _ |  j j |  j j ƒ d  S(   Ni    i   i   R^   (   R   R   R   R\   R™   R   R   R-   R   R§   RŸ   R¦   t   computeBiasR¢   R¡   t   makeBiasMatR]   R   R    t   normByInsertDistRš   (   R   t
   bias_track(    (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyR¯   ó   s    %'.%%c         C   s  t  |  j |  j |  j ƒ |  _ |  j j |  j |  j j |  j j	 |  j j
 ƒ t |  j |  j |  j ƒ |  _ |  j j |  j |  j j |  j ƒ t |  j |  j |  j ƒ |  _ |  j j |  j |  j j ƒ t |  j |  j |  j ƒ |  _ |  j j |  j |  j ƒ d S(   s   Gets Nucleosome Signal TrackN(   R	   R   R   R   R0   R/   R    R\   R   R   R™   R,   R*   R1   R-   R"   R   RX   R$   R(   RY   R+   (   R   (    (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyt   getNucSignalÿ   s    !"c         C   sJ   t  |  j |  j |  j ƒ |  _ |  j j |  j d |  j j |  j j	 ƒ d S(   s-   get number of reads of sub-nucleosomal lengthi    N(
   R	   R   R   R   RW   R/   R    R\   R   R™   (   R   (    (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyt   getNFR  s    c      
   C   s¥   d |  j  j d } t |  j |  j |  j d ƒ |  _ t |  j j	 ƒ } |  j j
 | ƒ d |  j j	 |  j j	 d k  <|  j j | d d d |  j  j d d	 d
 t ƒd S(   s!   Smooth thenormalized signal tracki   i   s   Smooth Signali    R™   t   gaussianR¥   R   t   sameRT   N(   R\   R†   R   R   R   R   RZ   R   RY   R!   t   assign_trackt   smooth_trackR§   (   R   t
   window_lent   tmp(    (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyt   smoothSignal  s    !c         C   sÅ   t  |  j |  j |  j d ƒ |  _ |  j j |  j j ƒ |  j j d  d } t  |  j |  j |  j d ƒ |  _ |  j j | ƒ |  j j d  d } t  |  j |  j |  j d ƒ |  _	 |  j	 j | ƒ d S(   sI   gets occupancy track-- either reads in from bw handle given, or makes newt	   Occupancyiõÿÿÿs   lower_bound.bedgraph.gzs   upper_bound.bedgraph.gzN(
   R   R   R   R   Rj   t
   read_trackR\   R¤   Rk   Rl   (   R   t
   lower_filet
   upper_file(    (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyRn     s    !!!c            ss  i  ˆ  _  ˆ  j j ˆ  j j } t | d d d ˆ  j j d ˆ  j j d d ˆ  j j d ƒ} x  | D]˜ } t | ˆ  j	 ˆ  ƒ } | j
 ˆ  j j k rd | j ˆ  ƒ | j ˆ  j j k rü | j ˆ  ƒ | j ˆ  j j k rù | j ˆ  ƒ | ˆ  j  | <qù qü qd qd Wt j t ˆ  j  j ƒ  ƒ ƒ ˆ  _ t ˆ  j t ‡  f d †  ˆ  j ƒ ˆ  j j ƒ ˆ  _ t j ˆ  j ˆ  j ƒ ˆ  _ d S(	   s   Find peaks in datat
   min_signali    t   sept   boundaryi   t   orderc            s   ˆ  j  |  j S(   N(   t   nuc_collectionRf   (   RA   (   R   (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyRB   9  RC   N(   RÃ   RY   R!   RZ   R   R\   Rž   R…   RU   R   R0   R›   Re   R_   R   Ri   Rf   Rœ   Rn   R5   t   arrayt   sortedt   keysR„   R   RD   t   nonredundantt	   setdiff1dt	   redundant(   R   t   combinedt   cands1Rs   t   nuc(    (   R   sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyt   findAllNucs&  s&    	#!	c         C   sÒ   t  j d |  j ƒ  d |  j ƒ  ƒ } t  j |  j ƒ  ƒ } x` |  j D]U } |  j | j |  ƒ | t | |  j | j d |  j | j	 |  j | j
 ƒ 7} qD Wt |  j |  j |  j d ƒ |  _ |  j j | ƒ d  S(   Ni    i   i   s   Fitted Nucleosome Signal(   R5   Rv   t   lengthRp   R„   RÃ   R   RT   R‡   Rˆ   R‰   R   R   R   R   t   fittedR¶   (   R   RA   Rr   RÌ   (    (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyRr   <  s    %?c         C   s   |  j  j ƒ  |  _ d S(   s   make insertion track for chunkN(   R    t   getInst   ins(   R   (    (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyt   makeInsertionTrackE  s    c         C   s}   |  j  | ƒ |  j ƒ  |  j ƒ  |  j ƒ  |  j ƒ  |  j ƒ  | j d k	 r[ |  j ƒ  n  |  j	 ƒ  |  j
 ƒ  |  j ƒ  d S(   sD   wrapper to carry out all methods needed to call nucleosomes and nfrsN(   R«   R­   R¯   R²   R³   Rº   R¤   R¦   Rn   RÍ   Rr   RÒ   (   R   R\   (    (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyt   processH  s    






c         C   s1   |  j  j ƒ  } x | D] } t |  | ƒ q Wd S(   s/   remove data from chunk-- deletes all attributesN(   t   __dict__RÆ   t   delattr(   R   t   namest   name(    (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyt
   removeDataU  s    (   R%   R&   R'   R   R«   R­   R¯   R²   R³   Rº   Rn   RÍ   Rr   RÒ   RÓ   RØ   (    (    (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyR¨   æ   s   							
	
					(    (    ('   R'   t   numpyR5   t   scipyR    R   R   t   bisectR   t	   pyximportt   installt   get_includet   nucleoatac.multinomial_covR   t   nucleoatac.OccupancyR   t   pyatac.tracksR   R	   t   pyatac.chunkR
   t   pyatac.utilsR   R   R   t   pyatac.chunkmat2dR   R   t   pyatac.biasR   R   R   R(   R,   R2   RT   RU   R—   R¨   (    (    (    sB   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/NucleosomeCalling.pyt   <module>   s*    	i