ó
?îec           @   s|  d  Z  d d l m Z m Z m Z d d l Z d d l j Z	 d d l
 Z
 e
 j d i e j ƒ  d 6ƒ d d l m Z d d l m Z m Z d d l m Z d d	 l m Z m Z m Z d d
 l m Z m Z d d l m Z m Z d d l m Z d d d „  ƒ  YZ  d d d „  ƒ  YZ! d „  Z" d e f d „  ƒ  YZ# d e f d „  ƒ  YZ$ d d d „  ƒ  YZ% d e f d „  ƒ  YZ& d S(   sg   
Classes for computing nucleosome occupancy

@author: Alicia Schep, Greenleaf Lab, Stanford University
iÿÿÿÿ(   t   signalt   optimizet   statsNt
   setup_argst   include_dirs(   t   FragmentSizes(   t   Trackt   CoverageTrack(   t   Chunk(   t   smootht
   call_peakst   read_chrom_sizes_from_fasta(   t   FragmentMat2Dt	   BiasMat2D(   t   InsertionBiasTrackt   PWM(   t   gammat   FragmentMixDistributionc           B   sA   e  Z d  Z d d d „ Z d	 d „ Z d
 d „ Z d	 d „ Z RS(   s,   Class for modelling insert size distributioni    iÐ  c         C   s   | |  _  | |  _ d  S(   N(   t   lowert   upper(   t   selfR   R   (    (    s:   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/Occupancy.pyt   __init__   s    	c         C   s2   t  |  j |  j ƒ |  _ |  j j | d | ƒd  S(   Nt   chunks(   R   R   R   t   fragmentsizest   calculateSizes(   R   t   bamfilet	   chunklist(    (    s:   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/Occupancy.pyt   getFragmentSizes   s    i#   is   c      	      s  t  j |  j j |  j | d ƒ t |  j j |  j | d ƒ ƒ k ƒ d d |  j } t | d | ƒ | d f } t  j | d | d ƒ ‰ |  j j | d | d ƒ ‰ d „  ‰  t  j | d d ƒ t  j	 d ƒ } g  t
 | d d ƒ D] ‰ d ^ qì } d d d f } xt t
 d | d d ƒ D][ ‰ ‡  ‡ ‡ ‡ f d	 †  } t j | | d
 t d t j ƒ} | d | ˆ <| d | ˆ <q%Wt  j | ƒ } | | }	 t |  j |  j d ˆ  t  j |  j |  j ƒ | | | ƒ ƒ|  _ t  j |  j j |  j | d ƒ |  j j | d |  j ƒ f ƒ }
 t |
 |
 d k ƒ d |
 |
 d k <t |  j |  j d |
 ƒ|  _ t  j t  j | d |  j ƒ |  j j | d |  j ƒ |  j j | d |  j ƒ f ƒ } t t |
 ƒ d t | | d k ƒ d ƒ | | d k <t |  j |  j d | ƒ|  _ d S(   s.   Model NFR distribution with gamma distributioni   i    c   	      S   sª   | d } | d } | d } |  | } t  j t | ƒ ƒ } | d k rX | d k } n | d k } | | | | d t  j | | | ƒ | | t | ƒ | | <| S(   Ni    i   i   (   t   npt   zerost   lent   expR   (	   t   Xt   ot   pt   kt   thetat   at   x_modt   rest   nz(    (    s:   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/Occupancy.pyt	   gamma_fit#   s    



Bt   infg{®Gáz„?i
   i–   i   c            s!   t  j ˆ  ˆ ˆ |  ƒ ˆ d ƒ S(   Ni   (   R   t   sum(   R"   (   R)   t   it   xt   y(    s:   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/Occupancy.pyt   <lambda>3   t    t   full_outputt   finisht   valsgš™™™™™¹?gü©ñÒMbP?N(   g{®Gáz„?i
   (   g{®Gáz„?i–   (   g{®Gáz„?i   (   R   t   whereR   t   getR   t   maxt   mint   aranget   onest   floatt   rangeR   t   brutet   Truet   fmint   argminR   R   t   nfr_fit0t   concatenatet   nfr_fitR   t   nuc_fit(   R   t
   boundariest   bt	   res_scoret	   res_paramt   prangest   ft   tmprest   whichresR'   t   nfrt   nuc(    (   R)   R,   R-   R.   s:   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/Occupancy.pyt   modelNFR   s2    V	$'
@A$#7c         C   s…  t  j ƒ  } t  j t |  j |  j ƒ |  j j ƒ  d d ƒt  j t |  j |  j ƒ |  j j ƒ  d d ƒt  j t |  j |  j ƒ |  j	 j ƒ  d d ƒt  j t |  j |  j ƒ |  j
 j ƒ  d d ƒt  j ƒ  t  j d ƒ t  j d ƒ | rw| j | ƒ t  j | ƒ d j | j d ƒ d	  d
 g ƒ } t j |  j j ƒ  |  j	 j ƒ  |  j
 j ƒ  f ƒ } t j | | d d ƒn
 | j ƒ  d S(   s   plot the Fitst   labelt   Observeds   NFR Fits   Nucleosome Models	   NFR Models   Fragment sizes   Relative Frequencyt   .iÿÿÿÿt   txtt	   delimiters   	N(   t   pltt   figuret   plotR;   R   R   R   R5   R@   RC   RB   t   legendt   xlabelt   ylabelt   savefigt   closet   joint   splitR   t   vstackt   savetxtt   show(   R   t   filenamet   figt	   filename2t   out(    (    s:   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/Occupancy.pyt   plotFitsC   s"    '...
#!N(   i#   is   (   t   __name__t
   __module__t   __doc__R   t   NoneR   RN   Re   (    (    (    s:   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/Occupancy.pyR      s
   &t   OccupancyCalcParamsc           B   s   e  Z d  Z d d „ Z RS(   s1   Class with parameters for occupancy determinationgÍÌÌÌÌÌì?c         C   s®   | |  _  | |  _ | j j | | ƒ } | t j | ƒ |  _ | j j | | ƒ } | t j | ƒ |  _ t j	 d d d ƒ |  _
 t |  j
 ƒ |  _ t j j | d ƒ |  _ d  S(   Ni    i   ie   (   R   R   RC   R5   R   R+   t	   nuc_probsRB   t	   nfr_probst   linspacet   alphasR   t   lR   t   chi2t   ppft   cutoff(   R   R   R   t   insert_distt   ciRk   Rl   (    (    s:   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/Occupancy.pyR   [   s    		(   Rf   Rg   Rh   R   (    (    (    s:   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/Occupancy.pyRj   Y   s   c            s.  | j  | ‰ ˆ t j ˆ ƒ ‰ | j | ‰ ˆ t j ˆ ƒ ‰ t ‡ ‡ f d †  | j ƒ ‰ t j t ‡  ‡ f d †  t | j ƒ ƒ ƒ } t	 d ƒ | t j
 | ƒ <| j t j | ƒ } d t | ƒ | } | j t t j | | j k  ƒ d ƒ } | j t t j | | j k  ƒ d ƒ } | | | f S(   sm   function to calculate occupancy based on insert distribution
    also takes OccupancyCalcParams as input
    c            s   t  j |  ˆ d |  ˆ  ƒ S(   Ni   (   R   t   log(   t   alpha(   Rl   Rk   (    s:   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/Occupancy.pyR/   p   R0   c            s   t  j ˆ |  ˆ  ƒ S(   N(   R   R+   (   t   j(   t   insertsR-   (    s:   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/Occupancy.pyR/   q   R0   R*   i   i    (   Rk   R   R+   Rl   t   mapRn   t   arrayR;   Ro   R:   t   isnant   argmaxR6   R7   R4   Rr   (   Rx   t   biast   paramst   loglikst   occt   ratiosR   R   (    (   Rx   Rl   Rk   R-   s:   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/Occupancy.pyt   calculateOccupancyh   s    -))t   OccupancyTrackc           B   s/   e  Z d  Z d „  Z d „  Z d d d „ Z RS(   s(   Class for computing nucleosome occupancyc         C   s   t  j |  | | | d ƒ d  S(   Nt	   occupancy(   R   R   (   R   t   chromt   startt   end(    (    s:   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/Occupancy.pyR   ~   s    c   
      C   s   |  j  | j  } | | j k  r1 t d ƒ | ‚ n  t j |  j |  j  ƒ t d ƒ |  _ t j |  j |  j  ƒ t d ƒ |  _ t j |  j |  j  ƒ t d ƒ |  _	 xVt
 | j t |  j ƒ | j ƒ D]3} t j | j d d d | j d |  j  | | j d |  j  | | j d ƒ d	 d ƒ} t j | j d d d | j d |  j  | | j d |  j  | | j d ƒ d	 d ƒ} t | ƒ d k rÅ | | j } t | | j d t |  j ƒ ƒ }	 t | | | j ƒ \ |  j | |	 +|  j | |	 +|  j	 | |	 +qÅ qÅ Wd
 S(   s   Calculate Occupancy tracksH   For calculateOccupancyMLE, mat does not have sufficient flanking regionst   nanR   i    R   R†   R‡   i   t   axisN(   R†   t   flankt	   ExceptionR   R9   R‡   R:   R3   t   lower_boundt   upper_boundt   xranget   halfstepR   t   stepR+   R5   R   R7   R‚   t   occ_calc_params(
   R   t   matt   bias_matR~   t   offsetR,   t   new_insertst   new_biast   leftt   right(    (    s:   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/Occupancy.pyt   calculateOccupancyMLE€   s"    &&&(/	/	#iy   i   c         C   s‹   t  |  j | d d d | d d d t ƒ|  _ t  |  j | d d d | d d d t ƒ|  _ t  |  j | d d d | d d d t ƒ|  _ d  S(   Nt   windowt   gaussiant   sdt   modet   samet   norm(   R	   R3   R=   t   smoothed_valsRŒ   t   smoothed_lowerR   t   smoothed_upper(   R   t
   window_lenRœ   (    (    s:   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/Occupancy.pyt   makeSmoothed“   s    (   Rf   Rg   Rh   R   R™   R¤   (    (    (    s:   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/Occupancy.pyRƒ   |   s   		t   OccPeakc           B   s#   e  Z d  „  Z d „  Z d „  Z RS(   c         C   sž   | j  |  _  | |  _ | d |  _ d |  _ | j j | | j j |  _ | j j | | j j |  _ | j j | | j j |  _	 | j
 j d | ƒ |  _ d S(   s!   Class for storing occupancy peaksi   t   *t   posN(   R…   R†   R‡   t   strandR€   R    R¡   t	   occ_lowerR¢   t	   occ_uppert   covR5   t   reads(   R   R§   t   chunk(    (    s:   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/Occupancy.pyR   œ   s    		c      
   C   sF   d j  t t |  j |  j |  j |  j |  j |  j |  j	 g ƒ ƒ } | S(   Ns   	(
   R\   Ry   t   strR…   R†   R‡   R€   R©   Rª   R¬   (   R   Rd   (    (    s:   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/Occupancy.pyt   asBed¦   s    Bc         C   s   | j  |  j ƒ  d ƒ d S(   s   write bed line for peaks   
N(   t   writeR¯   (   R   t   handle(    (    s:   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/Occupancy.pyR°   ©   s    (   Rf   Rg   R   R¯   R°   (    (    (    s:   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/Occupancy.pyR¥   ›   s   	
	t   OccupancyParametersc           B   s,   e  Z d  Z d d d d d d d d „ Z RS(   s=   Class for storing parmeers related to Occupancy determinationix   gš™™™™™¹?i<   gÍÌÌÌÌÌì?i   c         C   sÐ   | |  _  t | ƒ |  _ | |  _ | d  k	 rB t j | ƒ |  _ n  | d d |  _ | |  _	 | |  _
 |	 |  _ | |  _ t d | | d |
 ƒ|  _ | d d k r¯ | d } n  | |  _ |  j d d |  _ d  S(   Ni   i   i    Rt   (   t   sepR   t   chrst   fastaRi   R   t   opent   pwmRš   t   min_occRŠ   t   bamR   Rj   R‘   R   R   (   R   Rs   R   Rµ   R·   R³   R¸   RŠ   Rd   R¹   Rt   R   (    (    s:   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/Occupancy.pyR   ±   s    							N(   Rf   Rg   Rh   Ri   R   (    (    (    s:   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/Occupancy.pyR²   ¯   s   	t   OccChunkc           B   s_   e  Z d  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z	 d „  Z
 d	 „  Z RS(
   s8   Class for calculating occupancy and occupancy peaks
    c         C   s:   | j  |  _  | j |  _ | j |  _ i  |  _ g  |  _ d  S(   N(   R†   R‡   R…   t   peakst   nfrs(   R   R­   (    (    s:   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/Occupancy.pyR   Æ   s
    	c         C   sX   t  |  j |  j |  j j |  j |  j j d |  j j ƒ |  _ |  j j |  j j	 ƒ d  S(   Ni    (
   R   R…   R†   R~   RŠ   R‡   R   R’   t   makeFragmentMatR¹   (   R   (    (    s:   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/Occupancy.pyt   getFragmentMatÌ   s    %c         C   sá   t  |  j |  j |  j j |  j |  j j d |  j j ƒ |  _ |  j j d  k	 rÝ t
 |  j |  j |  j j |  j j d |  j |  j j |  j j d d d t ƒ} | j |  j j |  j j |  j j ƒ |  j j | ƒ n  d  S(   Ni    i   i   Ru   (   R   R…   R†   R~   RŠ   R‡   R   R“   Rµ   Ri   R   Rš   R=   t   computeBiasR´   R·   t   makeBiasMat(   R   t
   bias_track(    (    s:   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/Occupancy.pyRÀ   Ð   s    %'.%c         C   sj   t  |  j |  j |  j ƒ |  _ |  j j |  j |  j |  j ƒ |  j j	 d |  j j
 d |  j j d ƒ d S(   s   calculate occupancy for chunkR£   Rœ   g      @N(   Rƒ   R…   R†   R‡   R€   R™   R’   R“   R~   R¤   Rš   RŠ   (   R   (    (    s:   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/Occupancy.pyt   calculateOccØ   s    c         C   sJ   t  |  j |  j |  j ƒ |  _ |  j j |  j d |  j j |  j j	 ƒ d S(   s   Get read coverage for regionsi    N(
   R   R…   R†   R‡   R«   t   calculateCoverageR’   R~   R   Rš   (   R   (    (    s:   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/Occupancy.pyt   getCovÝ   s    c         C   s‰   t  |  j j d |  j j d |  j j ƒ} xX | D]P } t | |  j |  ƒ } | j |  j j k r1 | j	 d k r1 | |  j
 | <q1 q1 Wd S(   s   Call peaks of occupancy profileR³   t
   min_signali    N(   R
   R€   R    R~   R³   R¸   R¥   R†   R©   R¬   R»   (   R   R»   t   peakt   tmp(    (    s:   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/Occupancy.pyt	   callPeaksá   s
    *$c         C   s¯   t  j |  j j ƒ } x“ |  j j ƒ  D]‚ } |  j j d |  j | j |  j j	 d |  j | j d |  j j	 ƒ } t  j
 | d d ƒ} | t t
 | ƒ ƒ } | | 7} q% W| S(   s#   Get nucleosomal insert distributionR†   R‡   i   R‰   (   R   R   R~   R   R»   t   keysR’   R5   R†   RŠ   R+   R:   (   R   t   nuc_distRÆ   t   subt   sub_sum(    (    s:   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/Occupancy.pyt
   getNucDistè   s    Gc         C   s?   | |  _  |  j ƒ  |  j ƒ  |  j ƒ  |  j ƒ  |  j ƒ  d S(   s<   proces chunk -- calculat occupancy, get coverage, call peaksN(   R~   R¾   RÀ   RÂ   RÄ   RÈ   (   R   R~   (    (    s:   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/Occupancy.pyt   processñ   s    	



c         C   s1   |  j  j ƒ  } x | D] } t |  | ƒ q Wd S(   s/   remove data from chunk-- deletes all attributesN(   t   __dict__RÉ   t   delattr(   R   t   namest   name(    (    s:   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/Occupancy.pyt
   removeDataù   s    (   Rf   Rg   Rh   R   R¾   RÀ   RÂ   RÄ   RÈ   RÍ   RÎ   RÓ   (    (    (    s:   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/Occupancy.pyRº   Ã   s   									(    (    (    ('   Rh   t   scipyR    R   R   t   numpyR   t   matplotlib.pyplott   pyplotRT   t	   pyximportt   installt   get_includet   pyatac.fragmentsizesR   t   pyatac.tracksR   R   t   pyatac.chunkR   t   pyatac.utilsR	   R
   R   t   pyatac.chunkmat2dR   R   t   pyatac.biasR   R   t   scipy.specialR   R   Rj   R‚   Rƒ   R¥   R²   Rº   (    (    (    s:   /tmp/pip-install-bGcd2k/NucleoATAC/nucleoatac/Occupancy.pyt   <module>   s&    D	