
?ec           @   s   d  Z  d d l Z d d l j Z d d l m Z d d l m	 Z	 d d l
 m Z d d l Z e j d i e j   d 6 d d l m Z m Z d d	 l m Z m Z m Z d
 e	 f d     YZ d e f d     YZ d e f d     YZ d S(   sw   
General tools for dealing with ATAC-Seq data using Python.

@author: Alicia Schep, Greenleaf Lab, Stanford University
iN(   t   BedGraphFile(   t   Chunk(   t   smootht
   setup_argst   include_dirs(   t   getInsertionst   getStrandedInsertions(   t   get_sequencet
   seq_to_matt
   complementt   Trackc           B   s   e  Z d  Z d d e d  Z d d d  Z d d d e d  Z d d e	 j
 d d  Z d d  Z d   Z d	 d d
 e d  Z d d d d  Z d d d d d  Z d d e d  Z RS(   s0   Generic class for various types of signal trackst   trackc         C   sq   t  j |  | | | d | | |  _ | d  k r= d  |  _ n0 t |  |  j   k ra | |  _ n t d   d  S(   Nt   names5   Input vals must be of length as set by start and end!(   R   t   __init__t   logt   Nonet   valst   lent   lengtht	   Exception(   t   selft   chromt   startt   endR   R   R   (    (    s3   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/tracks.pyR      s    	c         C   s\   | r | |  _  n  | r$ | |  _ n  t |  |  j |  j  k rO t d   n  | |  _ d S(   s   Assign values to tracks`   The values being assigned to track do not                     span the start to end of the trackN(   R   R   R   R   R   (   R   R   R   R   (    (    s3   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/tracks.pyt   assign_track   s    c   
   	   C   s  | d k r |  j } n  | d k r0 |  j } n  | d k rH |  j } n  t |  |  j |  j k r t |  G|  j |  j GHt d   n  d } d } d } x t t |   D] }	 | |	 | k r q t j | |	  r | |	 } q | d k	 ret j |  re| s| d k rN| d j	 t
 t |  j | | |	 | g   d 7} n  | |	 } | |	 } q | |	 } | |	 } q W| d k r| r| d j	 t
 t |  j | | | g   d 7} qnA t j |  s| d j	 t
 t |  j | | | g   d 7} n  | j |  d S(   sz   Write track to output file handle

        If vals are specified use those values
        Othersise use self.vals
        sN   Error! Inconsistency between length of             values and start/end valuesi    t    s   	s   
N(   R   R   R   R   R   R   t   ranget   npt   isnant   joint   mapt   strR   t   write(
   R   t   handleR   R   R   t
   write_zerot
   prev_valuet   start_ranget   outputt   i(    (    s3   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/tracks.pyt   write_track%   s>    6

52c         C   s   | r | |  _  n  | r$ | |  _ n  | rM |  j  | |  _  |  j | |  _ n  t |  } | j |  j |  j  |  j d | |  _ | j   d S(   s)   Read track values from BigWig file handlet   emptyN(   R   R   R    t   readR   R   t   close(   R   t   bedgraphR   R   R(   t   flankR!   (    (    s3   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/tracks.pyt
   read_trackK   s    i   c         C   s=   |  j  r d GHn  |  j | } t j  |  |  _ t |  _  d S(   sD   Log values.  Add psuedo count so values don't equal 0 before loggings&   Logging a track that is already log...N(   R   R   R   t   True(   R   t   pseudot   adjusted(    (    s3   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/tracks.pyR   X   s
    	c         C   s3   |  j  s d GHn  t j |  j  |  _ t |  _  d S(   s   Take exponent of valuess(   taking exponent of a non-logged track...N(   R   R   t   expR   t   False(   R   (    (    s3   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/tracks.pyR1   _   s    	t   flatt   validc         C   sq   t  |  _ t |  j | d | d | d | d | |  _ | d k rm |  j | d |  _ |  j | d |  _ n  d S(   s    smoothing of trackt   windowt   sdt   modet   normR4   i   N(   R.   t   smoothedR   R   R   R   (   R   t
   window_lenR5   R6   R7   R8   (    (    s3   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/tracks.pyt   smooth_tracke   s    	c         C   s   | r5 y |  j  | |  j SWq t d   q Xnr | d k rM |  j } n  | d k re |  j } n  | |  j } | |  j } y |  j  | | !SWn t d   n Xd S(   s8   Obtain value of track at particular interval or positions-   Looks like position given doesn't match tracksW   Looks like dimensions from get probaby don't match track, or there are no vals in trackN(   R   R   R   R   R   (   R   R   R   t   post   x1t   x2(    (    s3   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/tracks.pyt   getn   s    c   	      C   s$  | d k r |  j } n  | d k r0 |  j } n  | d k rH |  j } n  |  j | |  } t j   } t j t | |  |  t j	 |  j
  t j |  | r| j |  t j |  d j | j d  d  d g  } t j t | |  | f  } t j | | d d n
 | j   d S(   s   plot the values t   .it   txtt	   delimiters   	N(   R   R   R   R   R?   t   pltt   figuret   plotR   t   xlabelR   t   ylabelt   savefigR*   R   t   splitR   t   vstackt   savetxtt   show(	   R   R   R   R   t   filenamet   valuest   figt	   filename2t   out(    (    s3   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/tracks.pyRE      s$    #i    c      	   C   sG   |  j  d k r7 t j |  | d | d | d | } | St d   d S(   sD   Modification of slop method for Chunk class to check if vals are sett   upt   downt   news!   Cannot slop Track if vals are setN(   R   R   R   t   slopR   (   R   t	   chromDictRR   RS   RT   RQ   (    (    s3   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/tracks.pyRU      s    $N(   t   __name__t
   __module__t   __doc__R   R2   R   R   R.   R'   R   t   nanR-   R   R1   R;   R?   RE   RU   (    (    (    s3   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/tracks.pyR
      s   	
&	t   InsertionTrackc           B   s}   e  Z d  Z d   Z d d d e d  Z d d d e d  Z d d d d	 g d
 d
 d  Z d d d d	 g d
 d
 d  Z RS(   s1   Class for getting and storing insertion positionsc         C   s   t  j |  | | | d  d  S(   Nt
   insertions(   R
   R   (   R   R   R   R   (    (    s3   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/tracks.pyR      s    i    i  c         C   sN   |  j  | |  _  |  j | |  _ t | |  j |  j  |  j | | |  |  _ d S(   s   Compute inserion trackN(   R   R   R   R   R   (   R   t   bamfileR,   t   lowert   uppert   atac(    (    s3   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/tracks.pyt   calculateInsertions   s    c         C   sj   |  j  | |  _  |  j | |  _ t | |  j |  j  |  j | | |  \ |  _ |  _ |  j |  j |  _ d S(   s<   Compute inserion track for plus and minus strands separatelyN(   R   R   R   R   t   plust   minusR   (   R   R]   R,   R^   R_   R`   (    (    s3   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/tracks.pyt   calculateStrandedInsertions   s    3t   Ct   Gt   At   Ti
   c         C   s   t  j t |  | | d f  } t  j |  j  d k r? | St | |  } t |  j |  j | |  j	 |  } t
 | |  } t | |  }	 xY t |  j    D]E }
 | |  j |
 |	 d d  | |
 | | |
 | d  f 7} q W| S(   s"   Get sequence content at insertionsi   i    N(   R   t   zerosR   t   sumR   t   maxR   R   R   R   R   R   R   R   (   R   t   fastat   nucleotidesRR   RS   t   matt   offsett	   seq_chunkt   sequencet   seq_matR&   (    (    s3   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/tracks.pyt   getInsertionSequences   s    ##Cc      	   C   sR  t  j t |  | | d f  } t  j |  j  d k r? | St | |  } t |  j |  j | |  j	 |  } t
 | |  } t |  }	 t | |  }
 t |	 |  } x t |  j    D] } | |  j | |
 d d  | | | | | | d  f 7} | |  j | t  j | d d  | | | | | | d  f  7} q W| S(   s>   Get sequence content at insertions, taking into account strandi   i    N(   R   Ri   R   Rj   R   Rk   R   R   R   R   R   R	   R   R   R   Rb   Rc   t   fliplr(   R   Rl   Rm   RR   RS   Rn   Ro   Rp   Rq   t   minus_sequenceRr   t   minus_seq_matR&   (    (    s3   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/tracks.pyt   getStrandedInsertionSequences   s    ##?L(	   RW   RX   RY   R   R.   Ra   Rd   Rs   Rw   (    (    (    s3   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/tracks.pyR[      s   	t   CoverageTrackc           B   s)   e  Z d  Z d   Z d   Z d   Z RS(   s)   Class for computing read center converagec         C   s   t  j |  | | | d  d  S(   Nt   coverage(   R
   R   (   R   R   R   R   (    (    s3   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/tracks.pyR      s    c      	   C   s   |  j  | j  | d } | d k  r3 t d   n  | | j } | | j } | d k r t j | j | |  | |  f d d } n% t j | j | |  f d d } t | | d d d d d	 t |  _ d
 S(   s6   Compute coverage of fragment centers using flat windowi   i    sa   Insufficient flanking region on                     mat to calculate coverage with desired windowt   axisR5   R3   R7   R4   R8   N(	   R   R   R^   R   Rj   Rn   R   R2   R   (   R   Rn   R^   R_   R:   Ro   t	   collapsed(    (    s3   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/tracks.pyt   calculateCoverage   s    2%c         C   s   |  j  | j  | d } | d k  r3 t d   n  | | j } | | j } | d k r t j | j | |  | |  f d d } n% t j | j | |  f d d } t | | d | d d d d	 d
 t |  _ d S(   s9   Compute coverage of fragment centers using gaussia windowi   i    sa   Insufficient flanking region on                     mat to calculate coverage with desired windowRz   R6   R5   t   gaussianR7   R4   R8   N(	   R   R   R^   R   Rj   Rn   R   R2   R   (   R   Rn   R^   R_   R:   R6   Ro   R{   (    (    s3   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/tracks.pyt   calculateCoverageSmooth   s    2%(   RW   RX   RY   R   R|   R~   (    (    (    s3   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/tracks.pyRx      s   		(   RY   t   numpyR   t   matplotlib.pyplott   pyplotRC   t   pyatac.bedgraphR    t   pyatac.chunkR   t   pyatac.utilsR   t	   pyximportt   installt   get_includet	   fragmentsR   R   t
   pyatac.seqR   R   R	   R
   R[   Rx   (    (    (    s3   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/tracks.pyt   <module>   s    -