ó
?îec           @   sħ   d  Z  d d l Z d d l Z d d l m Z d d l Z d d l Z d d l m Z d   Z	 d d d e d  Z d	   Z d
 d d d d  Z d   Z d   Z d   Z d S(   sw   
General tools for dealing with ATAC-Seq data using Python.

@author: Alicia Schep, Greenleaf Lab, Stanford University
i˙˙˙˙N(   t   signal(   t   copyc         C   s   t  j |  d t } | S(   s   Conduct shell command.t   shell(   t
   subprocesst   check_outputt   True(   t   cmdt   output(    (    s2   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/utils.pyt   shell_command   s    t   flatt   validc         C   sN  | d
 k r t  d   n  | d d k rE t j d  | d 7} n  | d k rn | d k rn | d d } n  | d k r t j | |  } n  | d k r­ t j |  } n  t |   } d | t j	 |   <t j
 | | d	 | } | rJt j t |    }	 d |	 t j	 |   <t j
 | |	 d	 | }
 t j |
 |
 d k <| |
 } n  | S(   s	  smoothes input signal using either flat or gaussian window

    options for window are "flat" and "gaussian"
    if window is "gaussian", sd should be provided.
    for guassian default sd is (window_len-1)/6
    norm means whether window should integrate to 1
    R	   t   gaussians=   Incorrect window input for smooth. Options are flat, gaussiani   i   s;   Window length is even number.  Needs to be odd so adding 1.g      @i    t   mode(   R	   R   N(   t	   Exceptiont   warningst   warnt   NoneR    R   t   npt   onesR   t   isnant   convolvet   lent   nan(   t   sigt
   window_lent   windowt   sdR   t   normt   wt	   sig_nonant   smoothedt   norm_sigt   smoothed_norm(    (    s2   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/utils.pyt   smooth   s*    	c   	      C   s1  t  j |  j  } t  j |  j  } t  j |  } |  j d } xà | d k r"| | } | d 7} | | d k rC d | | <d | | <| d } x< | d k rÏ |  | |  | | k  rÏ d | | <| d 7} q W| d } xB | |  j k  r|  | |  | | k  rd | | <| d 7} qŬ WqC qC W|  | d k S(   sŜ   Greedy algorithm for taking peaks and turning to set with at least sep distance
        between peaks.  First include peak with highest sig value, then next greatest
        not within sep distance from that one, and so oni   i    i˙˙˙˙(   R   t   zerost   sizet   argsort(	   t   peaksR   t   sept   excludet   keept   stt   jt   indt   k(    (    s2   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/utils.pyt   reduce_peaks8   s&    




'

*
i    ix   i   c   
      C   s5  t  t j |    d k ry t  t j |    t |   k rI t j g   St |  t j |    } | |  t j |   <n  | d	 k r | d } n  t j j d d  } t |   } t	 j
 |  d | j d d
 |  d | d } | |  | | k } | | | k } | | | | k  } |  | }	 t | |	 |  S(   sı   Greedy algorithm for peak calling-- first call all local maxima,
    then call greatest maxima as a peak, then next greatest that isn't within
    'sep' distance of that peak, and so oni    i   t   seedi   i   i
   iô˙˙˙t   orderNgê-q=(   t   sumR   R   R   t   arrayt   minR   t   randomt   RandomStateR    t	   argrelmaxt   uniformR-   (
   t   sigvalst
   min_signalR&   t   boundaryR/   t   replaceR3   t   lR%   R   (    (    s2   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/utils.pyt
   call_peaksR   s     !$
c         C   sj   i  } t  j |   } | j } | j } | j   x2 t t |   D] } t | |  | | | <qD W| S(   s/   get chromosome size information from fasta file(   t   pysamt	   FastaFilet
   referencest   lengthst   closet   rangeR   t   int(   t	   fastafilet   outt   fastat	   chr_namest   chr_lengthst   i(    (    s2   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/utils.pyt   read_chrom_sizes_from_fastah   s    		
c         C   sm   i  } t  j |  d  } | j } | j } | j   x2 t t |   D] } t | |  | | | <qG W| S(   s,   get chromosome size information from bamfilet   rb(   R=   t   SamfileR@   R?   RA   RB   R   RC   (   t   bamfileRE   t   bamRH   RG   RI   (    (    s2   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/utils.pyt   read_chrom_sizes_from_bamt   s    		
c         C   sZ   i  } t  |  d  } x> | D]6 } | j d  j d  } t | d  | | d <q W| S(   s:   get chromosome size information from chromosome sizes filet   rs   
s   	i   i    (   t   opent   rstript   splitRC   (   t	   sizesFileRE   t   ft   linet   keys(    (    s2   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/utils.pyt   read_chrom_sizes   s    (   t   __doc__R   t   numpyR   t   scipyR    R=   R   R   R   R   R   R!   R-   R<   RJ   RO   RX   (    (    (    s2   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/utils.pyt   <module>   s   		 			