ó
?îec           @   sv   d  Z  d d l Z d d l Z d d l Z d „  Z e j d d ƒ Z d „  Z d „  Z	 d „  Z
 d	 „  Z d
 „  Z d S(   sw   
General tools for dealing with ATAC-Seq data using Python.

@author: Alicia Schep, Greenleaf Lab, Stanford University
iÿÿÿÿNc         C   s_   t  j | ƒ } | j |  j |  j |  j ƒ } |  j d k rK t | ƒ } n  | j ƒ  | j	 ƒ  S(   s©   obtain sequence for an interval

        chunk:  chunk object for which sequenceuence is to be fetched
        fastafile: filename for fasta file with sequenceuence
    t   -(
   t   pysamt	   FastaFilet   fetcht   chromt   startt   endt   strandt   reverse_complementt   closet   upper(   t   chunkt	   fastafilet   handlet   sequence(    (    s0   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/seq.pyt   get_sequence   s    
t   ACGTt   TGCAc         C   s   |  j  t ƒ S(   s#   Get complement of DNA sequenceuence(   t	   translatet   DNA_Translation(   R   (    (    s0   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/seq.pyt
   complement   s    c         C   s   t  |  d d d … ƒ S(   s+   Get reverse complement of DNA sequenceuenceNiÿÿÿÿ(   R   (   R   (    (    s0   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/seq.pyR       s    c      
      sæ   t  | d ƒ ‰  t ‡  f d †  t t  | ƒ Dƒ ƒ sD t d ƒ ‚ n  t j t  | ƒ t  |  ƒ ˆ  d f ƒ } xr t t  | ƒ ƒ D]^ } t j t t g  t t  |  ƒ ˆ  d ƒ D]! } |  | | ˆ  !| | k ^ q­ ƒ ƒ | | <q€ W| S(   s'   Turn sequenceuence into matrix encodingi    c         3   s   |  ] } | ˆ  k Vq d  S(   N(    (   t   .0t   x(   t   l(    s0   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/seq.pys	   <genexpr>(   s    si   Usage Error! Nucleotides must all be of same length! No mixing single nucleotides with dinucleotides, etci   (	   t   lent   allt   mapt	   Exceptiont   npt   zerost   ranget   arrayt   int(   R   t   nucleotidest   matt   it   j(    (   R   s0   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/seq.pyt
   seq_to_mat%   s    %)\c         C   s«   t  j t | ƒ ƒ } d } t |  d ƒ } xl | D]d } | d d k r1 | j d ƒ j ƒ  } | g  | D] } | j | ƒ ^ qf 7} | t | ƒ 7} q1 q1 W| j ƒ  | | S(   s%   Get genomewide nucleotide frequenciesg        t   ri    t   >s   
(   R   R   R   t   opent   rstripR
   t   countR	   (   t   fastaR!   t   outt   nt   ft   lineR   R#   (    (    s0   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/seq.pyt   getNucFreqs/   s    &
c   	      C   s­   t  j t | ƒ ƒ } d } t j | ƒ } xn |  D]f } | j | j | j | j ƒ } | j	 ƒ  } | g  | D] } | j
 | ƒ ^ qk 7} | t | ƒ 7} q1 W| j ƒ  | | S(   s2   Get nucleotide frequences within regions of genomeg        (   R   R   R   R   R   R   R   R   R   R
   R*   R	   (	   t   chunksR+   R!   R,   R-   R   R   R   R#   (    (    s0   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/seq.pyt   getNucFreqsFromChunkList=   s    &
(   t   __doc__t   stringt   numpyR   R   R   t	   maketransR   R   R   R%   R0   R2   (    (    (    s0   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/seq.pyt   <module>   s   				
	