ó
?îec           @   sT   d  Z  d d l Z d d l Z d d d „  ƒ  YZ d „  Z d e f d „  ƒ  YZ d S(	   sw   
General tools for dealing with ATAC-Seq data using Python.

@author: Alicia Schep, Greenleaf Lab, Stanford University
iÿÿÿÿNt   Chunkc           B   sP   e  Z d  Z d d d d „ Z d „  Z d „  Z d d e d „ Z e d	 „ Z RS(
   s<   Class that stores reads for a particular chunk of the genomei   t   regiont   *c         C   s:   | |  _  | |  _ | |  _ | |  _ | |  _ | |  _ d  S(   N(   t   chromt   startt   endt   weightt   strandt   name(   t   selfR   R   R   R   R   R   (    (    s2   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/chunk.pyt   __init__   s    					c         C   s   |  j  |  j S(   N(   R   R   (   R	   (    (    s2   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/chunk.pyt   length   s    c      	   C   s@   d j  t t |  j |  j |  j |  j |  j |  j g ƒ ƒ } | S(   s   represent output as beds   	(	   t   joint   mapt   strR   R   R   R   R   R   (   R	   t   out(    (    s2   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/chunk.pyt   asBed   s    <i    c      
   C   sÈ   |  j  d k rE t d |  j | ƒ } t | |  j |  j | ƒ } n3 t d |  j | ƒ } t | |  j |  j | ƒ } | r² t |  j | | d |  j d |  j d |  j  ƒ} | S| |  _ | |  _ d S(   s3   extend region, checking for chromosomal constraintst   -i    R   R   R   N(	   R   t   maxR   t   minR   R   R    R   R   (   R	   t	   chromDictt   upt   downt   newt   newStartt   newEndR   (    (    s2   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/chunk.pyt   slop   s     	c      
   C   s¤   |  j  d k r3 |  j |  j ƒ  d } | d } n! |  j |  j ƒ  d } | d } | rŽ t |  j | | d |  j d |  j d |  j  ƒ} | S| |  _ | |  _ d  S(   NR   i   i   R   R   R   (   R   R   R   R   R    R   R   R   (   R	   R   R   R   R   (    (    s2   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/chunk.pyt   center)   s    
	(	   t   __name__t
   __module__t   __doc__R
   R   R   t   FalseR   R   (    (    (    s2   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/chunk.pyR       s   		c         C   s`   |  j  | j  k  r d S|  j  | j  k r, d S|  j | j k  rB d S| j | j k rX d Sd Sd S(   s   Compare positions of two chunksiÿÿÿÿi   i    N(   R   R   (   t   chunk1t   chunk2(    (    s2   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/chunk.pyt   _chunkCompare9   s    t	   ChunkListc        	   B   sÂ   e  Z d  „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d d e d „ Z	 e d d	 „ Z
 d
 „  Z e d d d d d d d d „ ƒ Z e d d d „ ƒ Z d d d „ Z d d d d „ Z RS(   c         G   s   t  j |  | ƒ d  S(   N(   t   listR
   (   R	   t   args(    (    s2   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/chunk.pyR
   H   s    c         G   s[   t  | ƒ d k r! t d ƒ ‚ n6 t | d t ƒ rK t j |  | d ƒ n t d ƒ ‚ d  S(   Ni   s   Wrong number of argumentsi    s   Expecting ChunkList(   t   lent
   ValueErrort
   isinstanceR#   R$   t   extend(   R	   R%   (    (    s2   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/chunk.pyR)   J   s
    c         G   s[   t  | ƒ d k r! t d ƒ ‚ n6 t | d t ƒ rK t j |  | d ƒ n t d ƒ ‚ d  S(   Ni   s   Wrong number of argumentsi    s   Expecting Chunk(   R&   R'   R(   R    R$   t   append(   R	   R%   (    (    s2   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/chunk.pyR*   Q   s
    c         G   sb   t  | ƒ d k r! t d ƒ ‚ n= t | d t ƒ rR t j |  | d | d ƒ n t d ƒ ‚ d  S(   Ni   s   Wrong number of argumentsi    s   Expecting Chunk(   R&   R'   R(   R    R$   t   insert(   R	   R%   (    (    s2   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/chunk.pyR+   X   s
    c         C   s   t  j |  d t ƒd S(   s   sort regionst   cmpN(   R$   t   sortR"   (   R	   (    (    s2   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/chunk.pyR-   _   s    c         C   sH   t  g  t t |  ƒ d ƒ D]' } t |  | |  | d ƒ d k ^ q ƒ S(   s   check that regions are sortedi   iÿÿÿÿ(   t   allt   xrangeR&   R"   (   R	   t   i(    (    s2   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/chunk.pyt   isSortedb   s    i    c         C   sQ   t  ƒ  } x0 |  D]( } | j | j | | | d t ƒƒ q W| rF | S| |  (d  S(   NR   (   R#   R*   R   t   True(   R	   R   R   R   R   R   R0   (    (    s2   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/chunk.pyR   e   s    	&iÿÿÿÿc         C   sÔ   |  j  ƒ  s |  j ƒ  n  t ƒ  } |  d } xƒ t d t |  ƒ ƒ D]l } |  | j | j k r— |  | j | j | k r— t |  | j | j ƒ | _ qB | j	 | ƒ |  | } qB W| j	 | ƒ | rÉ | S| |  (d S(   s#   Merge overlapping or nearby regionsi    i   N(
   R1   R-   R#   t   rangeR&   R   R   R   R   R*   (   R	   R   t   sepR   t   previousR0   (    (    s2   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/chunk.pyt   mergem   s    	
0c         C   s/   d } x" |  D] } | | j  ƒ  d 7} q W| S(   s   format regions as bedt    s   
(   R   (   R	   R   t   chunk(    (    s2   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/chunk.pyR   ~   s    i   s
   FASTA filec         C   s5  |  d d k r% t  j |  d ƒ } n t |  d ƒ } t ƒ  }	 d }
 d } d } | d k	 rd g  } n  xS| D]K} | j d ƒ j d ƒ } | r  | | d }
 n  | r· | | d } n  | rÎ | | d } n  t | d ƒ } t | d ƒ } | d	 } | d k	 r)| | j ƒ  k r)| j | ƒ qk n  | rt| | k  rD| } n  | | | d	 | k rt| | d	 | } qtn  | | | k rk |	 j t	 | d	 | | d
 |
 d | d | ƒƒ qk qk W| j
 ƒ  | d k	 r1t | ƒ d	 k r1t | ƒ } t t | ƒ ƒ d | d d j | ƒ d d } t j | ƒ n  |	 S(   s2   Make a list of chunks from a tab-delimited bedfileiýÿÿÿs   .gzt   rt   +s   
s   	i   i   i    R   R   R   s.    chromosome names in bed file not included in s   :
s   
 s4   These regions will be ignored in subsequent analysisN(   t   gzipt   openR#   t   Nonet   rstript   splitt   intt   keysR*   R    t   closeR&   t   setR   R   t   warningst   warn(   t   bedfilet
   weight_colt
   strand_colt   name_colR   t
   min_offsett
   min_lengtht   chrom_sourcet   infileR   R   R   R   t
   bad_chromst   linet   in_lineR   R   R   t   warn_message(    (    s2   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/chunk.pyt   read„   sL    		
	
,c            s   t  ˆ j ƒ  ƒ } ˆ d k r; t ‡ ‡ f d †  | Dƒ Œ  St ƒ  } xN | D]F ‰  | j t ‡  ‡ ‡ ‡ f d †  t ˆ ˆ ˆ  ˆ ˆ ƒ Dƒ Œ  ƒ qK W| Sd S(   s3   Convert dictionary of chromosome sizes to Chunklistc         3   s)   |  ] } t  | ˆ ˆ  | ˆ ƒ Vq d  S(   N(   R    (   t   .0R   (   R   t   offset(    s2   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/chunk.pys	   <genexpr>µ   s    c         3   s6   |  ], } t  ˆ  | t | ˆ ˆ ˆ  ˆ ƒ ƒ Vq d  S(   N(   R    R   (   RS   R0   (   R   R   RT   t	   splitsize(    s2   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/chunk.pys	   <genexpr>¹   s   N(   t   sortedRA   R=   R#   R)   R/   (   R   RU   RT   t   chrsR   (    (   R   R   RT   RU   s2   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/chunk.pyt   convertChromSizes°   s    	&c         C   s  | d k	 r· g  } d } d } d } xe t t |  ƒ ƒ D]Q } | |  | j ƒ  7} | | k r7 | j |  | | d !ƒ d } | d } q7 q7 W| | k r³ | j |  | | d !ƒ n  | S| d k	 rý g  t d t |  ƒ | ƒ D] } |  | | | !^ qÜ } | St d ƒ ‚ d S(   s*   splits list of chunks into set of sublistsi    i   s(   Need to provide items or bases argument!N(   R=   R3   R&   R   R*   R/   t	   Exception(   R	   t   basest   itemsR   R0   t   jt   k(    (    s2   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/chunk.pyR?   ¼   s$    6s   bed files
   fasta files8   Regions on these chromosomes will be ignored in analysisc         C   s»   t  g  |  D] } | j | k r
 | j ^ q
 ƒ } t | ƒ d k r· g  |  D] } | j | k rJ | ^ qJ |  (t t | ƒ ƒ d | d | d d j | ƒ d | } t j | ƒ n  d  S(   Ni    s    chromosome names in s    not included in s   :
s   
s   
 (   RC   R   R&   R   R   RD   RE   (   R	   t   chromst   chunklist_sourceRL   RE   t   xRN   RQ   (    (    s2   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/chunk.pyt   checkChromsÑ   s
    1);N(   R   R   R
   R)   R*   R+   R-   R1   R   R   R6   R   t   staticmethodR=   RR   RX   R?   Ra   (    (    (    s2   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/chunk.pyR#   G   s    							*(    (   R   R;   RD   R    R"   R$   R#   (    (    (    s2   /tmp/pip-install-bGcd2k/NucleoATAC/pyatac/chunk.pyt   <module>   s
   .	