ó
¦s§]c           @   s	  d  d l  Z  d  d l Z d  d l m Z d  d l Td  d l Td e f d „  ƒ  YZ d e f d „  ƒ  YZ	 d e f d	 „  ƒ  YZ
 d d d d d
 d „ Z d d d „ Z d d d „ Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d d d d „ Z d „  Z d S(   iÿÿÿÿN(   t   Tracker(   t   *t   ChromParametersc           B   s;   e  Z d  Z d „  Z d „  Z d „  Z d „  Z d „  Z RS(   s9   Basic information on chromosome, inferred from input filec         C   sg   | d  k	 r t | ƒ |  _ n  | d  k	 r< t | ƒ |  _ n  | d  k	 rZ t | ƒ |  _ n  | |  _ d  S(   N(   t   Nonet   intt   minPost   maxPost   rest   name(   t   selfR   R   R   R   (    (    sm   /oak/stanford/groups/akundaje/marinovg/Symbiodinium/2019-09-27-figures/multi_mds/miniMDS-master/data_tools.pyt   __init__   s    c         C   s   t  |  j |  j |  j ƒ d S(   s   Number of possible locii   (   R   R   R   R   (   R	   (    (    sm   /oak/stanford/groups/akundaje/marinovg/Symbiodinium/2019-09-27-figures/multi_mds/miniMDS-master/data_tools.pyt	   getLength   s    c         C   sE   | |  j  k  s% | |  j |  j k r) d St | |  j  |  j ƒ Sd S(   s`   Converts genomic coordinate into absolute index. Absolute indexing includes empty (zero) points.N(   R   R   R   R   R   (   R	   t   genCoord(    (    sm   /oak/stanford/groups/akundaje/marinovg/Symbiodinium/2019-09-27-figures/multi_mds/miniMDS-master/data_tools.pyt   getAbsoluteIndex   s    %c         C   s   |  j  |  j | S(   s/   Converts absolute index into genomic coordinate(   R   R   (   R	   t	   abs_index(    (    sm   /oak/stanford/groups/akundaje/marinovg/Symbiodinium/2019-09-27-figures/multi_mds/miniMDS-master/data_tools.pyt   getGenCoord   s    c         C   sE   |  j  | } |  j | | } |  j | | } t | | | |  j ƒ S(   s*   Creates low-res version of this chromosome(   R   R   R   R   R   (   R	   t   resRatiot   lowRest	   lowMinPost	   lowMaxPos(    (    sm   /oak/stanford/groups/akundaje/marinovg/Symbiodinium/2019-09-27-figures/multi_mds/miniMDS-master/data_tools.pyt	   reduceRes#   s    (   t   __name__t
   __module__t   __doc__R
   R   R   R   R   (    (    (    sm   /oak/stanford/groups/akundaje/marinovg/Symbiodinium/2019-09-27-figures/multi_mds/miniMDS-master/data_tools.pyR   	   s   					t	   Structurec           B   s•   e  Z d  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z	 d „  Z
 d	 „  Z d
 „  Z d „  Z d „  Z d „  Z d „  Z d „  Z RS(   sB   Intrachromosomal structure of points or substructures in 3-D spacec         C   sk   | |  _  t | ƒ d k s' | d  k r3 g  |  _ n |  j | ƒ | |  _ | d  k	 rg t | ƒ |  _ n  d  S(   Ni    (   t   pointst   lenR   t
   structurest   setstructurest   chromR   t   offset(   R	   R   R   R   R   (    (    sm   /oak/stanford/groups/akundaje/marinovg/Symbiodinium/2019-09-27-figures/multi_mds/miniMDS-master/data_tools.pyR
   ,   s    		c         C   s    g  |  j  ƒ  D] } | j ^ q S(   N(   t	   getPointst   pos(   R	   t   point(    (    sm   /oak/stanford/groups/akundaje/marinovg/Symbiodinium/2019-09-27-figures/multi_mds/miniMDS-master/data_tools.pyt	   getCoords6   s    c         C   sA   x: t  | |  j ƒ  ƒ D]# \ } } | |  j | |  j _ q Wd  S(   N(   t   zipt   nonzero_abs_indicesR   R   R    (   R	   t   coordst   coordR   (    (    sm   /oak/stanford/groups/akundaje/marinovg/Symbiodinium/2019-09-27-figures/multi_mds/miniMDS-master/data_tools.pyt	   setCoords9   s    "c         C   s)   t  j g  |  j ƒ  D] } | j ^ q ƒ S(   s)   Absolute indices for all non-zero points.(   t   npt   arrayR   t   absolute_index(   R	   R!   (    (    sm   /oak/stanford/groups/akundaje/marinovg/Symbiodinium/2019-09-27-figures/multi_mds/miniMDS-master/data_tools.pyR$   =   s    c         C   s$   |  j  ƒ  t |  j j |  j j ƒ S(   sV   Nonzero bin numbers with indexing relative to chromosome position 0 (not chrom.minPos)(   R$   R   R   R   R   (   R	   (    (    sm   /oak/stanford/groups/akundaje/marinovg/Symbiodinium/2019-09-27-figures/multi_mds/miniMDS-master/data_tools.pyt   nonzero_bins_whole_chromA   s    c         C   s!   |  j  t j |  j  d k ƒ d S(   s   All non-zero pointsi    (   R   R(   t   where(   R	   (    (    sm   /oak/stanford/groups/akundaje/marinovg/Symbiodinium/2019-09-27-figures/multi_mds/miniMDS-master/data_tools.pyR   E   s    c         C   sŒ   |  j  | | d !} |  j j | ƒ |  j _ |  j j | ƒ |  j _ x. t j | d k ƒ d D] } | | | _ q^ W| |  _  |  j ƒ  d S(   sL   Set structure's points to only include start_abs_index through end_abs_indexi   i    N(	   R   R   R   R   R   R(   R,   R*   t   set_rel_indices(   R	   t   start_abs_indext   end_abs_indexR   R   (    (    sm   /oak/stanford/groups/akundaje/marinovg/Symbiodinium/2019-09-27-figures/multi_mds/miniMDS-master/data_tools.pyt   subsamplePointsI   s     	c         C   s)   g  |  j  ƒ  D] } |  j j | ƒ ^ q S(   s)   Non-zero genomic coordinates of structure(   R$   R   R   (   R	   R   (    (    sm   /oak/stanford/groups/akundaje/marinovg/Symbiodinium/2019-09-27-figures/multi_mds/miniMDS-master/data_tools.pyt   getGenCoordsT   s    c         C   s   |  j  j | ƒ } | d k r" d S| |  j 8} | d k rw | t |  j ƒ k  rw |  j | } | d k rm d S| j Sn d Sd S(   s0   Converts genomic coordinate into relative index.i    N(   R   R   R   R   R   R   t   relative_index(   R	   R   R   R!   (    (    sm   /oak/stanford/groups/akundaje/marinovg/Symbiodinium/2019-09-27-figures/multi_mds/miniMDS-master/data_tools.pyt   get_rel_indexX   s    !
c         C   s˜   | |  _  t j t g  | D] } t | j ƒ  ƒ ^ q ƒ d d t j ƒ|  _ xD |  j  D]9 } x0 | j D]% } | d k rg | |  j | j <qg qg WqW Wd  S(   Ni   t   dtypei    (   R   R(   t   zerost   maxR$   t   objectR   R*   (   R	   R   t	   structureR!   (    (    sm   /oak/stanford/groups/akundaje/marinovg/Symbiodinium/2019-09-27-figures/multi_mds/miniMDS-master/data_tools.pyR   h   s    	Dc         C   s,   t  | g  |  j | ƒ } |  j j | ƒ d S(   s&   Creates substructure containing pointsN(   R   R   R   t   append(   R	   R   R   t   substructure(    (    sm   /oak/stanford/groups/akundaje/marinovg/Symbiodinium/2019-09-27-figures/multi_mds/miniMDS-master/data_tools.pyt   createSubstructurep   s    c         C   sã   | d k r' t j t j d ƒ ƒ } n  | d k rQ t j t j d ƒ ƒ j } n  t j |  j ƒ  ƒ } t | ƒ } t j | | j t j	 | d | f ƒ j ƒ } x; t
 |  j ƒ  ƒ D]' \ } } | | |  j | |  j _ q´ Wd S(   s   Rotates by r; translates by ti   i   N(   R   R(   t   matt   identityR5   t   TR"   R   R)   t   tilet	   enumerateR$   R   R   R    (   R	   t   rt   tt   at   nt   a_transformedt   iR   (    (    sm   /oak/stanford/groups/akundaje/marinovg/Symbiodinium/2019-09-27-figures/multi_mds/miniMDS-master/data_tools.pyt	   transformv   s    /c         C   s,  t  | d ƒ } | j |  j j d ƒ | j t |  j j ƒ d ƒ | j t |  j j ƒ d ƒ |  j } x© |  j D]ž } | d k r· | j d j	 t | ƒ d d d f ƒ d ƒ nS | j d j	 t | ƒ t | j
 d ƒ t | j
 d ƒ t | j
 d ƒ f ƒ d ƒ | d 7} qv WWd  QX| j ƒ  d  S(   Nt   ws   
i    s   	t   nani   i   (   t   opent   writeR   R   t   strR   R   R   R   t   joinR    t   close(   R	   t   outpatht   outR   R!   (    (    sm   /oak/stanford/groups/akundaje/marinovg/Symbiodinium/2019-09-27-figures/multi_mds/miniMDS-master/data_tools.pyRK   ‚   s    	/Sc         C   sS   xL t  |  j ƒ  ƒ D]8 \ } } | |  j k s4 t ‚ | |  j | |  j _ q Wd S(   s;   Relative indexing is index relative to non-zero points onlyN(   R@   R$   R   t   AssertionErrorR   R2   (   R	   RF   R   (    (    sm   /oak/stanford/groups/akundaje/marinovg/Symbiodinium/2019-09-27-figures/multi_mds/miniMDS-master/data_tools.pyR-      s    c         C   sv   t  |  ƒ } xc t |  j ƒ D]R \ } } | d k r | j \ } } } | | | | | | f |  j | _ q q Wd S(   s,   Rescale radius of gyration of structure to 1i    N(   t   radius_of_gyrationR@   R   R    (   R	   t   rgRF   R!   t   xt   yt   z(    (    sm   /oak/stanford/groups/akundaje/marinovg/Symbiodinium/2019-09-27-figures/multi_mds/miniMDS-master/data_tools.pyt   rescale–   s
    (   R   R   R   R
   R"   R'   R$   R+   R   R0   R1   R3   R   R;   RG   RK   R-   RW   (    (    (    sm   /oak/stanford/groups/akundaje/marinovg/Symbiodinium/2019-09-27-figures/multi_mds/miniMDS-master/data_tools.pyR   *   s    	
													t   Pointc           B   s   e  Z d  Z d „  Z RS(   s   Point in 3-D spacec         C   sR   | |  _  | |  _ | d  k	 r0 t | ƒ |  _ n  | d  k	 rN t | ƒ |  _ n  d  S(   N(   R    R   R   R   R*   R2   (   R	   R    R   R*   R2   (    (    sm   /oak/stanford/groups/akundaje/marinovg/Symbiodinium/2019-09-27-figures/multi_mds/miniMDS-master/data_tools.pyR
       s    		(   R   R   R   R
   (    (    (    sm   /oak/stanford/groups/akundaje/marinovg/Symbiodinium/2019-09-27-figures/multi_mds/miniMDS-master/data_tools.pyRX   ž   s   i    c      
   C   sú  | d k r t |  ƒ } n  | d k r3 | j } n  | d k rK | j } n  t g  g  | | ƒ } t j t | | | j ƒ d d t	 ƒ| _
 | d k	 r« t d | ƒ } n  t |  ƒ 3} x| D]}	 |	 j ƒ  j ƒ  }	 t |	 d ƒ }
 t |	 d ƒ } |
 | k r¿|
 | k r¿| | k r¿| | k r¿| j j |
 ƒ } | j j | ƒ } | | k r¿t d | j | d ƒ | j
 t |
 | | j ƒ <t d | j | d ƒ | j
 t | | | j ƒ <q¿n  | d k	 rÁ | j ƒ  qÁ qÁ W| j ƒ  Wd QX| j ƒ  | S(	   s5   Initializes structure from intrachromosomal BED file.i   R4   s   Identifying locii   i    N(   i    i    i    (   i    i    i    (   R   t   chromFromBedR   R   R   R(   R5   R   R   R7   R   R    RJ   t   stript   splitR   R   RX   t	   incrementRN   R-   (   t   patht   sizeR   t   startt   endR   R8   t   trackert   listFilet   linet   pos1t   pos2t
   abs_index1t
   abs_index2(    (    sm   /oak/stanford/groups/akundaje/marinovg/Symbiodinium/2019-09-27-figures/multi_mds/miniMDS-master/data_tools.pyt   structureFromBed¨   s4    -006
c         C   s›  t  j j } d } d j |  ƒ GHt |  ƒ } x t | ƒ D]ò \ } } | j ƒ  j ƒ  } | d k sr | d k rt	 | d ƒ } t	 | d ƒ }	 | d k rÈ t
 | |	 f ƒ }
 |
 | k  rÈ |
 } qÈ n  | d k rt | |	 f ƒ } | | k rþ | } qþ qn  | d k r< | d } t	 | d ƒ | } q< q< W| j ƒ  Wd QXt	 t j t | ƒ | ƒ ƒ | } t	 t j t | ƒ | ƒ ƒ | } t | | | | ƒ S(   s?   Initialize ChromParams from intrachromosomal file in BED formati    s   Scanning {}i   i   i   N(   t   syst
   float_infoR6   t   formatRJ   R@   RZ   R[   R   R   t   minRN   R(   t   floort   floatt   ceilR   (   R]   R   R   t   overall_minPost   overall_maxPost   infileRF   Rc   Rd   Re   t   curr_minPost   curr_maxPosR   R   (    (    sm   /oak/stanford/groups/akundaje/marinovg/Symbiodinium/2019-09-27-figures/multi_mds/miniMDS-master/data_tools.pyRY   Í   s0    
##c      	   C   s  | d k r t |  | ƒ } n  | j ƒ  } t | ƒ } t j | | f ƒ } | d k	 ri t d | ƒ } n  t |  ƒ â } xÎ | D]Æ } | j ƒ  j	 ƒ  } t
 | d ƒ }	 t
 | d ƒ }
 | j |	 ƒ } | j |
 ƒ } | d k	 r,| d k	 r,t | d ƒ } | | | f c | 7<| | | f c | 7<n  | d k	 r | j ƒ  q q W| j ƒ  Wd QXt j g  | D] } t | ƒ ^ qfƒ } t t j | d k ƒ d ƒ d k rÔt j | j ƒ  ƒ t j | d k ƒ d GHn  t t j | d k ƒ d ƒ d k sÿt ‚ | S(   s=   Converts BED file to matrix. Only includes loci in structure.s   Filling matrixi   i   i   Ni    (   R   Rh   R$   R   R(   R5   R    RJ   RZ   R[   R   R3   Rn   R\   RN   R)   t   sumR,   R1   RQ   (   R]   R^   R8   t   abs_indicest	   numpointsR<   Ra   Rr   Rc   t   loc1t   loc2t   index1t   index2t   valt   rowt   rowsums(    (    sm   /oak/stanford/groups/akundaje/marinovg/Symbiodinium/2019-09-27-figures/multi_mds/miniMDS-master/data_tools.pyt
   matFromBedè   s4    (%.+c         C   sj  |  j  j | ƒ } t t |  j ƒ | ƒ d } t t j | d t j ƒg  | |  j	 | ƒ } g  t
 | ƒ D] } g  ^ qj } xK |  j ƒ  D]= } g  } | j |  j	 }	 t |	 | ƒ }
 | |
 j | ƒ q‰ W| j	 } x t | ƒ D]‚ \ } } t | ƒ d k rà t j t j g  | D] } | j ^ qƒ d d ƒ} t | | | | j	 | ƒ | j | <| d 7} qà qà W| S(   s   Reduces resolution of structurei   R4   i    t   axis(   R   R   R   R   R   R   R(   R5   R7   R   t   rangeR   R*   R9   R@   t   meanR)   R    RX   (   t   highstructureR   t   lowChromt   low_nt   lowstructureRF   t   allPointsToMerget	   highPointt   pointsToMerget   high_abs_indext   low_abs_indext   indexR!   t	   meanCoord(    (    sm   /oak/stanford/groups/akundaje/marinovg/Symbiodinium/2019-09-27-figures/multi_mds/miniMDS-master/data_tools.pyt	   highToLow  s     .	4#c      	   C   s¡  t  } t |  ƒ T} | j ƒ  j ƒ  } t | j ƒ  j ƒ  ƒ } t | j ƒ  j ƒ  ƒ } t | d  | | ƒ } t g  g  | d ƒ } d } xÎ | rW| j ƒ  j ƒ  j ƒ  }	 t	 |	 ƒ d k rÃ t
 } qŠ t |	 d ƒ }
 |	 d d k rì d } nX t |	 d ƒ } t |	 d ƒ } t |	 d ƒ } t | | | f | |
 | ƒ } | d 7} | j j | ƒ qŠ W| j ƒ  Wd  QXt j | j ƒ | _ | j j | j j |
 | j _ | S(   Ni    i   RI   i   i   (   t   TrueRJ   t   readlineRZ   R   R   R   R   R[   R   t   FalseRn   RX   R   R9   RN   R(   R)   R   R   R   R   (   R]   t   hasMoreRr   R   R   R   R   R8   RŒ   Rc   t   numR!   RT   RU   RV   (    (    sm   /oak/stanford/groups/akundaje/marinovg/Symbiodinium/2019-09-27-figures/multi_mds/miniMDS-master/data_tools.pyt   structure_from_file$  s2    			
 c         C   s¥  i  } xZ t  |  ƒ D]L \ } } x= | j ƒ  D]/ } | | k rQ | | c d 7<q, d | | <q, Wq Wg  } t |  ƒ } x4 | j ƒ  D]& } | | | k r‚ | j | ƒ q‚ q‚ Wt j | ƒ } xã |  D]Û } t | d | d | j j	 | j j	 | j j
 ƒ } t j | j ƒ  d t ƒ} xr t  | ƒ D]d \ } } | j j | ƒ }	 | j | ƒ }
 | j |	 | j j } t | | |
 | ƒ | |
 | j <q#W| | _ | | _ qÂ Wd S(   s/   Enforce that points be shared by all structuresi   i    iÿÿÿÿR4   N(   R@   R1   R   t   keysR9   R(   t   sortR   R   R   R   R5   R   R7   R   R   R   R    RX   (   R   t   gen_coord_dictRF   R8   t	   gen_coordt	   consensusRD   t	   new_chromt
   new_pointst   old_abs_indext   new_abs_indexR    (    (    sm   /oak/stanford/groups/akundaje/marinovg/Symbiodinium/2019-09-27-figures/multi_mds/miniMDS-master/data_tools.pyt   make_compatibleA  s,    3$	c         C   s¬   |  d j  } |  d j } x8 |  D]0 } | j  | k s< t ‚ | j | k s! t ‚ q! Wt g  |  D] } | j ^ q_ ƒ } t g  |  D] } | j ^ q ƒ } t | | | | ƒ S(   s(   Enforce that chromosomes have same rangei    (   R   R   RQ   R6   R   Rl   R   R   (   t   chromst   consensus_rest   consensus_nameR   R   R   (    (    sm   /oak/stanford/groups/akundaje/marinovg/Symbiodinium/2019-09-27-figures/multi_mds/miniMDS-master/data_tools.pyt   consensus_chrom^  s    ""c   
      C   s`  i  } xZ t  |  ƒ D]L \ } } x= | j ƒ  D]/ } | | k rQ | | c d 7<q, d | | <q, Wq Wg  } t |  ƒ } x4 | j ƒ  D]& } | | | k r‚ | j | ƒ q‚ q‚ Wt j | ƒ } xž |  D]– } t j | j j	 ƒ  d t
 ƒ} xf t  | ƒ D]X \ } } | j j | ƒ } | j | | j j }	 t |	 | j | | ƒ | | | j <qó W| | _ qÂ Wd S(   sN   Enforce that points be shared by all structures. Don't change ChromParameters.i   R4   N(   R@   R1   R   R•   R9   R(   R–   R5   R   R   R7   R   R   R   R    RX   (
   R   R—   RF   R8   R˜   R™   RD   R›   R   R    (    (    sm   /oak/stanford/groups/akundaje/marinovg/Symbiodinium/2019-09-27-figures/multi_mds/miniMDS-master/data_tools.pyt   make_points_compatiblei  s&    'c         C   s¾   t  | | ƒ } t |  ƒ t | ƒ } xc t | j ƒ D]R \ } } | d k r5 | j \ } } }	 | | | | |	 | f | j | _ q5 q5 Wt | |  ƒ \ }
 } | | :} | j |
 | ƒ d  S(   Ni    (   RŽ   RR   R@   R   R    t   getTransformationRG   (   t   trueLowt   highSubstructuret	   res_ratiot   inferredLowt   scaling_factorRF   R!   RT   RU   RV   RA   RB   (    (    sm   /oak/stanford/groups/akundaje/marinovg/Symbiodinium/2019-09-27-figures/multi_mds/miniMDS-master/data_tools.pyRG   ƒ  s    ,
i   gš™™™™™©?c         C   sX  t  |  | | ƒ } t | j ƒ  ƒ t | ƒ k s6 t ‚ t | ƒ } t j | ƒ } xš t t | ƒ ƒ D]† } x} t | ƒ D]o }	 d | | | |	 f | | | |	 d }
 |
 d k rw |
 d | } | | | |	 f <| | |	 | f <qw qw Wqd Wt j g  | D] } t	 | ƒ ^ qû ƒ } t t j
 | d k ƒ d ƒ d k sAt ‚ | t j | ƒ } | S(   Ni   i    g      ð¿(   R   R   R$   RQ   t   get_expectedR(   t
   zeros_likeR   R)   Ru   R,   R‚   (   R]   R8   R^   t   alphat   weightt
   contactMatt   expectedt   distMatRF   t   jt	   correctedt   distR}   R~   (    (    sm   /oak/stanford/groups/akundaje/marinovg/Symbiodinium/2019-09-27-figures/multi_mds/miniMDS-master/data_tools.pyt   distmat”  s    $,(+c         C   s6   t  |  ƒ $ } x t | ƒ D] \ } } q WWd  QX| S(   N(   RJ   R@   (   R]   t   in_fileRF   Rc   (    (    sm   /oak/stanford/groups/akundaje/marinovg/Symbiodinium/2019-09-27-figures/multi_mds/miniMDS-master/data_tools.pyt   size_from_bedª  s    
(   Ri   t   numpyR(   t   toolsR    t   linear_algebrat   tadR7   R   R   RX   R   Rh   RY   R   RŽ   R”   Rž   R¢   R£   RG   R´   R¶   (    (    (    sm   /oak/stanford/groups/akundaje/marinovg/Symbiodinium/2019-09-27-figures/multi_mds/miniMDS-master/data_tools.pyt   <module>   s$   

!t
%#						