3
s]9                 @   s   d dl Z d dlZd dlmZ d dlT d dlT G dd deZG dd deZ	G dd	 d	eZ
d"d
dZd#ddZd$ddZdd Zdd Zdd Zdd Zdd Zdd Zd%ddZd d! ZdS )&    N)Tracker)*c               @   s8   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d ZdS )ChromParametersz9Basic information on chromosome, inferred from input filec             C   s@   |d k	rt || _|d k	r$t || _|d k	r6t || _|| _d S )N)intminPosmaxPosresname)selfr   r   r   r	    r   m/oak/stanford/groups/akundaje/marinovg/Symbiodinium/2019-09-27-figures/multi_mds/miniMDS-master/data_tools.py__init__   s    


zChromParameters.__init__c             C   s   t | j| j | j d S )zNumber of possible loci   )r   r   r   r   )r
   r   r   r   	getLength   s    zChromParameters.getLengthc             C   s6   || j k s|| j| j krdS t|| j  | j S dS )z`Converts genomic coordinate into absolute index. Absolute indexing includes empty (zero) points.N)r   r   r   r   )r
   genCoordr   r   r   getAbsoluteIndex   s    z ChromParameters.getAbsoluteIndexc             C   s   | j | j|  S )z/Converts absolute index into genomic coordinate)r   r   )r
   	abs_indexr   r   r   getGenCoord   s    zChromParameters.getGenCoordc             C   s6   | j | }| j| | }| j| | }t|||| jS )z*Creates low-res version of this chromosome)r   r   r   r   r	   )r
   resRatioZlowResZ	lowMinPosZ	lowMaxPosr   r   r   	reduceRes#   s    
zChromParameters.reduceResN)	__name__
__module____qualname____doc__r   r   r   r   r   r   r   r   r   r   	   s   	r   c               @   s   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zd S )!	StructurezBIntrachromosomal structure of points or substructures in 3-D spacec             C   sH   || _ t|dks|d kr"g | _n
| j| || _|d k	rDt|| _d S )Nr   )pointslen
structuressetstructureschromr   offset)r
   r   r   r   r    r   r   r   r   ,   s    
zStructure.__init__c             C   s   dd | j  D S )Nc             S   s   g | ]
}|j qS r   )pos).0pointr   r   r   
<listcomp>7   s    z'Structure.getCoords.<locals>.<listcomp>)	getPoints)r
   r   r   r   	getCoords6   s    zStructure.getCoordsc             C   s2   x,t || j D ]\}}|| j|| j  _qW d S )N)zipnonzero_abs_indicesr   r    r!   )r
   coordsZcoordr   r   r   r   	setCoords9   s    zStructure.setCoordsc             C   s   t jdd | j D S )z)Absolute indices for all non-zero points.c             S   s   g | ]
}|j qS r   )absolute_index)r"   r#   r   r   r   r$   ?   s    z1Structure.nonzero_abs_indices.<locals>.<listcomp>)nparrayr%   )r
   r   r   r   r(   =   s    zStructure.nonzero_abs_indicesc             C   s   | j  t| jj| jj  S )zVNonzero bin numbers with indexing relative to chromosome position 0 (not chrom.minPos))r(   r   r   r   r   )r
   r   r   r   nonzero_bins_whole_chromA   s    z"Structure.nonzero_bins_whole_chromc             C   s   | j tj| j dkd  S )zAll non-zero pointsr   )r   r,   where)r
   r   r   r   r%   E   s    zStructure.getPointsc             C   sj   | j ||d  }| jj|| j_| jj|| j_x$tj|dkd D ]}||| _qFW || _ | j  dS )zLSet structure's points to only include start_abs_index through end_abs_indexr   r   N)	r   r   r   r   r   r,   r/   r+   set_rel_indices)r
   Zstart_abs_indexZend_abs_indexr   r   r   r   r   subsamplePointsI   s    zStructure.subsamplePointsc                s    fdd j  D S )z)Non-zero genomic coordinates of structurec                s   g | ]} j j|qS r   )r   r   )r"   r   )r
   r   r   r$   V   s    z*Structure.getGenCoords.<locals>.<listcomp>)r(   )r
   r   )r
   r   getGenCoordsT   s    zStructure.getGenCoordsc             C   s^   | j j|}|dkrdS || j8 }|dkrV|t| jk rV| j| }|dkrNdS |jS ndS dS )z0Converts genomic coordinate into relative index.Nr   )r   r   r    r   r   relative_index)r
   r   r   r#   r   r   r   get_rel_indexX   s    

zStructure.get_rel_indexc             C   sb   || _ tjtdd |D d tjd| _x2| j D ](}x"|jD ]}|dkr>|| j|j< q>W q2W d S )Nc             S   s   g | ]}t |j qS r   )maxr(   )r"   	structurer   r   r   r$   j   s    z+Structure.setstructures.<locals>.<listcomp>r   )dtyper   )r   r,   zerosr5   objectr   r+   )r
   r   r6   r#   r   r   r   r   h   s    $zStructure.setstructuresc             C   s    t |g | j|}| jj| dS )z&Creates substructure containing pointsN)r   r   r   append)r
   r   r    Zsubstructurer   r   r   createSubstructurep   s    zStructure.createSubstructurec             C   s   |dkrt jt jd}|dkr2t jt jdj}t j| j }t|}t j||j t j|d|f j}x.t	| j
 D ]\}}|| | j|| j  _qxW dS )zRotates by r; translates by tN   r   )r,   matidentityr8   Tr&   r   r-   tile	enumerater(   r   r    r!   )r
   rtanZa_transformedir   r   r   r   	transformv   s    "zStructure.transformc             C   s   t |d}|j| jjd  |jt| jjd  |jt| jjd  | j}x~| jD ]t}|dkr|jdj	t|dddfd  n>|jdj	t|t|j
d t|j
d t|j
d fd  |d7 }qXW W d Q R X |j  d S )Nw
r   	nanr      )openwriter   r	   strr   r   r    r   joinr!   close)r
   Zoutpathoutr   r#   r   r   r   rN      s    ">zStructure.writec             C   s>   x8t | j D ](\}}|| jks$t|| j|| j  _qW dS )z;Relative indexing is index relative to non-zero points onlyN)rA   r(   r    AssertionErrorr   r3   )r
   rF   r   r   r   r   r0      s    zStructure.set_rel_indicesc             C   sV   t | }xHt| jD ]:\}}|dkr|j\}}}|| || || f| j| _qW dS )z,Rescale radius of gyration of structure to 1r   N)radius_of_gyrationrA   r   r!   )r
   ZrgrF   r#   xyzr   r   r   rescale   s
    zStructure.rescaleN)r   r   r   r   r   r&   r*   r(   r.   r%   r1   r2   r4   r   r;   rG   rN   r0   rX   r   r   r   r   r   *   s    
r   c               @   s   e Zd ZdZdd ZdS )PointzPoint in 3-D spacec             C   s4   || _ || _|d k	rt|| _|d k	r0t|| _d S )N)r!   r   r   r+   r3   )r
   r!   r   r+   r3   r   r   r   r      s    
zPoint.__init__N)r   r   r   r   r   r   r   r   r   rY      s   rY   c             C   sd  |dkrt | }|dkr|j}|dkr,|j}tg g ||}tjt|| |j d td|_	|dk	rnt
d|}t| }x|D ]}	|	j j }	t|	d }
t|	d }|
|ko|
|ko||ko||kr2|jj|
}|jj|}||kr2td|j|d|j	t|
| |j < td|j|d|j	t|| |j < |dk	r~|j  q~W |j  W dQ R X |j  |S )	z5Initializes structure from intrachromosomal BED file.Nr   )r7   zIdentifying loci   r   )r   r   r   )r   r   r   )chromFromBedr   r   r   r,   r8   r   r   r9   r   r   rM   stripsplitr   r   rY   	incrementrQ   r0   )pathsizer   startendr    r6   trackerZlistFilelinepos1pos2Z
abs_index1Z
abs_index2r   r   r   structureFromBed   s4    "


"
$$rg   c             C   s"  t jj}d}tdj|  t| }xt|D ]\}}|j j }|dksR|dkrt	|d }t	|d }	|dkrt
||	f}
|
|k r|
}|dkrt||	f}||kr|}|dkr.|d }t	|d | }q.W |j  W dQ R X t	tjt|| | }t	tjt|| | }t||||S )z?Initialize ChromParams from intrachromosomal file in BED formatr   zScanning {}Nr   rZ   rL   )sys
float_infor5   printformatrM   rA   r\   r]   r   minrQ   r,   floorfloatceilr   )r_   r   r   Zoverall_minPosZoverall_maxPosinfilerF   rd   re   rf   Zcurr_minPosZcurr_maxPosr	   r   r   r   r   r[      s0    
r[   c             C   sp  |dkrt | |}|j }t|}tj||f}|dk	rBtd|}t| }x|D ]}|j j }t	|d }	t	|d }
|j
|	}|j
|
}|dk	r|dk	rt|d }|||f  |7  < |||f  |7  < |dk	rR|j  qRW |j  W dQ R X tjdd |D }ttj|dkd dkrLttj|j tj|dkd   ttj|dkd dkslt|S )	z=Converts BED file to matrix. Only includes loci in structure.NzFilling matrixr   rZ      c             S   s   g | ]}t |qS r   )sum)r"   rowr   r   r   r$     s    zmatFromBed.<locals>.<listcomp>r   )rg   r(   r   r,   r8   r   rM   r\   r]   r   r4   rn   r^   rQ   r-   r/   rj   r2   rS   )r_   r`   r6   Zabs_indicesZ	numpointsr=   rc   rp   rd   Zloc1Zloc2Zindex1index2valrowsumsr   r   r   
matFromBed   s4    





$ rw   c             C   s   | j j|}tt| j| d }ttj|tjdg || j	| }dd t
|D }x:| j D ].}g }|j| j	 }t|| }	||	 j| q^W |j	}
x`t|D ]T\}}t|dkrtjtjdd |D dd}t||||j	 |
|j|< |
d7 }
qW |S )zReduces resolution of structurer   )r7   c             S   s   g | ]}g qS r   r   )r"   rF   r   r   r   r$     s    zhighToLow.<locals>.<listcomp>r   c             S   s   g | ]
}|j qS r   )r!   )r"   r#   r   r   r   r$     s    )axis)r   r   r   r   r   r   r,   r8   r9   r    ranger%   r+   r:   rA   meanr-   rY   )highstructurer   ZlowChromZlow_nlowstructureZallPointsToMergeZ	highPointZpointsToMergeZhigh_abs_indexZlow_abs_indexindexrF   Z	meanCoordr   r   r   	highToLow  s      r~   c             C   s,  d}t | }|j j }t|j j }t|j j }t|d ||}tg g |d}d}x|r|j j j }	t|	dkrd}q\t|	d }
|	d dkrd}n@t|	d }t|	d }t|	d }t	|||f||
|}|d7 }|j
j| q\W |j  W d Q R X tj|j
|_
|jj|jj|
  |j_|S )NTr   Fr   rK   rL   r<   )rM   readliner\   r   r   r   r]   r   rn   rY   r   r:   rQ   r,   r-   r   r   r   r   )r_   ZhasMorerp   r	   r   r   r   r6   r}   rd   numr#   rU   rV   rW   r   r   r   structure_from_file$  s2    
r   c             C   s8  i }xHt | D ]<\}}x2|j D ]&}||kr>||  d7  < q d||< q W qW g }t| }x&|j D ]}|| |krd|j| qdW tj|}x| D ]}t|d |d |jj	 |jj	|jj
}tj|j td}xTt |D ]H\}}|jj|}	|j|}
|j|	|j  j}t|||
|||
|j < qW ||_||_qW dS )z/Enforce that points be shared by all structuresr   r   )r7   N)rA   r2   r   keysr:   r,   sortr   r   r   r	   r8   r   r9   r   r   r    r!   rY   )r   gen_coord_dictrF   r6   	gen_coord	consensusrE   Z	new_chrom
new_pointsZold_abs_indexZnew_abs_indexr!   r   r   r   make_compatibleA  s,    

&
r   c             C   sp   | d j }| d j}x(| D ] }|j |ks,t|j|kstqW tdd | D }tdd | D }t||||S )z(Enforce that chromosomes have same ranger   c             S   s   g | ]
}|j qS r   )r   )r"   r   r   r   r   r$   e  s    z#consensus_chrom.<locals>.<listcomp>c             S   s   g | ]
}|j qS r   )r   )r"   r   r   r   r   r$   f  s    )r   r	   rS   r5   rl   r   )ZchromsZconsensus_resZconsensus_namer   r   r   r   r   r   consensus_chrom^  s    


r   c       
      C   s  i }xHt | D ]<\}}x2|j D ]&}||kr>||  d7  < q d||< q W qW g }t| }x&|j D ]}|| |krd|j| qdW tj|}xt| D ]l}tj|jj	 t
d}xLt |D ]@\}}|jj|}|j||j  j}	t|	|j|||||j < qW ||_qW dS )zNEnforce that points be shared by all structures. Don't change ChromParameters.r   )r7   N)rA   r2   r   r   r:   r,   r   r8   r   r   r9   r   r   r    r!   rY   )
r   r   rF   r6   r   r   rE   r   r   r!   r   r   r   make_points_compatiblei  s&    

r   c             C   s   t ||}t| t| }xHt|jD ]:\}}|dkr&|j\}}}	|| || |	| f|j| _q&W t|| \}
}|| }|j|
| d S )Nr   )r~   rT   rA   r   r!   getTransformationrG   )trueLowhighSubstructure	res_ratioinferredLowscaling_factorrF   r#   rU   rV   rW   rB   rC   r   r   r   rG     s    
"rG   rZ   皙?c             C   s   t | ||}t|j t|ks$tt|}tj|}xztt|D ]j}xdt|D ]X}	d| |||	f  ||||	 d    }
|
dkrR|
d|  }||||	f< |||	|f< qRW qDW tjdd |D }ttj	|dkd dkst|tj
| }|S )Nr   r   g      ?c             S   s   g | ]}t |qS r   )rr   )r"   rs   r   r   r   r$     s    zdistmat.<locals>.<listcomp>g      )rw   r   r(   rS   get_expectedr,   
zeros_likery   r-   r/   rz   )r_   r6   r`   alphaweight
contactMatexpecteddistMatrF   jZ	correcteddistrv   r   r   r   distmat  s    
(r   c          
   C   s.   t | }xt|D ]\}}qW W d Q R X |S )N)rM   rA   )r_   Zin_filerF   rd   r   r   r   size_from_bed  s    
r   )NNNNr   )NN)NN)NrZ   r   )rh   numpyr,   toolsr   linear_algebratadr9   r   r   rY   rg   r[   rw   r~   r   r   r   r   rG   r   r   r   r   r   r   <module>   s$   !t

%

#
