o
    Uݢg                     @  s   d Z ddlmZ ddlmZ ddlZddlZddl	m
  mZ er/ddlmZmZ ddlmZ dddZdddZdddZ	ddddZdS )zGFunctions to compute various statistical operations on sparse matrices.    )annotations)TYPE_CHECKINGN)
csc_matrix
csr_matrix)CountMatrixmatrixr   returncsc_matrix | csr_matrixc                 C  sB   |   }tdt|}|| }| j tj}t	|| |S )Ng      ?)
Zget_counts_per_bcmaxnpmedianmcopyastypefloat64sparsefuncsinplace_column_scale)r   Zcounts_per_bcZmedian_counts_per_bcZscaling_factorsr    r   h/oak/stanford/groups/akundaje/marinovg/programs/cellranger-9.0.1/lib/python/cellranger/analysis/stats.pynormalize_by_umi   s   r   c              	   C  s   |   }t| jd td|  }| j tj}t	|| t
tt|jdddk}|D ]}d|d|f< q5|S )zPerform feature normalization.   r   axisgV瞯<)Zget_numbcs_per_featurer   logbcs_dimr   r   r   r   r   inplace_row_scalewheresqueezeasarraysum)r   Znumbcs_per_featureZscaling_factors_rowr   ZzeroedZbc_ixr   r   r   normalize_by_idf!   s   "r    tuple[np.ndarray, np.ndarray]c                 C  s*   t j| dd\}}t|gt|gfS )zICalculate mean and variance of each column, in a sparsity-preserving way.r   r   )r   mean_variance_axisr   array)r   muvarr   r   r   summarize_columns:   s   r&      mat_mean
np.ndarraymat_varnbinsintc              	   C  s   ||  t |  }t | t ddd| }t ||  }t |}t|dkr+|S tj	j
| |d|d\}}}||d  }t|| }	tj	j
| |	d|d\}
}}|
|d  }|| | }|S )zCalculates the normalized dispersion.

    The dispersion is calculated for each feature
    and then normalized to see how its dispersion compares to samples that had a
    similar mean value.
    r   d   r   r   )	statisticbins)r   square
percentilearangeappendr
   uniquelenscipystatsbinned_statisticabs)r(   r*   r+   Zmat_disp	quantilesZ	disp_meds_Z	disp_binsZdisp_meds_arrZdisp_abs_devZ	disp_madsZdisp_mads_arrZ	disp_normr   r   r   get_normalized_dispersion@   s"   
r<   )r   r   r   r	   )r   r	   r   r!   )r'   )r(   r)   r*   r)   r+   r,   r   r)   )__doc__
__future__r   typingr   numpyr   scipy.statsr6   Zsklearn.utils.sparsefuncsutilsr   scipy.sparser   r   cellranger.matrixr   r   r    r&   r<   r   r   r   r   <module>   s   


