o
    Uݢgr                      @  sX  d dl mZ d dlZd dlZd dlmZmZ d dlmZm	Z	m
Z
mZ d dlZd dlm  mZ er?d dlmZ G dd deZdZd	Zd
ZdZed e Zed e Zed d Zed d Ze dZdZdZG dd de
Z dGddZ!dGddZ"dHddZ#dHddZ$dHd d!Z%dId%d&Z&dJd+d,Z'dKd1d2Z(dLd6d7Z)d8d9 Z*d:d; Z+dMdEdFZ,dS )N    )annotationsN)IterableSequence)TYPE_CHECKINGClassVar
NamedTupleProtocol)tablesc                   @  s   e Zd ZU ded< dS )NamedTupleProtocolzClassVar[Sequence[str]]_fieldsN__name__
__module____qualname____annotations__ r   r   m/oak/stanford/groups/akundaje/marinovg/programs/cellranger-9.0.1/lib/python/cellranger/analysis/clustering.pyr
      s   
 r
   Zgene_expressionZantibody_captureZpeaksZkmeans_Z
graphclustZ_graphclustZkmedoidsZcelltypec                   @  s>   e Zd ZU ded< ded< ded< ded< ded	< ded
< dS )
CLUSTERINGz#np.ndarray[int, np.dtype[np.int64]]clustersznp.int64num_clustersz
np.float64cluster_scorez	np.bytes_clustering_typeglobal_sort_keydescriptionNr   r   r   r   r   r   $   s   
 r   cluster_typestrcluster_paramintc                 C  s   | t ksJ d| S )Nz_%d)CLUSTER_TYPE_KMEANSr   r   r   r   r   format_legacy_clustering_key-   s   r!   c                 C  s~   | t krt dt  d| dS | tkrdtt |f S | tkr%dtt |f S | tkr/dt|f S | ttt	fv r8| S t
d|  )zJGenerate a machine-readable string that describes a particular clustering.r   Z	_clustersz%s_%s_%d_clustersz%s_%d_clusterszUnsupported cluster type: )r   
GEX_PREFIXCLUSTER_TYPE_ANTIBODY_KMEANS	AB_PREFIXCLUSTER_TYPE_ATACATAC_PREFIXCLUSTER_TYPE_KMEDOIDSCLUSTER_TYPE_GRAPHCLUSTCLUSTER_TYPE_ATAC_GRAPHCLUSTCLUSTER_TYPE_CELLTYPES
ValueErrorr    r   r   r   format_clustering_key2   s   r,   clustering_keyc                 C  s(   t tj| }ddd |D }t|S )a  Given clustering keys, output the integer number of clusters.

    Keys are of a form like _antibody_capture_kmeans_10_clusters or
    _gene_expression_kmeans_3_clusters.

    >>> _parse_number_of_clusters("_antibody_capture_kmeans_10_clusters")
    10
    >>> _parse_number_of_clusters("_gene_expression_kmeans_3_clusters")
    3
    >>> _parse_number_of_clusters("_peaks_kmeans_5_clusters")
    5
     c                 S  s   g | ]}|qS r   r   ).0sr   r   r   
<listcomp>T   s    z-_parse_number_of_clusters.<locals>.<listcomp>)filterr   isdigitjoinr   )r-   
n_clustersr   r   r   _parse_number_of_clustersF   s   r6   c                 C  s   | t ttfv r| dfS | trt| }t|fS | tr%t| }t|fS | t	r2t| }t
|fS | trE| d\}}}tt|fS | tkrM| dfS td|  )z,Parse the output of format_clustering_key().r   r   z0Unsupported clustering type for clustering key: )r(   r)    CLUSTER_TYPE_ANTIBODY_GRAPHCLUST
startswithr"   r6   r   r$   r#   r&   r%   r'   splitr   r*   r+   )r-   r5   r   r   r   r   parse_clustering_keyX   s*   



r:   c                 C  s   t | \}}|tkrdS |tkrdS |tkrd| S |tkr"d| S |tkr*d| S |tkr2d| S |tkr8dS td| d	|  )
z)Make a cluster_key string human-readable.zGene Expression Graph-basedzPeaks Graph-basedzGene Expression K-means (K=%d)zAntibody Capture K-means (K=%d)zPeaks K-means (K=%d)zK-medoids (K=%d)Z	CelltypeszUnsupported clustering type z for clustering key: )	r:   r(   r)   r   r#   r%   r'   r*   r+   )r-   r   r   r   r   r   humanify_clustering_keyr   s$   r;   labels&np.ndarray[int, np.dtype[np._IntType]]returnc                 C  s$   t t t |  }d||   S )zRelabel clusters so they are sorted by number of members, descending.

    Args:
        labels (np.array(int)): 1-based cluster labels
       )npargsortbincount)r<   orderr   r   r   relabel_by_size   s   rD   base_dirIterable[int]barcodesIterable[bytes]c                 C  sF   t j| |}t j|dd t j|d}ddg}t|||| d S )NT)exist_okzclusters.csvZBarcodeZCluster)ospathr4   makedirsanalysis_iosave_matrix_csv)rE   r-   r<   rG   out_dirZclusters_fnheaderr   r   r   save_clustering_csv   s
   rQ   ftables.Filelegacy_group_name
namedtuplec                 C  s   |  | j|}t|\}}|tkrdS t||}|  ||}	|jD ]$}
d| d| d|
 }|| v r;| j|	|
|d q!tj	d| d q!dS )a  Soft-link a legacy-structured (CR 1.2) kmeans subgroup (dest) to a new-style (CR 1.3) subgroup (src).

    The old-style was a group called 'kmeans' with subgroups named _K.
    The new-style is a group called 'clustering' with subgroups named kmeans_K_clusters, etc.
    N/z/_)targetz'Skipped soft-link of legacy dataset to z; node doesn't exist
)
create_grouprootr:   r   r!   r   Zcreate_soft_linksysstderrwrite)rR   Znew_group_namerT   rU   r-   groupr   r   Z
legacy_keyZsubgroupfieldrW   r   r   r   create_legacy_kmeans_nodes   s   


r_   
clustering
bc_indicesSequence[int]c                 C  s$   t | j| | j| j| j| j| jdS )znSelect a subset of barcodes from a clustering object.

    Args:
        clustering (CLUSTERS namedtuple)
    r   r   r   r   r   r   )r   r   r   r   r   r   r   )r`   ra   r   r   r   subselect_barcodes   s   rd   c                 C  s   t | jdd S )z>Returns a numpy array containing cell-counts for each cluster.r?   N)r@   rB   r   )r`   r   r   r   get_cluster_sizes   s   re   c                 C  s   t | dd dS )Nc                 S  s   | j S )N)r   )xr   r   r   <lambda>   s    z"sort_clusterings.<locals>.<lambda>)key)sorted)Zclusteringsr   r   r   sort_clusterings   s   rj   r   3np.ndarray[int, np.dtype[np.int64]] | Sequence[int]r   r   floatr   str | bytesr   r   c              	   C  s>   t tj| tjdt|t|t|t|t|dS )z]Create a clustering namedtuple.

    Use numpy arrays/scalars to ensure h5 compatibility
    )dtyperc   )r   r@   asarrayint64float64bytes_rc   r   r   r   create_clustering   s   rs   )r   r   r   r   )r-   r   )r<   r=   r>   r=   )rE   r   r-   r   r<   rF   rG   rH   )rR   rS   rT   r   rU   r
   )r`   r   ra   rb   )r   rk   r   r   r   rl   r   rm   r   rl   r   rm   )-
__future__r   rJ   rZ   collections.abcr   r   typingr   r   r   r   numpyr@   cellranger.analysis.ioanalysisiorM   Zcellranger.wrapped_tablesr	   r
   r"   r$   r&   r   r#   r%   r(   r)   r7   r'   r*   r   r!   r,   r6   r:   r;   rD   rQ   r_   rd   re   rj   rs   r   r   r   r   <module>   sD   

	







