o
    Uݢg                     @  s   d Z ddlmZ ddlmZ ddlZddlm  m	Z
 ddlmZ ddlm  mZ ddlm  mZ ddlm  mZ G dd deZdZdd
dZejdfdddZdd ZdS )zK-means clustering.    )annotations)
NamedTupleNc                   @  s   e Zd ZU ded< ded< dS )KMEANSintclustersfloatcluster_scoreN)__name__
__module____qualname____annotations__ r   r   i/oak/stanford/groups/akundaje/marinovg/programs/cellranger-9.0.1/lib/python/cellranger/analysis/kmeans.pyr      s   
 r   gMbP?returnr   c                 C  s  | j d }|j}|j}|j}tt|}t|t	|tk < t
|}t
|}t|D ]+}	||	 }
t| |	ddf ||
ddf   }||
  |7  < ||
  d7  < q-d||dk< t|| }||ddtjf  | }t|d tj|dd}| | }|S )zCompute Davies-Bouldin index, a measure of clustering quality.

    Faster and possibly more reliable than silhouette score.
    r   N   g        )axis)shape
n_clustersZcluster_centers_Zlabels_sp_distZ
squareformZpdistMIN_CENTROID_DISTnpabszerosrangesquaresumsqrtnewaxisfill_diagonalmax)matrixkmeansnkZcenterslabelsZcentroid_distswsscountsilabelZsqdistZscatterZmixitudeZworst_case_mixitudeZdb_scorer   r   r   compute_db_index   s(   


*r)   r   r   random_state
int | Nonec              	   C  sf   |du rt j}tj||d}|| d }t| |}t|}t||}tj	|||||t
|dS )z_Run k-means clustering on the points in transformed_matrix.

    Find n_clusters clusters.
    N)r   r*   r   )r   num_clustersr   clustering_typeglobal_sort_keydescription)analysis_constantsRANDOM_STATE
sk_clusterZKMeansZfit_predictr)   cr_clusteringrelabel_by_sizeformat_clustering_keycreate_clusteringhumanify_clustering_key)Ztransformed_matrixr   r-   r*   r!   r   r   clustering_keyr   r   r   
run_kmeansC   s   


r9   c                 C  sH   t ||}| | jtj}t| ||| t | tjtj	t j
| dS )zSave k-means clustering results to HDF5 file object f.

    Args:
        f: The hdf5 object to save to.
        kmeans: Cluster centers.
        n_clusters: The number of clusters.
        clustering_type: whether GEX-based or Antibody-bases K-means
    N)r3   r5   create_grouprootr0   ANALYSIS_H5_CLUSTERING_GROUPanalysis_iosave_h5create_legacy_kmeans_nodesANALYSIS_H5_KMEANS_GROUP
CLUSTERING)fr   r!   r-   r8   groupr   r   r   save_kmeans_h5b   s   	rD   )r   r   )r   r   r*   r+   )__doc__
__future__r   typingr   numpyr   Zscipy.spatial.distanceZspatialZdistancer   Zsklearn.clusterZclusterr2   cellranger.analysis.clusteringanalysis
clusteringr3   cellranger.analysis.constants	constantsr0   cellranger.analysis.ioior=   r   r   r)   CLUSTER_TYPE_KMEANSr9   rD   r   r   r   r   <module>   s    
)