o
    Uݢg                  	   @  sd  d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	m
Z
 d dlmZ d dlZd dlm  mZ d dlmZ d dlmZ d dlm  mZ d dlm  mZ  d dl!mZ" d dl#m$  mZ% d dl&m$  mZ' d dl(m)  m  m*Z+ d dl,m)  m  m-Z. d dl/m)  m  m$Z0 d dl1m2Z3 d dl4m5Z5 d dl6m7Z7m8Z8 d dl9m:Z: d dl;m<Z<m=Z= d d	l>m?Z?m@Z@mAZA d d
lBmCZC d dlDmEZEmFZF dd ZGdzddZHdzddZIdd ZJdd ZKdd ZLdd ZM	dzd{dd ZNd!d" ZOd#d$ d%d$ g i fd&d'ZPd(d) ZQd*d+ ZRd|d-d.ZSd/d0 ZT	dzd}d3d4ZUd5d6 ZVd7d8 ZWd9d: ZXe.jYd;d<d=ZZd>d? Z[d@dA Z\d~dDdEZ]dFdG Z^					ddHdIZ_dJdK Z`dLdM Za	ddNdOZbdPdQ Zcg i fdRdSZddTdU ZedVdddWdWddWdWdXdYdZZfi dfd[d\Zgd]d^ Zhd_d` Zidadb Zjdcdd Zkdedf Zldgdh Zmdidj Zndkdl ZoddmdnZpdodp Zqdqdr ZrddxdyZsdS )    )annotationsN)Sequence)Any)MultiGenomeAnalysis)
ProjectionSingleGenomeAnalysis)get_ref_name_from_genomes)
SampleData'generate_counter_barcode_rank_plot_data)_make_rc_vectorsmake_color_mapmake_histogram_plot)get_projection_key)CountSamplePropertiesSamplePropertiesc                 C  s&   d|v r||  S | rd | |gS |S )N%s_)join)prefixname r   h/oak/stanford/groups/akundaje/marinovg/programs/cellranger-9.0.1/lib/python/cellranger/webshim/common.py
add_prefix)   s   r   c                 C  sj   |d ur|d ur||}|d u r|t jkrd}t|dks!d| v r&t|| } | dd} tdd| } | S )N    r   r    \s+)rna_libraryMULTI_REFS_PREFIXlenr   replaceresub)display_namer   prefixesprefix_format_funcr   r   r   format_name0   s   
r&   c                 C  sz   |d ur|d ur||}|d u r|t jkrd}d| v r4tdd| } t|dkr.|r.t|nd}| | } tdd| } | S )Nr   r   z%([^s])z%%\1r   r   r   )r   r   r!   r"   r   str)descriptionr   r$   r%   sr   r   r   format_descriptionD   s   r*   c                 C  sp   |dkrt | S tt| s| nd} |dkrt| dS |dkr%| dS |d dkr1|t|  S td| )	Nstringr   percentz.1%integerz,.0f%zInvalid format type: )r'   mathisnanfloat	Exception)valueformat_typer   r   r   format_valueZ   s   r5   c                 C  s@   | dd}| |d } t|dks| d u r| S t| |d S )N/r   r   )splitgetr   lookup_name)datar   Z
name_partsr   r   r   r9   i   s
   r9   c           	      C  sJ   d t| |d }t|i i }|r#|d }|d }||||d||< |S )Nzfloat("{}") {}testtitlemessage)r<   r=   r3   level)formatr'   eval)	r3   formatted_valuer>   
alarm_dictalarmsr;   Zraisedr<   r=   r   r   r   	add_alarmr   s   
rD   c                 C  s   |d }t | |}|d u rd S t||d }|tj}|d ur+t||tj||r+d S |tj}|d ur@t||tj|| d S d S )Nr   r?   )r9   r5   r8   shared_constantsZALARM_ERRORrD   Z
ALARM_WARN)r:   Zalarms_dictrC   	full_namer3   rA   Z
error_dictZ	warn_dictr   r   r   
add_alarms   s   

rG   r:   dictr   r'   metric_dictc              	   C  s   |d }| d|}| d|}	|sdg}g }
|D ]I}t||}t| |}|durb| d}t||||d}t|	|||d}t||}|||}||||d|||d|||d	g |
| qdS )
zAdd rows to a table.

    Args:
        data: Dict containing summary metrics
        name: Metric name (key in data dict)
        metric_dict: Metric display definition dict
    r?   r#   r(   NZprefix_format)r%   vfr)   )rK   r)   )rK   r)   r)r8   r   r9   r&   r*   r5   append)r:   r   rI   rows
style_functarget_funcr$   r4   r#   r(   valuesr   rF   r3   r%   Zformatted_nameZformatted_descriptionrA   styler   r   r   add_table_rows   sH   






rT   c                 C  s   d|v rt |d | | S dS )NZhiddenF)r@   )sample_propertiesrI   r   r   r   _is_metric_hidden   s   rV   c                  G     dS Nr   r   argsr   r   r   <lambda>       r[   c                  G  rW   rX   r   rY   r   r   r   r[      r\   c                   s^  g }|D ]}	|	d }
|	d }g }|D ]W  d }  d}t|  r#q|d ur_|  |}|s3| |}d v rR|d urE fdd|D }ntjd| d| d	 t|j| ||||d
 qt|j| ||| qt|dkry||
||d t|dkr|
dkst|dkr|
dkr||
||d qt	
 }|D ]	}t|j|| q|t| fS )Nr   metricsr   prefix_filterc                   s   g | ]
}| d  v r|qS )r^   r   .0xrI   r   r   
<listcomp>   s    z build_tables.<locals>.<listcomp>z6Warning: no metric prefix values to filter for metric z. The prefix name z is probably wrong.
)r$   r   )r   rO   ZheadersMappingCells)r8   rV   sysstderrwriterT   summaryr   rN   collectionsOrderedDictrG   listrR   )rU   Ztable_dictsZalarm_table_dictssample_datarP   rQ   Zmetric_headersall_prefixestablesZ
table_dictZ
table_nameZtable_metricsrO   r   r   r$   rC   rB   r   rb   r   build_tables   sh   





		rp   c                 C  s   t | ddd } g }d}t | D ]E\\}}|dt| d fv r,||||g n*||krV|dus6J |d d }||d krM||d ||g ||||g |}q|S )a  Given an array of values, sort them in descending order.

    Args:
        array: a numpy array
        ntype: a datatype to return the array values as

    Returns:
        the x,y coordinates for a line that could represent the sorted order by
        collapsing identical Y values into a single range of ranks.

    Example:
        .. code-block:: python

            array = np.array([1,1,1,2,3,3,3,3,3,4,5,9,20])
            convert_numpy_array_to_line_chart(array, float)
            # Result
            [[0, 20.0],
            [1, 9.0],
            [2, 5.0],
            [3, 4.0],
            [4, 3.0],
            [8, 3.0],
            [9, 2.0],
            [10, 1.0],
            [12, 1.0]]
    Nr   r   )npsortndenumerater   rN   )arrayntyperO   Zprevious_countindexcountZprevious_indexr   r   r   !convert_numpy_array_to_line_chart&  s   ry   c           
      C  s   t |t}|D ]B}|d |d }}||k r| d d g}n||kr/| d d | d d g}n| d d g}|D ]}|d | |d | q8q| d d }	t|	d dkrf|	d d |	d d | S )z%Generate a generic barcode rank plot.r   r   r:   ra   y)ry   intrN   r   )
chartcounts	num_cellsrO   rowrw   rx   Zseries_listseriesZ	bg_seriesr   r   r   _plot_barcode_rankQ  s"   
r   Tc              	   C  sp  t d| jd }| j}t||| t}|du rt| j}g g dd|tjd| j	d}d| }|D ]\}	}
|d 
|	|  |d	 
|
 q2t|d dkr]|d 
d |d	 
d |r| jdkrfd
nd}||d< | jdkr| j| j }tt| j| }d| j dd| d| d}nd}g }|D ]\}	}
|	| }|
| d|dd|
d qd|d< ||d< |S )a  Construct the data for a plot segment by appropriately slicing the counts.

    Args:
        plot_segment: BarcodeRankPlotSegment containing [start, end)
          of the segment, the cell density and legend visibility option
        counts: Reverse sorted UMI counts for all barcodes.
        show_name_and_hover: boolean whether to add hover and name text specific
            to the classic CR rank plot (the user might want to set these
            themselves for alternate rank plots)
    r   r   N	scattergllines)colorwidth)ra   rz   typemodelineZ
showlegendra   rz   re   Z
Backgroundr   g        d   z.0fz% Cells<br>(r6   )z	<br>Rank ,z	<br>UMIs text	hoverinfo)maxstartendry   r{   rE   ZBC_PLOT_CMAPZcell_densityZBC_RANK_PLOT_LINE_WIDTHlegendrN   r   round)Zplot_segmentr}   Zshow_name_and_hoverr   r   r   Z	plot_rowsZ	data_dictoffsetrw   rx   r   Z
n_barcodesZn_cellsZ	hoverbaseZhoverrankr   r   r   build_plot_data_dictl  sH   
  r   c                 C  s"   |D ]}| d  t|| q| S )zGenerate the RNA counter barcode rank plot.

    Inputs:
        - chart: chart element to populate data
        - counts: UMI counts reverse sorted
        - plot_segments: A list of BarcodeRankPlotSegments
    r:   )rN   r   )r|   r}   plot_segmentsZsegmentr   r   r   _plot_segmented_barcode_rank  s   r   cell_barcodes
set[bytes]c           
      C  sj   t j}|t t jkrt|dkr|d }t||tjtj	}||vr%dS t
||||d\}}	t| ||	S )a  Generate a basic RNA counter barcode rank plot without depending on SampleData/SampleProperties.

    Args:
        chart: chart element to populate data
        cell_barcodes: set of cell barcodes as bytes
        barcode_summary: barcode summary from the barcode_summary.h5
        lib_prefix: The library prefix to create the plot for
        restrict_barcodes: Optional list of cell barcodes to restrict to
    r   r   N)restrict_barcodes)r   r   get_library_type_metric_prefixGENE_EXPRESSION_LIBRARY_TYPEr   cr_utilsformat_barcode_summary_h5_keycr_constantsTRANSCRIPTOME_REGIONCONF_MAPPED_DEDUPED_READ_TYPEr
   r   )
r|   r   barcode_summarygenomesZ
lib_prefixr   genomekeycounts_per_bcr   r   r   r   plot_basic_barcode_rank  s$   

r   c           
      C  s.  t |trt|dd|dd|ddd}t |tsJ t |ts&J t |tr5|jdu s5|jdu r7dS t|j	dkr@dS t|j	dkrKt
j}n|j	d }t
t
j}t
t
j}t||tjtj}t|t
jtjtj}||jv r||\}}	t| ||	S ||jv r||\}}	t| ||	S dS )z+Generate the RNA counter barcode rank plot.	sample_idNsample_descr   r   r   r   r   r   )
isinstancerH   r   r8   r	   r   r   r   r   r   r   r   r   r   ANTIBODY_LIBRARY_TYPEr   r   r   r   r   Zcounter_barcode_rank_plot_datar   )
r|   rU   rm   r   
gex_prefixZ	ab_prefixZgex_keyZab_keyr   r   r   r   r   plot_barcode_rank  sR   








r   c                 C  s,   |j r|jdu r
dS | \}}t| ||S )z#Generate the VDJ barcode rank plot.N)r   Zvdj_barcode_supportZvdj_barcode_rank_plot_datar   )r|   rU   rm   r   r   r   r   r   plot_vdj_barcode_rank  s   r   c                 C  sH  |j d u rd S |j jdd }d}||vrd}tdddddd	f|d
dddd	fdddddd	fdddddd	fg}g }| D ]\}}||vrQtd| ||d |d d qBg }	| D ]3\}
}g }| D ]#\}}|| }t||d }|	dd}|t
|||d d qn|	| qd| d |	|d | S ) Nr   
   Zcdr3s_aaZcdr3sZclonotype_idzClonotype IDr+   ztext-align: left)labelr?   r<   rS   ZCDR3szCDR3s in clonotypeZ	frequencyZ	Frequencyr-   zNumber of cells with clonotypeztext-align: rightZ
proportionZ
Proportionz%0.4fzFraction of cell with clonotypez'Column not found in clonotype summary: r   r<   )r   r<   r?   ;z; rS   rJ   tablerO   cols)Zvdj_clonotype_summaryZilocrj   rk   items
ValueErrorrN   Ziterrowsr5   r    tk_safe_jsonjson_sanitizeupdate)r|   rU   rm   Z
clonotypesZcdr3_aa_colZcol_defsr   r   Zcol_defrO   r   Zcl_rowr   Zcol_namer3   rA   r   r   r   plot_clonotype_table  s|   



)r   )order_byc                K  s   |j }t||i  }t|dk rdS |}|tjkr%|jdd d n|tjkr3|jdd d n|tj	kr@|jdd d t
| \}	}
| d d	 |	|
d
 | S )z-Plot a HistogramMetric from the summary json.r   Nc                 S  s   t | d S )Nr   nullable_int_sort_keyra   r   r   r   r[         z'plot_histogram_metric.<locals>.<lambda>r   c                 S  s   t | d ddS )Nr   T)negater   r   r   r   r   r[         c                 S  s   t | d  S Nr   )convert_to_float_gracefullyr   r   r   r   r[     s    r:   r   )ra   rz   )ri   rl   r8   r   r   rE   Z"HISTOGRAM_METRIC_ORDER_INTEGER_BINrs   Z+HISTOGRAM_METRIC_ORDER_DECREASING_FREQUENCYZ,HISTOGRAM_METRIC_ORDER_DECREASING_PROPORTIONzipr   )r|   rU   rm   metric_namer   kwargssummary_datar   orderingra   rz   r   r   r   plot_histogram_metrict  s   



r   c              
   C  s4  | t}|d u rd S g | d< tjD ]*}|d }|d|jd }|d|jd }| d g g |d|d ddd	 qd
d ttjD }t|jd |jd |jd D ]\}}}	| d ||	  }
|
d t	| |
d t	| qVd
|jd dddd| d d< d
|jd dddd| d d< | S )Nr:   r   Zgenome0Zgenome1markersr   g333333?)r   opacity)ra   rz   r   r   markerc                 S  s   i | ]	\}}|d  |qS r   r   )r`   irK   r   r   r   
<dictcomp>      z0plot_barnyard_barcode_counts.<locals>.<dictcomp>count0count1callra   rz   z{} UMI countsZtozeroTF)r<   Z	rangemodeZ	autorangeZ
fixedrangelayoutxaxisyaxis)get_analysisr   ws_gex_constantsZGEM_CALL_LABELSr    resultrN   	enumerater   r{   r?   )r|   rU   rm   analysisZ
label_infor   Zcall_to_seriesr   r   r   r   r   r   r   plot_barnyard_barcode_counts  sD   


r   c                 C  s   | d u s
t | dkrd S dd | D }t |dkr|d nd }|d u s'| r)d S i }|j D ]\}}|jtjkr@|jtj	ksD|||< q0||_| S )Nr   c                 S     g | ]	}t |tr|qS r   r   r   r`   anr   r   r   rc     r   z#plot_preprocess.<locals>.<listcomp>)
r   Zis_zero_matrixclusteringsr   Zclustering_typecr_clusteringZCLUSTER_TYPE_KMEANSZnum_clustersr   ZMAX_WEBSHIM_KMEANS_K)analysessg_analysessg_analysisZnew_clusteringsr   Zclur   r   r   plot_preprocess  s   r   projectionsSequence[Projection]c                 C  s   t | |t|S N)r	   r   )rU   sample_data_pathsr   r   r   r   load_sample_data  s   r   c                 C  s   | t}|du rdS | tj}| }tj|v r!ttjd}ntj|v r,ttjd}|j|dj	t
jddg}t| ||t|S )z7Plot cells in t-SNE space, colored by clustering label.N   r   r   )r   r   matrixget_library_typesr   r   r   r   get_tsnetransformed_tsne_matrixr   ZTSNE_CLUSTER_DESCRIPTIONclustering_plot_funcplot_dimensions)r|   rU   rm   r   r   library_typesr   rZ   r   r   r   	plot_tsne  s   


r   c
              
   C  s  |dur|du s|dur|du r|	dusJ |dur|j }|	du r-t|dd  }
ndg|	  }
|j\}}|t||k rBdS | }dg| | d< t|D ]!}g g d|t|dt|
| dddd	d
did| d |d < qRt	|D ]2}t
j|||d f  }t
j|||d f  }|| }| d |d  }|d | |d | qx|durt|dd|d f |\}}t|dd|d f |\}}d||gi| d d< d||gi| d d< t
j| d< | S )z7Plot cells in a 2-d space, colored by clustering label.Nr   r   r:   z{} {} - {} cellsr-   r   r   r   size   )ra   rz   r   r   r   r   r   ra   rz   ranger   r   r   config)clustersrr   bincounttolistshaper   uniquer?   r5   r   rE   DATA_VALUE_FORMATrN   
percentileZCHARTS_PLOTLY_MOVABLE_CONFIG)r|   transformed_matrixr(   pc1pc2clip
clustering	diff_exprrR   original_cluster_sizesZvalue_freqsnm	max_valuer3   r   r1r2r   xminxmaxZyminZymaxr   r   r   r     sT   
  
r   c                 C  s   |j \}}	|	t||k rd S | d d }
tt|j d }t| |D ]M}tj|||d f  }tj|||d f  }t|| }d	|t
|d}tt|||}|
d | |
d | |
d d	 | |
d
 | q$| S )Nr:   r   r   z{}: {}r-   ra   rz   r   r   r   )r   r   rl   r   randomshufflerE   r   r{   r?   r5   minrN   )r|   r  rR   r(   vminvmaxr  r  r   r	  r   Zindex_orderr   r  r  r3   r   r   r   r   plot_dimensions_color9  s"   

r  c              	   C  s   | t}|s	dS | tj}| }tj|v r!tj}t|d}ntj|v r.tj}t|d}|j|}|	 }t
|tj\}	}
t| |j|dj|tj|	|
ddS )z#Plot cells colored by total counts.Nr   r   r   )r   r   r   r   r   r   r   r   Zselect_features_by_typeZget_counts_per_bcrr   r   r   ZTSNE_TOTALCOUNTS_PRCT_CLIPr  r   r   ZTSNE_TOTALCOUNTS_DESCRIPTION)r|   rU   rm   r   r   r   library_typer   Zreads_per_bcr  r  r   r   r   plot_tsne_totalcountsR  s0   



r  c                 C  s   |j  }t }ttttj|d  }|dk rd}n|tj	kr%tj	}ddddddg}t
|D ]]}	|jd d dd|	  f }
|jd d dd|	  f }t|
tjk}|||  d d d	  d | }|D ]}||j| qj|d
dd|	d  d |d
dd|	d  d q3g }|D ]`}|j|}	|j|}||g}t
|D ]D}|j|	dd|  f }|j|	dd|  f }|dks|tjkrd}nd}|t|t|d|d |t|t|d|d q|| q|dkrt|dd dd}| d ||d | S )Nr   r   r+   zGene ID)r   r   z	Gene namer      rq   numberZL2FCz-Log2 fold-change in cluster %d vs other cells)r   r   r<   zp-valuez9Adjusted p-value of differential expression in cluster %dz#DDDz#000z%.2frJ   z%.0ec                 S  s   | d d S )Nr   rK   r   )r   r   r   r   r[     r   z/_plot_differential_expression.<locals>.<lambda>T)r   reverser   r   )r   r   setr{   rr   floorr1   r   ZMAX_DE_TABLE_ENTRIESZMAX_TOP_N_GENESr   r:   flatnonzeroZTOP_DE_GENES_MIN_MEANargsortaddr   Zint_to_feature_idrN   Zfeature_id_to_intZfeature_id_to_nameZPVALUE_DEEMPHASIS_CUTOFFr   r   r5   sortedr   )r|   r   r  r  r  Z
n_clustersZ	top_genesZn_genesr   r   ZmeansZlog2fcsZkeep_indicesZtop_gene_indicesjrO   gene_id	gene_namer   Zlog2fcZadj_p_valuerS   r   r   r   _plot_differential_expressionq  sr   

"


r#  c                 C  s   | t}t| ||t|gS r   )r   r   r   r#  )r|   rU   rm   r   r   r   r   plot_differential_expression  s   
r$  c                 C  s   | t}|d u rd S g }|j D ]5\}}	|	|d< |j| |d< |j| |d< |t| g|R i |}
|
d urGtj	|	j
i|
d< ||
 q|S )Nr  r  r  filters)r   r   r   r   r  Zdifferential_expressioncopydeepcopyr   CLUSTERS_FILTER_TITLEr(   rN   )r|   rU   rm   Z	plot_funcrZ   r   r   
new_chartsZclustering_keyr  Z	new_chartr   r   r   r     s   

r   c                 C  s|   | d u ri S dd | D }t |dks| d d u ri S t |dks#J |d }dd t|j D }tj||d diS )Nc                 S  r   r   r   r   r   r   r   rc     r   z&make_chart_filters.<locals>.<listcomp>r   r   c                 S  s   g | ]}|j qS r   )r(   r_   r   r   r   rc     s    )rR   selected)r   r   Zsort_clusteringsr   rR   r   r(  )r   r   r   Zfilter_valuesr   r   r   make_chart_filters  s   r+  Zraw_rpcF)subsample_type
ref_prefix
referencesis_targetedshow_targeted_onlymetric_suffixmulti_genome_onlyshow_multi_genome_onlyc                  s  |j pi }|du r|durdd| d }nd}|r0tjg}|s'|tj dd|}nd}d| d| d	| | d
  fdd| D }|	r\dd |D }t|dkr\dS g }t	|
 |D ]l\}}|rtt|tstt|}|r|dkr|d}|ddd}|
r|dkrqet|d}|r|d}||vrqe| d| }|dkr|rt|}nd}n|r| dt| }n|}||||||f qet|dkrdS t|dd d}i }|D ]5}|\}}}}}||f}||vr	d|gd|g|dddid||< q|| d | || d | q| drJ| d d rJ| d d  d d! d"krJtd#d$ |
 D | d d  d d%< d&d t| D | d'< | S )(a  Modifies chart data entry to add traces for subsampled metrics with specified suffixes.

    Metrics must take the form `<reference>_<subsample_type>_<subsample_depth>_<metric_suffix>`,
    where metric suffix is specified as a kwarg

    Args:
        metric_suffix (str): suffix for the subsampled metric given a metric of
            the form `<reference>_<subsample_type>_<subsample_depth>_<metric_suffix>`
    N(|z)_z(.+)_z_({})r   ^z)_([0-9]+)_$c                   s   g | ]}t  |qS r   )r!   search)r`   r   Zmetric_patternr   r   rc   "      z6plot_subsampled_scatterplot_metric.<locals>.<listcomp>c                 S  s   h | ]}|d ur| dqS r   )group)r`   r   r   r   r   	<setcomp>&  s    z5plot_subsampled_scatterplot_metric.<locals>.<setcomp>r   r   r   r   r   multir  r   c                 S  s   | d | d fS )Nr   r   r   r   r   r   r   r[   W  r   z4plot_subsampled_scatterplot_metric.<locals>.<lambda>r   r   r   )ra   rz   r   r   r   ra   rz   r   shapesr   r   c                 s  s    | ]	}t |d  V  qdS )ra   N)r   )r`   tracer   r   r   	<genexpr>u  s    z5plot_subsampled_scatterplot_metric.<locals>.<genexpr>x1c                 S  s   g | ]\}}|qS r   r   )r`   r   rK   r   r   r   rc   x  s    r:   )ri   r   r   ON_TARGET_SUBSAMPLErN   OFF_TARGET_SUBSAMPLEr?   keysr   r   rR   r   r1   r;  r    r{   cr_tgt_utilsZreformat_targeted_labelr  r8   r   r   )r|   rU   rm   r,  r-  r.  r/  r0  r1  r2  r3  r   Ztargeting_groups_to_plotZtargeting_group_suffixZmetric_search_resultspointsvalZsearch_resultr   Zsubsample_depthZtargeting_groupZtrace_labelZsorted_pointsZtracespointdepthr3   Z	trace_keyr   r9  r   "plot_subsampled_scatterplot_metric  s   




&rJ  c                 C  s   |r|t  gnt  g}t|j}g }|D ]g}t|}|d}	d }
|D ]}||	}
|
d ur1 nq$|
s6J |	d urF|
d u rFtd|	 d|di }|dg }|D ]}|| ||< qT|
|| |fi |}|d u rlqt|t	rs|n|g}|
| q||fS )Nfunctionz'Could not find webshim chart function ""r   kwargs_prefixes)globalsr+  r   r&  r'  popr8   r   r   rl   extend)rU   Zchart_dictsrm   rn   modulemodulesr%  chartsZ
chart_dictrK  rL   modr   rM  r   Znew_chart_objr)  r   r   r   build_charts|  s2   



rU  c                   sj   |j   du r	| S i }|  D ]#\}}|dur.t fdd|D r. fdd|D ||< q|||< q|S )z(Only get subset of metric prefix values.Nc                 3  s    | ]}|  V  qd S r   )
startswithr`   rK   chain_filterr   r   r@    s    z&filter_vdj_prefixes.<locals>.<genexpr>c                   s$   g | ]}|  s|tjkr|qS r   )rV  r   r   rW  rX  r   r   rc     s    
z'filter_vdj_prefixes.<locals>.<listcomp>)
chain_typer   any)rn   rU   r   r   rR   r   rX  r   filter_vdj_prefixes  s   

r\  c                 C  s^   |j }|du r	| S g }| D ]}d|vr|| q|d D ]}|d|kr+|| qq|S )z!Only get subset of metric alarms.Nr%  rZ  )rZ  rN   r8   )Z
all_alarmsrU   rY  r   alarmrL   r   r   r   filter_vdj_alarms  s   

r^  c                 C  sl   | t jkrtjtjtj}}}tt 	 |}t
||}ntjtjtj}}}t 	 }||||fS )z9Get the appropriate metrics/alarms/charts for a pipeline.)rE   PIPELINE_VDJws_vdj_constantsMETRICSZMETRIC_ALARMSZCHARTSr\  
vdj_reportZVdjReporterget_all_prefixesr^  r   	cr_reportReporter)pipelinerU   r]   rC   rS  Zmetric_prefixesr   r   r   get_constants_for_pipeline  s   


rg  c                 C  s8   |  t}dg}|r|jjtjdkr|tj |S )zDInfer the set of distinct custom feature types present in a dataset.dummyr   )r   r   r   Zfeature_refZget_count_of_feature_typer   CUSTOM_LIBRARY_TYPErN   )rm   r   custom_featuresr   r   r   get_custom_features  s   
rk  c                 C  s(   |  t}|rdd |j D S dgS )z.Infer the set of genomes present in a dataset.c                 S  s   g | ]}|d kr|qS )r   r   r_   r   r   r   rc     r:  zget_genomes.<locals>.<listcomp>rh  )r   r   r   get_genomes)rm   r   r   r   r   rl    s   
rl  c                 C  s   t | j}t |}t|| \}}}}t||d< t|||||d\}	}|	r+|	|d< |r1||d< tt|d t| |d< t	||||d\}}
|rO||d< |
rU|
|d< t
| ||}|ra||d< |S )	zBuild a websummary json.

    Args:
        sample_properties (SampleProperties): object
        sample_data (SampleData): class
        pipeline (str): ?
    rj  rn   ro   rC   r.  rS  r%  info)r&  r'  __dict__rg  rk  rp   rl   r  rl  rU  build_info_dict)rU   rm   rf  viewsample_properties_as_dictr]   rC   rS  rn   ro   r%  rn  r   r   r   build_web_summary_json  s.   



rs  c                 C  s   |j du rdS i }|j d|d< g |d< |tjtjfv r3| j}|dur1|d tjt	|d |S tj
tj
g}|D ]A}| tj }||j vrJq;|j | }| tj }	|	|j vrctd|	 d|j |	}
t|
trrt	|
}
|d ||
d q;|S )z?Add miscellaneous metrics required by the web summary template.Nchemistry_descriptionr.  )r   r   zReference metadata metric z not found in metrics summary.)ri   r8   rE   ZPIPELINE_AGGRZPIPELINE_REANALYZEr   rN   r   REFERENCE_TYPEr   REFERENCE_METRIC_PREFIXvdj_constantsREFERENCE_TYPE_KEYREFERENCE_GENOMES_KEYr   r   rl   )rU   rm   rf  rn  r   Zreference_metric_prefixesr   Ztype_metricref_typeZname_metricZref_namer   r   r   rp  '  s@   
"




rp  c                 C  s  t ||\}}}}t|j}t||d< t|||||d\}	}|	s*tjd d S t	
 }
|	D ])}|s5q0|d D ]\}}}t|trG|d }t|trP|d }||
vrX||
|< q9q0t| d}tj|dd}||
  ||
  W d    d S 1 sw   Y  d S )	Nrj  rm  z;No metrics tables were generated, skipping CSV generation.
rO   rK   w
)lineterminator)rg  r&  r'  ro  rk  rp   rf   rg   rh   rj   rk   r   rH   opencsvwriterwriterowrD  rR   )filenamerU   rm   rf  r]   rC   r   rn   rr  ro   Zcsv_metricsr   metricr3   rL   r  r   r   r   build_metrics_summary_csvY  s6   



"r  c                 C  s8   z|s
dt | fW S dt |  fW S  ty   Y dS w )zReturn a sort key for s that will sort by int(s).

    Values of s that cannot be converted to an integer will be sorted
    to the begining.
    T)Fr   )r{   r   )r)   r   r   r   r   r   y  s   r   c                 C  s&   zt | W S  ty   tjj Y S w r   )r1   r   rf   
float_infor   )r)   r   r   r   r     s
   
r   c                 C  sT   t | j d  }t|\}}| j }t|dd}t	|||}||d}|S )Ng      ?F)Z
jibes_plot)	histogramZ
_resources)
rr   log10r	  toarrayr   r   Zfeature_ids_maprD  r   r   )Z	ab_matrixZ	ab_countsZshared_resourcesvectorsZvector_namesZ	color_mapZhistogram_dataantibody_histogramsr   r   r   make_antibody_histograms  s   
r  r|   rU   dict | CountSamplePropertiesrm   r	   c                 C  sH  |j du rdS t|trt|dd|dd|ddd}t|jdkr(dS t|jdkr3tj}n|jd }t	tj
}t||tjtj}t||tjtj}||j vs\||j vr^dS d}|j | dd }||k}	|	 dkrudS ||	 tj}
|j | dd }||	 tj|
 }| d	 d }| |d
< |
 |d< | S )aC  Generate a UMI depth plot.

    Each point is a barcode. X = Reads/UMI for barcode, Y = UMIs per barcode (log). This is useful
    in differentiating between index hopping vs other assay issues when the barcode rank plot
    shows poor separation between cells and background.

    See CELLRANGER-4776 for more detail.
    Nr   r   r   r   r   r   r   r:   ra   rz   )r   r   rH   r   r8   r   r   r   r   r   r   r   r   r   r   r   CONF_MAPPED_BC_READ_TYPEsumastyperr   float64r   )r|   rU   rm   r   r   Zgex_umi_keyZgex_reads_keyZ	MIN_COUNTZumisselectrz   readsra   r:   r   r   r   plot_umi_depth  sV   






	
r  r   )r:   rH   r   r'   rI   rH   )TN)r   r   )r   r   )NNNNN)NNN)F)r|   rH   rU   r  rm   r	   )t
__future__r   rj   r&  r  r/   r  r!   rf   collections.abcr   typingr   numpyrr   Zcellranger.analysis.clusteringr   r  r   cellranger.constants	constantsr   cellranger.reportreportrd  cellranger.rna.libraryrnalibraryr   Zcellranger.targeted.utilstargetedutilsrE  cellranger.utilsr   Zcellranger.vdj.constantsvdjrw  Zcellranger.vdj.reportrb  Z cellranger.webshim.constants.gexwebshimZgexr   #cellranger.webshim.constants.sharedZsharedrE   Z cellranger.webshim.constants.vdjr`  tenkit.safe_json	safe_jsonr   Zcellranger.analysis.multigenomer    cellranger.analysis.singlegenomer   r   cellranger.reference_pathsr   Zcellranger.webshim.datar	   r
   Z!cellranger.webshim.jibes_plottingr   r   r   Zcellranger.websummary.helpersr   'cellranger.websummary.sample_propertiesr   r   r   r&   r*   r5   r9   rD   rG   rT   rV   rp   ry   r   r   r   r   r   r   r   Z!HISTOGRAM_METRIC_DEFAULT_ORDERINGr   r   r   r   r   r   r  r  r#  r$  r   r+  rJ  rU  r\  r^  rg  rk  rl  rs  rp  r  r   r   r  r  r   r   r   r   <module>   s   

	8
S+
;%:	] ,

I 
V 	#	)2
 