o
    Uݢgij                     @  s  d Z ddlmZ ddlZddlZddlmZ ddlZddl	m
Z
 ddlmZmZ ddlmZ ddlmZ dd	lmZ d
ZdZdZdZh dZd9ddZdd Zdd Zdd Z	d:ddZG dd dZG dd  d eZG d!d" d"eZ G d#d$ d$eZ!G d%d& d&e Z"G d'd( d(e Z#G d)d* d*eZ$G d+d, d,e$Z%G d-d. d.eZ&G d/d0 d0eZ'G d1d2 d2eZ(G d3d4 d4eZ)G d5d6 d6Z*G d7d8 d8Z+dS );z2Tools for managing metrics with metadata attached.    )annotationsN)OrderedDict)
ensure_str)add_multi_prefixadd_species_prefix)load_chains_from_chain_type)
SampleDatadesanitize_valueZVALIDWARNERRORINFO>   	full_namealert_warn_detailtargetedalert_error_detailalert_warn_detail_csalert_error_nameinclude_cumulative
acceptablehelp_descriptionacceptable_csevaluate_typeis_species_specificsummarytargeted_csalert_warn_namecategoryalert_error_detail_csformat_typedatadictkeystrc                 C  sB   || v r| | S d|v r| dd\}}|| v rt| | |S dS )zGiven a key which may be nested using the dot notation, return the value.

    This function will return None if the key is not found. The maximum depth of nesting
    is expected to be very small (~2) so we use recursion.
    .   N)splitget_maybe_nested_key)r    r"   leftright r*   l/oak/stanford/groups/akundaje/marinovg/programs/cellranger-9.0.1/lib/python/cellranger/websummary/metrics.pyr'   1   s   r'   c                 C  s    t |  }t|dksJ dd S )Nr   z,Expected keys in Metrics CSV file not found.)required_colslen)keysZmissing_keysr*   r*   r+   _validate_expected_keys_presentA   s   r/   c                   s    fdd}|S )zDecorator to convert from strings "NaN", "Inf", "-Inf" back to floats.

    This is used to enable comparisons with numeric types.

    Args:
        func: A function with value, target as args
    c                   s   t | }  | |S Nr	   valuetargetfuncr*   r+   _convert_and_testO   s   
z(convert_value.<locals>._convert_and_testr*   )r5   r6   r*   r4   r+   convert_valueF   s   	r7   c           	      C  s   t jt| dddd}t|  }t| | D ]\}}| D ]\}}||v r4|| |j	||f< q#qt
 }|jjD ] }||v rGtd|j| ||< || t || d||< q=|S )aQ  Loads the metric metadata as a dictionary by parsing the input csv file.

    Entries can point to another entry. For example 'alert_warn_detail_cs' field
    could just say 'alert_warn_detail', meaning we want to use the content in
    'alert_warn_detail' for 'alert_warn_detail_cs' as well. Only a single level of
    redirection is supported. i.e You should not have A -> B -> C, instead use A->C
    and B->C. No circular references too (i.e A->B and B->A). The code does not
    explicitly check for these, but these simple rules should be followed when building
    the metrics csv.
    r   #high)	index_colcommentfloat_precisionz+Metrics file cannot contain duplicated keysN)pdread_csvr   setr.   tolistr/   iterrowsitemsatr   indexvaluesOSErrorlocwherenotnull)	Zmetric_csv_filer    headersirowkvmetric_datar"   r*   r*   r+   load_metric_dataV   s$    rP   c                 C  s.   t | tsJ t |tsJ | ||| dS )a  Produces a metrics csv file from.

    :param metric_annotations: An instance of MetricAnnotations that specifies which metrics will
    be output in the summary file based on the boolean `summary` column in the associated CSV file.
    :param sample_data: An instance of SampleData containing the necessary metrics we're looking for
    :param out_fname: Output csv file name
    :param species_list: A list of strings with each species (typically from the genomes of the h5)
    :return: None
    N)
isinstanceMetricAnnotationsr   output_metrics_csv)Zmetric_annotationssample_dataZ	out_fnamespecies_listr*   r*   r+   #output_metrics_csv_from_annotationsy   s   rV   c                   @  sP   e Zd ZdddZ	dddZddd	Zd
d ZdddZdddZdd Z	dS )rR   NFc                 C  s^   |du rdn|}t jt jt|}t|| _|r*t jt jtd}| | || _dS )z8Load in metric information from the associated csv file.Nzmetrics.csvzintron_mode_metrics.csv)	ospathjoindirname__file__rP   rO   _override_metric_settingsZsource_file)selfZmetric_fileintron_mode_alerts	file_pathr*   r*   r+   __init__   s   


zMetricAnnotations.__init__Tc                 C  s   | j | }|j}d}	|r+| d| }||rd| dnd7 }|	|r(d| dnd7 }	t|jr4|j|	 ndt|jr>|j|	 ndi}
t|rG|jn|jt|rO|jn|j	i}|rX|j
n|j}|r`|jn|j}t||||||||j|j|j|
||dS )zReturns a single metric object for the given key and value.

         Alerts are raised when metric falls outside
        normal range, which depends on debug status.

        if it's barnyard sample, add species suffix to name and alert_name.
         _z ()N)	alert_detail_mapr   r   r   r   r   alert_name_mapis_barnyardis_cumulative)rO   r   WARNING_THRESHOLDr   ERROR_THRESHOLDr   r   r   r   r   r   r   r   r   Metricr   r   r   )r]   r"   r2   speciesrf   debugrg   metric_infonameZalert_name_suffixre   rd   r   r   r*   r*   r+   
gen_metric   sL   

zMetricAnnotations.gen_metricc                 C  s&   dd | j  D }| |||}|S )aJ  Processes a metrics dictionary and select summary metrics based on 2nd column in.

        metrics.csv for keys provided or all registered keys in metrics.csv

        Note: The ATAC version of this returns a dictionary with key/value pairs, but this
        version will return a list of tuples with formatted-name/value
        c                 s  s    | ]
\}}|j r|V  qd S r0   )r   ).0r"   r2   r*   r*   r+   	<genexpr>   s    z<MetricAnnotations.compile_summary_metrics.<locals>.<genexpr>)rO   rB   gen_metric_list)r]   
value_dictrU   r.   metsr*   r*   r+   compile_summary_metrics   s   z)MetricAnnotations.compile_summary_metricsc                 C  sX   g }|D ]%}|| j v r"| j | }|jdur!|j}|||jgg qt| d q|S )zYProcesses a metrics dictionary and generates helptext for keys if present in metrics.csv.N  not found in registered metrics)rO   r   r   appendprint)r]   r.   outputr"   rm   r   r*   r*   r+   gen_metric_helptext   s   


z%MetricAnnotations.gen_metric_helptextc                 C  s  g }t |dk}t |dk}|D ]q}|| jvrq| j| }	|	jrm|du s&|s,|s,td|rK|	jrKt|}
t||
}|durK|| j||||dd |D ]}t	||}
t||
}|durk|| j|||||dd qMqt||}|dur|| j|||d	 q|S )
a)  Returns a list of metric objects for the provided keys, using the value dictionary to get values for.

        each metric.  When metrics are species-specific, a list of species is required.  Alerts are raised when
        metrics fall outside normal ranges, which depend on debug status.
        r%   r   Nz8Must provide a species list for species-specific metricsT)rl   rf   rg   F)rg   rl   )
r-   rO   r   
ValueErrorr   r   r'   rw   ro   r   )r]   rs   r.   rU   rl   ry   rf   is_antibody_onlyr"   rm   Zsubkeyr2   rk   r*   r*   r+   rr      sX   







z!MetricAnnotations.gen_metric_listc           	      C  s   t |tsJ | j|j|d}dd |D }dd |D }t|d}tj|dd}|||g W d    d S 1 s<w   Y  d S )N)rU   c                 S     g | ]}|j qS r*   )rn   rp   xr*   r*   r+   
<listcomp>%      z8MetricAnnotations.output_metrics_csv.<locals>.<listcomp>c                 S  r~   r*   )r2   r   r*   r*   r+   r   &  r   w
)lineterminator)rQ   r   ru   r   opencsvwriter	writerows)	r]   rT   filenamerU   rt   headerrE   fr   r*   r*   r+   rS   "  s   "z$MetricAnnotations.output_metrics_csvc                 C  s   t |}| j| dS )a  In order to enable metric settings to inherit from others, we can load up a second file with.

        settings that add or extend the previous ones.

        Args:
            filename: Filename of metrics.csv with override settings

        Returns:
            None
        N)rP   rO   update)r]   r   Znew_metric_datar*   r*   r+   r\   +  s   z+MetricAnnotations._override_metric_settingsNF)NFTFr0   )T)
__name__
__module____qualname__r`   ro   ru   rz   rr   rS   r\   r*   r*   r*   r+   rR      s    


;

8	rR   c                         e Zd Zd fdd	Z  ZS )SpatialMetricAnnotationsFc                   s   t  jd|d d S )Nzspatial_metrics.csvr^   )superr`   )r]   r^   	__class__r*   r+   r`   ;  s   z!SpatialMetricAnnotations.__init__Fr   r   r   r`   __classcell__r*   r*   r   r+   r   :      r   c                         e Zd Z fddZ  ZS )SpatialAggrMetricAnnotationsc                   .   t    tjtjtd}| | d S )Nzspatial_aggr_metrics.csvr   r`   rW   rX   rY   rZ   r[   r\   r]   r_   r   r*   r+   r`   @     
z%SpatialAggrMetricAnnotations.__init__r   r*   r*   r   r+   r   ?      r   c                      r   ) SpatialTargetedMetricAnnotationsc                   r   )Nzspatial_targeted_metrics.csvr   r   r   r*   r+   r`   G  r   z)SpatialTargetedMetricAnnotations.__init__r   r*   r*   r   r+   r   F  r   r   c                      r   )$SpatialTargetedAggrMetricAnnotationsc                   r   )Nz!spatial_targeted_aggr_metrics.csvr   r   r   r*   r+   r`   N  r   z-SpatialTargetedAggrMetricAnnotations.__init__r   r*   r*   r   r+   r   M  r   r   c                      r   ),SpatialTemplateLigationAggrMetricAnnotationsc                   r   )Nz*spatial_template_ligation_aggr_metrics.csvr   r   r   r*   r+   r`   U  
   
z5SpatialTemplateLigationAggrMetricAnnotations.__init__r   r*   r*   r   r+   r   T  r   r   c                      r   )(SpatialTemplateLigationMetricAnnotationsc                   r   )Nz%spatial_template_ligation_metrics.csvr   r   r   r*   r+   r`   ^  r   z1SpatialTemplateLigationMetricAnnotations.__init__r   r*   r*   r   r+   r   ]  r   r   c                      r   )*SpatialHDTemplateLigationMetricAnnotationsc                   r   )Nz(spatial_hd_template_ligation_metrics.csvr   r   r   r*   r+   r`   e  r   z3SpatialHDTemplateLigationMetricAnnotations.__init__r   r*   r*   r   r+   r   d  r   r   c                      r   )TargetedAggrMetricAnnotationsc                   r   )Nztargeted_aggr_metrics.csvr   r   r   r*   r+   r`   n  r   z&TargetedAggrMetricAnnotations.__init__r   r*   r*   r   r+   r   m  r   r   c                      r   )%TemplateLigationAggrMetricAnnotationsc                   r   )Nz"template_ligation_aggr_metrics.csvr   r   r   r*   r+   r`   u  r   z.TemplateLigationAggrMetricAnnotations.__init__r   r*   r*   r   r+   r   t  r   r   c                      r   )TargetedMetricAnnotationsc                   r   )Nztargeted_metrics.csvr   r   r   r*   r+   r`   |  r   z"TargetedMetricAnnotations.__init__r   r*   r*   r   r+   r   {  r   r   c                      r   )LTMetricAnnotationsFc                   s2   t  j|d tjtjtd}| | d S )Nr   zlt_metrics.csvr   )r]   r^   r_   r   r*   r+   r`     s   zLTMetricAnnotations.__init__r   r   r*   r*   r   r+   r     r   r   c                   @  s   e Zd ZdZ									d*ddZedd Zdd	 Zed
d Z	ee
dd Zee
dd Zedd Zedd Zedd Zedd Zedd Zedd Zedd Zedd Zed d! Zed"d# Zed$d% Zd&d' Zd(d) ZdS )+rj   z}Contains metadata about a single metric along with methods for evaluating its value with respect to that.

    metadata.
    NFc              
   C  s   || _ || _|| _|| _|| _|| _|| _| || _| || _	|| _
dd | j| j| j| j| j| j| j| jd	}||vrEtd| || _|| | _|	du rSd}	|	dvr^td|	 |	| _|
du rgd	}
|
| _dS )
ax  :param key: Metric identifier.

        :param name: Full Metric Name
        :param value: The value of the metric
        :param parent_metric_info: Information from the row in the metrics.csv relevant to this metric.
        :param alert_detail_map: Details text for both warning and error alerts
        :param acceptable: Value to determines the error level
        :param targeted: Value to determine the warning level
        :param evaluate_type: lt/gt/range/exists/is_true/is_false/None
        :param format_type: int/percentage/flt/float
        :param category: A category name this metric belongs to (e.g. Mapping)
        :param alert_name_map: Alert text for both warning and error alerts
        :param is_barnyard: True if this metric was generated from a barnyard experiment
        :param is_cumulative: True if this metric is accumulated over multiple genomes
        c                 S  s   dS NTr*   )r   yr*   r*   r+   <lambda>  s    z!Metric.__init__.<locals>.<lambda>)	Nltgtrangeexistsis_trueis_falseis_equalZis_not_equalzUnknown evaluation type: Nflat)r   float
percentageint
scientificzUnknown format type: ZGeneral)r"   rn   r2   rd   rf   rg   parent_metric_info_cast_threholdr   r   re   
_less_than_greater_than	_in_range_exists_is_true	_is_false	_is_equal_is_not_equalr|   r   evaluation_functionr   r   )r]   r"   rn   r2   r   rd   r   r   r   r   r   re   rf   rg   Zfunction_mapr*   r*   r+   r`     s@   

zMetric.__init__c              	   C  s`   z!t | }d|v r|dsdd | dD }W |S t| }W |S  ttfy/   | }Y |S w )N-c                 S  s   g | ]}t | qS r*   )r   strip)rp   rK   r*   r*   r+   r     s    z)Metric._cast_threhold.<locals>.<listcomp>)r#   
startswithr&   r   r|   	TypeError)tZt_strresr*   r*   r+   r     s   
zMetric._cast_threholdc                 C  s*   | j }tdtdtdi}|| | j| jdS )Npasswarnerror)	thresholdmetricrn   )threshold_typeVALID_THRESHOLDrh   ri   value_stringrn   )r]   r   Ztranslate_dictr*   r*   r+   gen_metric_dict  s   zMetric.gen_metric_dictc              
   C  s>   | j }|tkr	i S | j| jd| j| j| | j| |d| jd	S )NTra   )	Z	raw_valueformatted_valueraisedparenttitlemessageleveltestid)r   r   r2   r   r"   re   rd   r]   r   r*   r*   r+   
alarm_dict  s   zMetric.alarm_dictc                 C  s   | |k S r0   r*   r1   r*   r*   r+   r        zMetric._less_thanc                 C  s   | |kS r0   r*   r1   r*   r*   r+   r     r   zMetric._greater_thanc                 C  s   | |d ko| |d k S )Nr   r%   r*   r1   r*   r*   r+   r     s   zMetric._in_rangec                 C  s   | d uS r0   r*   r1   r*   r*   r+   r        zMetric._existsc                 C     | du S r   r*   r1   r*   r*   r+   r     r   zMetric._is_truec                 C  s   | |kS r0   r*   r1   r*   r*   r+   r     r   zMetric._is_equalc                 C  s   | |kS r0   r*   r1   r*   r*   r+   r     r   zMetric._is_not_equalc                 C  r   r   r*   r1   r*   r*   r+   r     r   zMetric._is_falsec                 C  s   | j d u r| jd u rtS | j d u r| | j| jrtS tS | jd u r.| | j| j r,tS tS | | j| j r8tS | | j| jrBtS tS r0   )r   r   r   r   r2   ri   rh   r]   r*   r*   r+   r      s   

zMetric.threshold_typec                 C  s:   | j d u r| jd u rdS | j}|tkrdS |tkrdS dS )NZBEBEBEZB4FFB4ZFFFFB4ZFFB4B4)r   r   r   r   rh   r   r*   r*   r+   color5  s   zMetric.colorc                 C     |  | jS r0   )_format_target_valuer   r   r*   r*   r+   acceptable_stringB     zMetric.acceptable_stringc                 C  r   r0   )r   r   r   r*   r*   r+   targeted_stringF  r   zMetric.targeted_stringc                 C  r   r0   )_format_valuer2   r   r*   r*   r+   r   J  r   zMetric.value_stringc                 C  s   |d u rdS | j dkrdS | j dkrdS | j dkrdS | j dkr(d	| | S | j d
kr5d| | S | j dkrK| |d  d| |d  S td)Nra   r   ZExistsr   Truer   Falser   z< r   z> r   r   z - r%   z#unreachable - invalid evaluate_type)r   r   AssertionErrorr]   r2   r*   r*   r+   r   N  s   





"zMetric._format_target_valuec                 C  st   |d u s|dkr
dS | j dkr| S | j dkr|dS | j dkr$|dS | j dkr-|d	S | j d
kr6|dS td)NNaNNoner   r   z,.2fr   z.1%r   z,.0fr   z.1ez!unreachable - invalid format_type)r   r   r   r*   r*   r+   r   a  s   




zMetric._format_value)	NNNNNNNFF)r   r   r   __doc__r`   staticmethodr   r   propertyr   r7   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r*   r*   r*   r+   rj     s^    
D












rj   c                   @  s2   e Zd Zdd ZdddZdddZdd	d
ZdS )VDJMetricAnnotationsc                 C  s$   t jt jtd}t|| _dS )z5Load metric information from the associated csv file.zvdj_metrics.csvN)rW   rX   rY   rZ   r[   rP   rO   r   r*   r*   r+   r`   s  s   zVDJMetricAnnotations.__init__FNc                 C  s   | j | }|j}|j|d}|j|d}t|jr|jj|dndt|jr+|jj|dndi}|r3|jn|j}|r;|j	n|j
}	t|rG|j|dndt|	rQ|	j|dndi}
|rY|jn|j}|ra|jn|j}t|||||
|||j|j|j|dS )zReturns a single metric object for the given key and value.

         Alerts are raised when metric falls outside
        normal range, which depends on debug status.
        chainN)rd   r   r   r   r   r   re   )rO   r   formatrh   r   ri   r   r   r   r   r   r   r   r   r   rj   r   r   r   )r]   r"   r2   rl   r   rm   rn   re   r   r   rd   r   r   r*   r*   r+   ro   y  sF   
zVDJMetricAnnotations.gen_metricc           
   
   C  s   g }|D ]V}| j | }|jrA|dusJ d| t|D ]"}|j|d}	|	|v r8|| j|||	 ||d qt|	 d qq||v rS|| j||| |d qt| d q|S )zReturns a list of metric objects for the provided keys, using the value dictionary to get values for.

        each metric. Alerts are raised when metrics fall outside normal ranges, which depend on debug status.
        NGot no chain type for r   )rl   r   z not found in metricsr{   )rO   is_chain_specificr   r   rw   ro   rx   )
r]   rs   r.   rl   
chain_typery   r"   rm   r   Zfull_keyr*   r*   r+   rr     s"   
z$VDJMetricAnnotations.gen_metric_listc                 C  s   g }|D ]O}|| j v rL| j | }|jr:|dusJ d| t|D ]}|jj|d}|||jj|dggg7 }q"q|jdurK|j}|||jggg7 }qt| d q|S )z]Processes a metrics dictionary and generates helptext for keys if present in the metrics csv.Nr   r   rv   )rO   r   r   r   r   r   rx   )r]   r.   r   ry   r"   rm   r   r   r*   r*   r+   rz     s    


z(VDJMetricAnnotations.gen_metric_helptext)FNr0   )r   r   r   r`   ro   rr   rz   r*   r*   r*   r+   r   r  s
    

6r   )r    r!   r"   r#   r0   ),r   
__future__r   r   rW   collectionsr   pandasr=   sixr   cellranger.rna.libraryr   r   cellranger.vdj.chain_typesr   cellranger.webshim.datar   tenkit.safe_jsonr
   r   rh   ri   INFO_THRESHOLDr,   r'   r/   r7   rP   rV   rR   r   r   r   r   r   r   r   r   r   r   r   rj   r   r*   r*   r*   r+   <module>   sL   
$
 1		 j