o
    Uݢgg                     @  s  d dl mZ d dlZd dlZd dlZd dlmZ d dlZd dlm	Z
 d dlmZ d dlmZ d dlm  mZ d dlm  mZ d dlm  mZ d dlmZ d dlm Z! d dl"m#Z# d dl$m%Z% d dl&m'Z' d dl(m)Z)m*Z* d dl+m,Z,m-Z- d d	l.m/Z/m0Z0m1Z1 d d
l2m3Z3 ej4ej5ej6ej7ej8ej9gZ:ej4ej5ej6ej9gZ;ej4ej6gZ<G dd de=Z>dd Z?dd Z@dd ZAdd ZBdVddZCg dZDdWddZEdd ZFdd dXd#d$ZGd%d& ZHd'd( ZIG d)d* d*eZJeJd+d+d,gd-eJd.d.d,gd-eJd/d/d,gd-eJd0g d1d-eJd2g d3d-eJd4g d5d-eJd6g d7d-eJd8g d9d-eJd:d:d,gd-eJd;d;d,gd-g
ZKejLfd<d=ZMd>d? ZNd@dA ZOdBZPdYdJdKZQg fdLdMZRdNdO ZS	dZdPdQZTdRdS ZUdTdU ZVdS )[    )annotationsN)
NamedTuple)	csv_utils)validate_reference)FeatureDefException)LIBRARIES_METRICMoleculeCounter)get_ref_name_from_genomesget_reference_genomes)TARGETING_METHOD_FILE_NAMESTARGETING_METHOD_TL_FILE_FORMATTargetingMethod)%SPATIAL_TARGET_DISALLOWED_PANEL_TYPESc                      s$   e Zd Z fddZdd Z  ZS )PreflightExceptionc                   s   t  | || _d S N)super__init__msg)selfr   	__class__ c/oak/stanford/groups/akundaje/marinovg/programs/cellranger-9.0.1/lib/python/cellranger/preflight.pyr   <   s   
zPreflightException.__init__c                 C  s   | j S r   )r   )r   r   r   r   __str__@   s   zPreflightException.__str__)__name__
__module____qualname__r   r   __classcell__r   r   r   r   r   ;   s    r   c                 C  s   | \}}|s
t |dS )zTranslate (ok,msg) style from tenkit into an exception.

    Raises:
        PreflightException: The message as an exception.
    Nr   )resultokr   r   r   r   checkD   s   r!   c                 C  s$   zt |  W dS  ty   Y dS w )NFT)int
ValueError)sr   r   r   is_intO   s   
r%   c                 C  sL   t dd | D rd S tdd | D s$tdt  dtjtjf d S )Nc                 s  s     | ]}| tjd kV  qdS )ZVDJN)getrna_libraryLIBRARY_TYPE.0xr   r   r   	<genexpr>Y   s    z*check_gex_or_ab_present.<locals>.<genexpr>c                 s  sD    | ]}| tjtjkp| tjtjkp| tjd u V  qd S r   )r&   r'   r(   GENE_EXPRESSION_LIBRARY_TYPEANTIBODY_LIBRARY_TYPEr)   r   r   r   r,   ^   s    
zeYou must specify >= 1 input library with either library_type == '%s' or library_type == '%s' to run 'z count')allanyr   cr_envproductr'   r-   r.   )
sample_defr   r   r   check_gex_or_ab_presentW   s   
r4   c                   C  s   t   d S r   )tk_preflightZwarn_deprecated_osr   r   r   r   check_osj   s   r6   Fc                   s  t  }tt|  |tjkrt|  tdd | D }t	t
|d D ]!}|| ||d  krFd}|d|| d  7 }|d7 }t|q%tdd	d
 | D ]}|d }	|	 dkr`td|	dsltd|	 tj|	s|td| d|	 tj|	rtd|	 dt|	tjstd| dt  d|	 t|	std|	 |d }
|
d ur|
D ]
}t|stdqtt| |tjkr@|rddd tD }n
ddd tD }|tj d   dkrd| }t| d ur t!vs
|r tvrt  d  d| }t||d ur? d ur? tj"kr?t# fdd|j$D s?td  d qO|tjkra|tj d   d tj%fvratd  d!tj% d"qOd S )#Nc                 s  s4    | ]}| d | d| d| dfV  qdS )	read_pathsample_namessample_indiceslanesN)r&   )r*   Zsdr   r   r   r,   u   s
    "
z#check_sample_def.<locals>.<genexpr>   zhDuplicated entry in the input FASTQ data. Please use a unique combination of fastq path and sample name.z
Path: r   z\
Note in demux mode, a unique combination fastq path, sample indices, and lanes is required.zChecking FASTQ folder...Tflushr7    z;Empty fastq path specifed. Please specify an absolute path./z1Specified FASTQ folder must be an absolute path: zOn machine: z), specified FASTQ folder does not exist: zSpecified FASTQ path z3 is a file. The path is expected to be a directory.z, z0 does not have permission to open FASTQ folder: z!Specified FASTQ folder is empty: r:   z0Lanes must be a comma-separated list of numbers.c                 s      | ]	}d | d V  qdS 'Nr   r)   r   r   r   r,          c                 s  r@   rA   r   r)   r   r   r   r,      rC   zllibrary_type field may not be an empty string.
The 'library_type' field in the libraries csv must be one of zUnknown library_type: 'z@'.
The 'library_type' field in the libraries csv must be one of c                 3  s    | ]}|j  kV  qd S r   )Zfeature_typer)   library_typer   r   r,      s    z,You declared a library with library_type = 'z', but there are no features declared with that feature_type in the feature reference.
Check that the 'library_type' field in the libraries csv matches at least 1 entry in the 'feature_type' field in the feature reference CSVzR'. For the VDJ pipeline, the library_type field in sample_def must be missing or 'rB   )&socketgethostnamer!   r5   Zcheck_gem_groupscr_constantsZPIPELINE_VDJr4   sortedrangelenr   printstrip
startswithospathexistsisfileaccessX_OKr1   r2   listdirr%   Zcheck_sample_indicesPIPELINE_COUNTjoinSPATIAL_ALLOWED_LIBRARY_TYPESPUBLIC_LIBRARY_TYPESr&   r'   r(   ALLOWED_LIBRARY_TYPESr-   r0   feature_defsZVDJ_LIBRARY_TYPE)Zsample_defsfeature_refpipeline
is_spatialhostnameZ
sd_entriesir   r3   r7   r:   Zlaneoptionsr   rD   r   check_sample_defn   s   







rb   )zchrLength.txtzchrNameLength.txtzchrName.txtzchrStart.txtZGenomezgenomeParameters.txtZSAZSAindexreference_pathstrc                 C  s  | d usJ t  }td|  d| ddd tjtjg}|D ]}tj| |}tj	|s6t
d| dqtj| tj}tj| tjd }tj	|sdtj	|sdt
d	|  d
tj d| dtD ]}tj| tj|}tj|s{t
dqft|  d S )NzChecking reference_path (z) on z...Tr<   ztYour reference does not contain the expected files, or they are not readable. Please check your reference folder on .z.gzzEYour reference is missing gene annotations that should be present at r?   zG[.gz], or they are not readable. Please check your reference folder on zLYour reference doesn't appear to be indexed. Please run the mkreference tool)rF   rG   rL   rH   ZREFERENCE_METADATA_FILEZREFERENCE_FASTA_PATHrO   rP   rW   rR   r   ZREFERENCE_GENES_GTF_PATHSTAR_REQUIRED_FILESZREFERENCE_STAR_PATHrQ   r   )rc   r_   Zrequired_filesfilenamepZp_gtfZp_gtf_gzr   r   r   check_refdata   s2   
ri   c           	      C  s   ddt jh}z	t| d|}W n tjy# } ztt||d }~ww g }|D ]S}t|  |D ]}|d u s>|| d u rGd|j	 }t||| 
 ||< q2|d 
 dkr\tdddd	d d
d d|d d|d gt j|t j ddgi}|| q(|S )NZfastqssample	librarieszjInvalid libraries CSV file: incorrrect number of columns on line number (after excluding comment lines) %dr>   z_Empty sample field in libraries csv. Please specify an non-empty sample value for each library.Z
fastq_modeZILMN_BCL2FASTQZ	gem_groupr:   r7   r8   r9   r0   )r'   r(   r   load_csv_filter_commentsCSVParseExceptionr   rd   rL   keysline_numrM   append)	Zcsv_pathrequired_colsreaderexcrk   rowkeyr   libraryr   r   r   expand_libraries_csv  s@   

rw   input	file_descrP   rz   c                C  sH   t | t jst| d|  t | jdkr"t| d|  ddS )z1Check that the input file exists and is readable.z6 file is not readable, please check file permissions: r    file z
 is empty.N)rO   rS   R_OKr   statst_size)rP   rz   r   r   r   check_file_properties+  s   r   c              
   C  sD   zt | |}t | |W S  ty! } ztt||d}~ww )zuTry creating the feature reference object.

    Raises:
        PreflightException: wrapped FeatureDefException.
    N)rna_feature_refZfrom_transcriptome_and_csvZFeatureExtractorr   r   rd   )Ztranscriptome_ref_pathfeature_ref_pathr\   exr   r   r   try_load_feature_ref6  s   
r   c                   C  s   t t  d S r   )r!   r5   Zcheck_open_fhr   r   r   r   check_environmentH  s   r   c                   @  s   e Zd ZU ded< ded< dS )_VersionCmdrd   namez	list[str]cmdN)r   r   r   __annotations__r   r   r   r   r   L  s   
 r   mroz	--version)r   r   Zmrppythonnumpy)r   -cz&import numpy; print(numpy.__version__)Zscipy)r   r   z&import scipy; print(scipy.__version__)Zpysam)r   r   z&import pysam; print(pysam.__version__)Zh5py)r   r   z$import h5py; print(h5py.__version__)Zpandas)r   r   z(import pandas; print(pandas.__version__)STARZsamtoolsc                 C  sL   t D ]!}|j}|j}t| jdddd  }t||d| dd qdS )	zPrint the versions of various dependencies to the given destination.

    Args:
        file (file-like object): The destination to which to print the versions.
       
r;   )maxsplitr   z: T)sepfiler=   N)	_PACKAGE_VERSION_CMDSr   r   
tk_subproccheck_outputrM   splitdecoderL   )destpackager   r   versionr   r   r   record_package_versions_  s    r   c                 C  s   | dk rt dd S )Nr;   z9Specified read length must be greater than or equal to 1.r   )r+   r   r   r   check_read_lengthm  s   r   c              
     s   dd t fdd| D r|du rtddd d	d
  t  fdd| D r.td|raztj|dd\}}W n tyN } ztt||d}~ww t fdd| D rct| dS dS dS )zTIf any non "Gene Expression" libraries are present then the feature-ref is required.c                 S  s    |  d}|d u rdS |tjkS NrE   F)r&   r'   r-   rj   rE   r   r   r   is_not_gene_expressionv     

z8check_feature_preflights.<locals>.is_not_gene_expressionc                 3      | ]} |V  qd S r   r   r)   )r   r   r   r,   |      z+check_feature_preflights.<locals>.<genexpr>NzFYou must specify --feature-ref when using feature barcoding libraries.c                 S      |  d}|d u rdS |tjkS r   )r&   r'   CRISPR_LIBRARY_TYPEr   r   r   r   is_crispr_guide_capture  r   z9check_feature_preflights.<locals>.is_crispr_guide_capturec                 S  r   r   )r&   r'   ANTIGEN_LIBRARY_TYPEr   r   r   r   is_antigen_capture  r   z4check_feature_preflights.<locals>.is_antigen_capturec                 3  r   r   r   r)   )r   r   r   r,     r   zEAnalysis of Antigen Capture libraries is unsupported in this product.r   Zindex_offsetc                 3  r   r   r   r)   )r   r   r   r,     r   )r0   r   r   parse_feature_def_filer   rd   Zcheck_crispr_target_gene_name)r3   r   r[   _rs   r   )r   r   r   r   check_feature_preflightss  s2   	r   
   target_panel
str | Nonefeature_reference_pathparse_filesboolexpected_targeting_methodr^   c             
     s  |du r| du r|du rt d| du rdS t| dd ztj| t|ddgd W n tjy? } zt t||d}~ww |durJt|| g |r<|du rVdd}nt	j
|dd	 jD }ztj| ||d
\}}	}
W n ty } zt t||d}~ww |durt||nd}t|	tk rt d| dt|	 dt|v rtt|| }|d }||kst d| d| ddurtfdd|	D rt d| d|dd}|r|tv rtd| d dur>|dur@dd	 jD   fdd|	D }dt|i}tj|tjd\}}
zt|j| W dS  ty; } zt t||d}~ww dS dS dS )a3  Check preflights for the target panel.

    Args:
        target_panel (str | None): Target panel csv
        reference_path (str | None): Location of the reference
        feature_reference_path (str | None): Location of the feature reference path
        parse_files (bool): Flag to parse the GTF to verify that the gene is present
        expected_targeting_method (str): Type of targeting method used (Need more details)
        is_spatial (bool): Is this a spatial sample or not?

    Raises:
        PreflightException: Any Preflight specific Exception
    NzDMust specify a transcriptome or probe set or feature reference path.zThe probe set CSVry   ztarget panel or probe setgene_id)rg   descriptive_namerq   c                 S  s   i | ]}|j |jqS r   )r   idr*   Zgener   r   r   
<dictcomp>  s    z.check_targeting_preflights.<locals>.<dictcomp>)Zref_gene_indexgene_name_to_idr   z+Ten or more genes must be specified in the zI file for compatibility with downstream analysis. Number of genes found: re   Zreference_genomez+The reference genome of the transcriptome 'z' and probe set 'z' must be identical.c                 3  s    | ]
}  |d u V  qd S r   )Zgene_id_to_intr*   r   )
gene_indexr   r   r,     s    
z-check_targeting_preflights.<locals>.<genexpr>zLThere are no gene IDs in common between the reference transcriptome and the 
panel_typez"The provided targeted panel type "z>" is UNSUPPORTED in this product and results may be incorrect.c                 S  s   i | ]}|j t |qS r   )r   r   r   r   r   r     s    c                   s   g | ]
}| v r | qS r   r   r   )gene_id_to_indexr   r   
<listcomp>  s
    z.check_targeting_preflights.<locals>.<listcomp>r>   r   )r   r   r   rl   r   r&   rm   rd   check_targeting_methodcr_referenceZNewGeneIndexZload_from_referenceZgenestgt_simple_utilsZparse_target_csv	ExceptionrK   $MINIMUM_TARGET_PANEL_GENES_PREFLIGHTr   r	   r
   r/   r   martianalarmrI   r   r   Zcheck_crispr_target_gener   )r   rc   r   r   r   r^   errr   Ztarget_panel_metadataZtarget_panel_genesr   r   Ztranscriptome_reference_genomeZprobe_set_reference_genomer   Ztarget_set_gene_indicesZtarget_sets_dictZcsv_feature_defser   )r   r   r   check_targeting_preflights  s   






5r   c                 C  s   t | d;}| }|dkrtd|  d|D ]}tt| t  }||	 vr8td|  d| dqW d    d S 1 sDw   Y  d S )Nrr   zThe molecule info file z has no associated libraries.z& is too old and is missing the metric z. Please rerun.)
r   openZget_library_infor   nextiterZget_all_metricsr   valuesrn   )Zmol_info_fnZmetrics_expectedmcZnum_librariesZmetricZlibrary_metricsr   r   r   ,check_molecule_info_contains_library_metrics  s   
"r   c                 C  sh   d}d}t  }| D ](}|dkr|}n||kr-||| ||| td| |d7 }q	dS )zTakes in a list of target sets.r   Nz8Target sets are not the same. Found inconsistent genes: r;   )setupdate
differencer   )target_setscounterZglobal_target_setZinconsistent_genes
target_setr   r   r   check_target_features_same(  s   
r   c                 C  s\   d }|d urt ddd t|dd |r|d urt||}t ddd t| |tj|d d S )Nz#Checking feature definition file...Tr<   zfeature referencery   zChecking sample info...)r]   r^   )rL   r   r   rb   rH   rV   )r3   rc   
full_checkr   r^   r\   r   r   r   check_sample_info;  s   


r   c           	   
   C  sD  | du rdS t |dkrtd| t |dkrtdt| }t|d d| dd z_t|d |}t |dkrJtd| d	|d  d
t|| }|du r_td| d| d|j| krstd| d|j	 d| dt
| |j\}}}}t|d t| || W dS  tjy } ztt||d}~ww )zyDetermines the targeting method specified by a given panel or target file and compares the result to the specified value.Nr;   z'Multiple different target sets found:
	r   z1A target or probe set was expected, but not foundzThe z csvry   r{   z contains no metadata.z(No file format version specified in the z  file header. Please check your z file.zThe header in the z file indicates this is a z file. Please check your )rK   r   r   Zget_file_namer   r   Zload_target_csv_metadataZ-determine_targeting_method_info_from_metadatamethodZfile_format_tagZ.get_target_panel_or_probe_set_file_format_specZfile_versionZcheck_target_csv_metadatar   rm   rd   )	targeting_methodr   Zexpected_nameZtarget_set_metadataZtarget_method_infor   Zrequired_metadataZconflicting_metadatar   r   r   r   r   M  sZ   

	
r   c                 C  s   t ddd |d urt| |d urt ddd t| |d ur*t ddd t| | r5t ddd t  t ddd |rA|d nd }|rI|d nd }|d urW|d urYtd	d S d S )
NzChecking reference...Tr<   zChecking read 1 length...zChecking read 2 length...zChecking system environment...zChecking optional arguments...Zper_gem_wellzECannot specify both --force-cells and --expect-cells in the same run.)rL   ri   r   r   r   )r   rc   	r1_length	r2_lengthrecovered_cellsforce_cellsr   r   r   check_common_preflights  s(   r   )NNF)rc   rd   )rP   rd   rz   rd   )r   r   rc   r   r   r   r   r   r   rd   r^   r   )NF)W
__future__r   rO   rF   systypingr   r   Zcellranger.constants	constantsrH   Zcellranger.envenvr1   Zcellranger.referenceZ	referencer   Zcellranger.rna.feature_refZrnar\   r   Zcellranger.rna.libraryrv   r'   Z cellranger.targeted.simple_utilsZtargetedZsimple_utilsr   Ztenkit.log_subprocessZlog_subprocessr   Ztenkit.preflight	preflightr5   Z
cellrangerr   Zcellranger.fast_utilsr   cellranger.feature_refr   Zcellranger.molecule_counterr   r   Zcellranger.reference_pathsr	   r
   Z&cellranger.targeted.targeted_constantsr   r   r   Z$cellranger.targeted.targeted_spatialr   r-   r   r.   r   ZMULTIPLEXING_LIBRARY_TYPEZCUSTOM_LIBRARY_TYPErZ   rY   rX   r   r   r!   r%   r4   r6   rb   rf   ri   rw   r   r   r   r   r   stdoutr   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>   s   
	
h
 )-
u
8