o
    Uݢg                     @  s   d Z ddlmZ ddlZddlZddlmZ ddlmZ ddl	Z
ddlmZ ddlmZ ddlmZ er:ddlmZ d+ddZd,ddZd,ddZ	d-d.ddZd/ddZd0ddZd1ddZd2d%d&Zd3d)d*ZdS )4zAssorted grab-bag of miscellaneous helper methods.

Do not add to this module.  Instead, find or create a module with a name
that indicates to a potential user what sorts of methods they might find
in that module.
    )annotationsN)Iterable)TYPE_CHECKING)load_target_csv_metadata)AlignmentFilereference_pathstrreturn"dict[str, dict | list | str | int]c                 C  sF   t j| tj}t|}t|W  d    S 1 sw   Y  d S N)ospathjoincr_constantsREFERENCE_METADATA_FILEopenjsonload)r   Zreference_metadata_filef r   i/oak/stanford/groups/akundaje/marinovg/programs/cellranger-9.0.1/lib/python/cellranger/reference_paths.py_load_reference_metadata_file   s   
$r   c                 C     t j| tjS r   )r   r   r   r   REFERENCE_STAR_PATHr   r   r   r   get_reference_star_path&      r   c                 C  r   r   )r   r   r   r   REFERENCE_FASTA_PATHr   r   r   r   get_reference_genome_fasta*   r   r   
str | Nonetarget_set_path	list[str]c                 C  s4   | durt | tj S |durt|dd gS dgS )zUReturn the genome names from the reference transcriptome, or target set, or ["NONE"].Nz	probe setZreference_genomeNONE)r   r   REFERENCE_GENOMES_KEYr   )r   r    r   r   r   get_reference_genomes.   s
   r$   boolc                 C  s   t | }|dddS )Nmkref_version zcellranger-arc)r   get
startswithr   datar   r   r   is_arc_reference:   s   r,   intc                 C  s   t | }|tj S r   )r   r   REFERENCE_MEM_GB_KEYr*   r   r   r   get_reference_mem_gb_request?   s   
r/   floatc              
   C  sB   t | }ttj|d }tttj	t
jtddt|  S )Ng    eA      )r   r0   r   r   getsizenpceilmaxh5_constants
MIN_MEM_GBr   BAM_CHUNK_SIZE_GBr-   )r   Zin_fasta_fnZgenome_size_gbr   r   r   $get_mem_gb_request_from_genome_fastaD   s   r:   	input_bamr   nchunksinclude_unmapped list[list[tuple[str, int, int]]]c                 C  s8  |t |8 }|dksJ | j}| j}t|}t tt|| }dd t||D }g }|r3dgg}|g  d}	t	|r|
 }
|
d |
d  }|	| |k r\|d |
 |	|7 }	n5|	| | }|
d |
d |
d | f}|d | |g  d}	|dkr||
d |
d | |
d f t	|s>dd |D S )	a$  Chunk up a reference into nchunks roughly equally sized chunks.

    Args:
        input_bam (pysam.AlignmentFile): Source for reference contig names and lengths.
        nchunks (int): The number of chunks to create.
        include_unmapped (bool): If `True` then one of the chunks consists
                                 of all the unmapped reads as specified by
                                 `[("*", None, None)]`.

    Returns:
        list: loci, where each loci is a list of contiguous regions
              `(contig, start, end)`.
    r1   c                 S  s   g | ]	\}}|d |fqS )r   r   ).0chromendr   r   r   
<listcomp>g   s    z#chunk_reference.<locals>.<listcomp>)*r   r   r   r2   c                 S  s   g | ]}t |r|qS r   )len)r?   cr   r   r   rB   }   s    )r-   
referenceslengthssumr4   r5   r0   zipappendrE   pop)r;   r<   r=   chromsZchrom_lengthsZgenome_size
chunk_sizeprocesschunksZcurrent_chunk_sizeZpieceZ
piece_sizeZpiece_left_overZ	new_piecer   r   r   chunk_referenceO   s6   


 rQ   genomesIterable[str]c                 C  s
   d | S )NZ_and_)r   )rR   r   r   r   get_ref_name_from_genomes   s   
rT   )r   r   r	   r
   )r   r   r	   r   r   )r   r   r    r   r	   r!   )r   r   r	   r%   )r   r   r	   r-   )r   r   r	   r0   )r;   r   r<   r-   r=   r%   r	   r>   )rR   rS   )__doc__
__future__r   r   r   collections.abcr   typingr   numpyr4   cellranger.constants	constantsr   cellranger.h5_constantsr7   Z cellranger.targeted.simple_utilsr   Zpysamr   r   r   r   r$   r,   r/   r:   rQ   rT   r   r   r   r   <module>   s,   






1