o
    Uݢg                     @   s   d dl Z d dlZd dlZd dlZd dlZd dlmZm	Z	 d dl
m  mZ d dlmZ dd Zdd Zdd Zd	d
 Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd ZG dd dZG dd dZdS )    N)ensure_binary
ensure_strc                 C   s
   |  dS )N_)split)Zgene_pair_str r   c/oak/stanford/groups/akundaje/marinovg/programs/cellranger-9.0.1/lib/python/cellranger/vdj/utils.pyget_genes_in_pair   s   
r   c                 C   s$   t t| }|jddddd |S )N	contig_idFT)dropinplaceverify_integrity)pdread_csvr   	set_index)filenamedfr   r   r   load_contig_summary_table   s   r   c                 C   s   t | dd S )N   _r   )r   r   )contig_namer   r   r   get_barcode_from_contig_name      r   c                 C   s&   d| j v r| d | S tjd dS )zjReturn true if a contig passed filter.

    contig_df (pd.DataFrame): Contains contig ids and counts.
    Zpass_filterzKChecked if contig passes filter but contig summary did not have the column
T)columnssysstderrwrite)Z	contig_dfr	   r   r   r   is_contig_filtered"   s   
r   c                 C   s   | j |dd d S )NF)index)to_csv)
data_framer   r   r   r   save_contig_summary_table1   r   r   c                 C   s6   t | }t|W  d    S 1 sw   Y  d S N)openjsonload)r   fr   r   r   load_cell_barcodes_json5   s   
$r%   c                 C   s   | durt | | dS dS )z2Write a pandas dataframe to CSV if it is not None.N)	write_csv)r   r   r   r   r   write_csv_optional:   s   r'   c                 C   sR   |  tgD ]}| | jd rtd| d| dq| j|dddd dS )	z2Write a pandas dataframe to CSV in a standard way.,zFailed write to z	: Column z contains commasTF)headerr   sepN)select_dtypesobjectstrcontainsany
ValueErrorr   )r   r   colr   r   r   r&   @   s
   r&   c                 C   sN   dd | D } t | D ]\}}d|v rtd| q|dd|  dS )z)Write a standard CSV row to an open file.c                 S   s   g | ]}t |qS r   )r-   ).0xr   r   r   
<listcomp>M   s    z!write_csv_row.<locals>.<listcomp>r(   z3Failed write to csv file: Column %d contains commasz{}
N)	enumerater0   r   formatjoin)rowr$   ivr   r   r   write_csv_rowJ   s   r;   c                 C   s(   | du rdS |r
dnd}d|d|  f S )zJTakes a 0-based clonotype index and formats it into a clonotype id string.NZinferred_clonotype	clonotypez%s%d   r   )Zclonotype_indexinferredprefixr   r   r   format_clonotype_idT   s   r@   c                 C   s    t tjttj|  d S )zHEstimate mem request for loading an entire annotations json into memory.g    eA)npceilvdj_constantsMEM_GB_PER_ANNOTATIONS_JSON_GBfloatospathgetsize)r   r   r   r    get_mem_gb_from_annotations_json\   s   rI   c                 c   s   d}d}d}d}d}	 |  |}t|dkrdS |D ]e}|dkr*|d dkr*| }|d	kr3|d
7 }nd}|s`| r<q|dkrG|dkrGd
}n|d
krR|dkrRd}n|dv r`||dkr]d
nd7 }|dksjt|dkrn||7 }|dkrt|dkrt|V  d}qq)z?Generator that streams items from a list of dicts [{}, {},...].r    Fi   TN"   \r=   {})rN   rO   )readlenisspacer"   loads)r$   Zbrace_countr3   Zin_strZbackslash_countbufsizebufcr   r   r   get_json_obj_iterc   s@   

rX   c                   @   s(   e Zd ZdZdd Zdd Zdd ZdS )	JsonDictListWriterz*Streams a list of dicts as json to a file.c                 C   s   d| _ |d dS )zFh - file handle.Fz[
N)	wrote_anyr   selffhr   r   r   __init__   s   zJsonDictListWriter.__init__c                 C   s&   | j r|d t|| d| _ dS )zWrite a dict.z
,TN)rZ   r   r"   dump)r\   r3   r]   r   r   r   r      s   

zJsonDictListWriter.writec                 C   s   | d dS )zFinish writing all dicts.z
]N)r   r[   r   r   r   finish   s   zJsonDictListWriter.finishN)__name__
__module____qualname____doc__r^   r   r`   r   r   r   r   rY      s
    rY   c                   @   sD   e Zd ZdZdee fddZdd Zdd Zd	e	d
e
fddZdS )CachedJsonDictListWriterszStream multiple json DictLists.

    Using a FileHandleCache to limit the number of open file handles.
    This is useful when you're writing to many files from the same process.
    	filenamesc                    s,   | _ tjdd _ fdd|D  _dS )z)Filenames: list of filenames to write to.w)modec                    s   g | ]
}t  j|qS r   )rY   cacheget)r2   fnr\   r   r   r4      s    z6CachedJsonDictListWriters.__init__.<locals>.<listcomp>N)rf   tk_cacheZFileHandleCacheri   writers)r\   rf   r   rl   r   r^      s   z"CachedJsonDictListWriters.__init__c                 C   s   | S r    r   rl   r   r   r   	__enter__   s   z#CachedJsonDictListWriters.__enter__c                 C   s.   t | j| jD ]\}}|| j| qd S r    )ziprn   rf   r`   ri   rj   )r\   Ze_typeZe_valZe_tbwriterr   r   r   r   __exit__   s   z"CachedJsonDictListWriters.__exit__dfile_idxc                 C   s$   | j | || j| j|  dS )zWrite content.

        Args:
            d: data to write.
            file_idx: index of filename/writer in original given list
        N)rn   r   ri   rj   rf   )r\   rs   rt   r   r   r   r      s   $zCachedJsonDictListWriters.writeN)ra   rb   rc   rd   listr-   r^   ro   rr   dictintr   r   r   r   r   re      s    re   )r"   rF   r   numpyrA   pandasr   sixr   r   cellranger.vdj.constantsvdj	constantsrC   Ztenkit.cacheri   rm   r   r   r   r   r   r%   r'   r&   r;   r@   rI   rX   rY   re   r   r   r   r   <module>   s,   

*