o
    UݢgO                     @  s   U d dl mZ d dlZd dlmZmZ d dlmZ d dlZd dl	Z
d dlmZ d dlmZ d dlmZ e
e
jjZe
e
jjZeefZd8dd	Zd8d
dZdd Zdd Zdd Zdd Zd9ddZdd Zd:ddZ d;d!d"Z!ej"d#d$eZ#d%e$d&< d8d<d,d-Z%d=d1d2Z&d>d6d7Z'dS )?    )annotationsN)CallableIterable)unescape)
ensure_str)tablesFc                 C  s&   t | }|s|rtd|  d|S )zA wrapper around h5py.is_hdf5, optionally can throw an exception.

    Args:
        filename: The name of the file to test
        throw_exception: Should we raise an error if not?

    Returns:
        bool as to whether the file is valid
    zFile: z is not a valid HDF5 file.)h5pyis_hdf5OSError)filenameZthrow_exceptionvalid r   ^/oak/stanford/groups/akundaje/marinovg/programs/cellranger-9.0.1/lib/python/cellranger/hdf5.pyr	      s   

r	   c                 C  sX   d}|rd}t | |}| D ]\}}|||< qW d    d S 1 s%w   Y  d S )Nwa)r   Fileitems)r   dataappendfilemodefkeyvaluer   r   r   write_h5(   s   
"r   c              	   C  sr   t j| dd(}zt|dtj}W n ty   d }Y n	w W d    |S W d    |S 1 s2w   Y  |S )Nr)mode/)r   	open_filer   Zget_node_attrh5_constantsH5_FILETYPE_KEYAttributeError)r   r   filetyper   r   r   get_h5_filetype1   s   

r"   c                 C  sD   t | d}|j|j||d W d   dS 1 sw   Y  dS )z(Save an array to the root of an h5 file.r   )objN)r   r   Zcreate_carrayroot)r   namearrr   r   r   r   save_array_h5:   s   "r'   c                 C  s@   t | d}t|j| W  d   S 1 sw   Y  dS )z*Load an array from the root of an h5 file.r   N)r   r   getattrr$   read)r   r%   r   r   r   r   load_array_h5@   s   $r*   c                 K  s   |du st |dr t|dkr ttjdf}| j||d dS t|tjr,|jjt	ks<t|t
r:tdd |D s<J dd	 |D }td
d |D }|dkrRd}ttj|f}| j|f||d| dS )a  Create a dataset of strings under an HDF5 (h5py) group.

    Strings are stored as fixed-length 7-bit ASCII with XML-encoding
    for characters outside of 7-bit ASCII. This is inspired by the
    choice made for the Loom spec:
    https://github.com/linnarsson-lab/loompy/blob/master/doc/format/index.rst

    Args:
        group (h5py.Node): Parent group.
        name (str): Dataset name.
        data (list of str): Data to store. Both None and [] are serialized to an empty dataset.
                            Both elements that are empty strings and elements that are None are
                            serialized to empty strings.
        **kwargs: Additional arguments to `create_dataset`.
    N__len__r      dtypec                 s  s&    | ]}|d u pt |ttB V  qd S N)
isinstancebytesstr.0xr   r   r   	<genexpr>\   s   $ z-create_hdf5_string_dataset.<locals>.<genexpr>c                 S  s   g | ]
}|r
t |nd qS )    encode_ascii_xmlr3   r   r   r   
<listcomp>`   s    z.create_hdf5_string_dataset.<locals>.<listcomp>c                 s      | ]}t |V  qd S r/   lenr3   r   r   r   r6   b       )r   r.   )hasattrr=   npr.   bytes_create_datasetr0   ndarraycharSTR_DTYPE_CHARlistallmax)groupr%   r   kwargsr.   	fixed_lenr   r   r   create_hdf5_string_datasetF   s   rL   r5   str | bytesreturnr2   c                 C  s6   t | tr| S t | trt|  S tdt| )z4Decode a string from 7-bit ASCII + XML into unicode.Expected string type, got type )r0   r2   r1   r   decode
ValueErrortyper5   r   r   r   decode_ascii_xmll   s
   

rT   c                 C  sZ   t | tjr| jjtkr| S dd | D }tdd |D }ttj|f}tj||dS )zYDecode an array-like container of strings from 7-bit ASCII + XML.

    into unicode.
    c                 S  s   g | ]}t |qS r   )rT   r3   r   r   r   r:   ~       z*decode_ascii_xml_array.<locals>.<listcomp>c                 s  r;   r/   r<   r4   sr   r   r   r6      r>   z)decode_ascii_xml_array.<locals>.<genexpr>r-   )	r0   r@   rC   r.   rD   UNICODE_DTYPE_CHARrH   str_array)r   Zunicode_datarK   r.   r   r   r   decode_ascii_xml_arrayv   s   r[   c                 C  s6   t | tr| ddS t | tr| S tdt| )zEncode a string as fixed-length 7-bit ASCII with XML-encoding.

    for characters outside of 7-bit ASCII.

    Respect python2 and python3, either unicode or binary.
    asciixmlcharrefreplacerO   )r0   r2   encoder1   rQ   rR   rS   r   r   r   r9      s
   

r9   r   "np.ndarray | Iterable[str | bytes]0np.ndarray[tuple[int, int], np.dtype[np.bytes_]]c                   s|   t | tjr| jjtkr| jjdkr| S dd   fdd| D }tdd |D }td|}ttj|f}tj	||d	S )
zEncode an array-like container of strings as fixed-length 7-bit ASCII.

    with XML-encoding for characters outside of 7-bit ASCII.
    r   c                 S  s   | d urt | S dS )Nr7   r8   )rW   r   r   r   convert   s   z'encode_ascii_xml_array.<locals>.convertc                      g | ]} |qS r   r   r3   ra   r   r   r:      rU   z*encode_ascii_xml_array.<locals>.<listcomp>c                 s  r;   r/   r<   rV   r   r   r   r6      r>   z)encode_ascii_xml_array.<locals>.<genexpr>r,   r-   )
r0   r@   rC   r.   rD   rE   itemsizerH   rA   rZ   )r   Z
ascii_datarK   r.   r   rc   r   encode_ascii_xml_array   s   

re      )maxsizezCallable[[bytes | str], str]_cached_decodedataseth5py.Datasetmemoizebool	list[str]c                   s8   | j du rg S | dd }t |rt  fdd|D S )a  Read a dataset of strings from HDF5 (h5py).

    Args:
        dataset (h5py.Dataset): Data to read.
        memoize (bool): Whether to use memoization to make string conversion more efficient.

    Returns:
        list[unicode]: Strings in the dataset.
    Nc                   rb   r   r   r3   rP   r   r   r:      rU   z,read_hdf5_string_dataset.<locals>.<listcomp>)shaperT   rh   )ri   rk   r   r   rn   r   read_hdf5_string_dataset   s   
rp   r%   r   #str | bytes | Iterable[str | bytes]c                 C  sx   t |}t|ttB rt |}n%t|tjr!|jjtv r!t	|}nt|t
tB r5t|d ttB r5t	|}|| j|< dS )a)  Set an attribute of an HDF5 dataset/group.

    Strings are stored as fixed-length 7-bit ASCII with XML-encoding
    for characters outside of 7-bit ASCII. This is inspired by the
    choice made for the Loom spec:
    https://github.com/linnarsson-lab/loompy/blob/master/doc/format/index.rst
    r   N)r9   r0   r2   r1   r@   rC   r.   rD   STRING_DTYPE_CHARSre   rF   tupleattrs)ri   r%   r   r   r   r   set_hdf5_attr   s   

 ru   outfilefiles_to_combineIterable[str | bytes]c              
   C  s   t |tsJ tt| d0}|D ]$}tt|d}|j|j|jdd W d    n1 s1w   Y  qW d    d S 1 sBw   Y  d S )Nr   r   T)	recursive)r0   rF   r   r   r   Zcopy_childrenr$   )rv   rw   outfnamer   r   r   r   combine_h5s_into_one   s   "r|   )F)r5   rM   rN   r2   )r5   rM   )r   r_   rN   r`   )ri   rj   rk   rl   rN   rm   )r%   rM   r   rq   )rv   rM   rw   rx   )(
__future__r   	functoolscollections.abcr   r   Zhtmlr   r   numpyr@   sixr   cellranger.h5_constantsr   Zcellranger.wrapped_tablesr   r.   rA   rD   rE   rY   rX   rr   r	   r   r"   r'   r*   rL   rT   r[   r9   re   	lru_cacherh   __annotations__rp   ru   r|   r   r   r   r   <module>   s4   

		
&



