B
    \s                 @   sX  d Z ddlmZ ddlZddlZddlmZ ddlm	Z	 ddl
Z
ddlmZ ddlZddlmZmZmZmZmZmZmZ d	d
lmZmZmZmZmZ d	dlmZ d	dlmZ d	dlmZ  d	dl!m"Z" d	dl#m$Z$ d	dl%m&Z&m'Z' ddl(m)Z) e*e+dZ,e- j.Z/dd Z0dddZ1d ddZ2G dd de3Z4e/rDG dd de3Z5G dd deZ6dS )!z7
    Implements support for high-level dataset access.
    )absolute_importN)warn)local)xrange   )h5h5sh5th5rh5dh5ph5fd   )HLObjectphil	with_philEmptyis_empty_dataspace)filters)
selections)selections2)Datatype)filename_decode)VDSmapvds_support)H5pyDeprecationWarning
   c                s^   t |dkr S  jdkr"tdx"|D ]}| jkr(td| q(W t fdd|D S )z, Make a NumPy dtype appropriate for reading r   Nz+Field names only allowed for compound typesz&Field %s does not appear in this type.c                s   g | ]}| j | d  fqS )r   )fields).0name)basetype /lib/python3.7/site-packages/h5py/_hl/dataset.py
<listcomp>6   s    z"readtime_dtype.<locals>.<listcomp>)lennames
ValueErrornumpydtype)r    r%   r   r!   )r    r"   readtime_dtype)   s    


r)   c             C   s4  |dk	rpt |tspddlm} |dk	r2t|}nd}|dk	oP|jdkoP|jdk}tj|d|rb|n|	|d}|dkr|dkr|dkrt
dt|}|j}n:t|}|dk	rtj|tjd	tj|jtjd	krtd
|dk	r|n|}t |tr$tdd t||D r$d||}t|t |tr>|j}|j}nT|dkr^|dkr^td}n&|dkrz|dk	rz|j}n
t|}tj|dd}t|||||fr|dkrtd|dkr|	dkrd}	d}|tkr|	dk	rt
d|}	d}t|pttj|||||	|||||}|
dk	rBt|
}
||
 |dkrX|| n|dk	rjt
d|dkr| tj!tj"B  n(|dkr| d n|dk	rt
d|dk	rtdd |D }t |trt#t#j$}nt#%||}t&j| jd|||d}|dk	r0t |ts0|'t#j(t#j(| |S )zU Return a new low-level dataset identifier

    Only creates anonymous datasets.
    Nr   )basefr   C)orderr(   z-One of data, shape or dtype must be specified)r(   z%Shape tuple is incompatible with datac             s   s"   | ]\}}|d k	r||kV  qd S )Nr!   )r   Zdimchunkr!   r!   r"   	<genexpr>d   s    z make_new_dset.<locals>.<genexpr>z^Chunk shape must not be greater than data shape in any dimension. {} is not compatible with {}z=f4)logicalFz1Chunked format required for given storage optionsT   gzipzConflict in compression options)TFz(track_times must be either True or Falser   z(track_order must be either True or Falsec             s   s    | ]}|d k	r|nt jV  qd S )N)r   	UNLIMITED)r   mr!   r!   r"   r/      s    )dcpl))
isinstancer    r*   r'   r(   kinditemsizeasarrayZguess_dtype	TypeErrorshapetupleproduct	ulonglongr&   anyzipformatr   idr	   	py_create_LEGACY_GZIP_COMPRESSION_VALSr   Z	fill_dcplr   createDATASET_CREATEarrayset_fill_valueZset_obj_track_timesZset_attr_creation_orderZCRT_ORDER_TRACKEDZCRT_ORDER_INDEXEDr   ZNULLcreate_simpler   writeZALL)parentr<   r(   datachunkscompressionshuffle
fletcher32maxshapecompression_opts	fillvaluescaleoffsetZtrack_timesexternalZtrack_orderr5   r*   _dtypeZis_small_floatZ	tmp_shapeerrmsgtidsidZdset_idr!   r!   r"   make_new_dset9   s    

*















r[   c             C   s   t t j}|dk	r&|t|g |dk	r@tdd |D }t||}x$|D ]\}}	}
}|	||	|
| qRW |}t
|tr|j}n,|dkrtd}n
t|}tj|dd}tj| jd|||dS )zReturn a new low-level dataset identifier for a virtual dataset

    Like make_new_dset(), this creates an anonymous dataset, which can be given
    a name later.
    Nc             s   s    | ]}|d k	r|nt jV  qd S )N)r   r3   )r   r4   r!   r!   r"   r/      s    z(make_new_virtual_dset.<locals>.<genexpr>z=f4r   )r0   )r   rY   spacer5   )r   rF   rG   rI   r'   rH   r=   r   rJ   Zset_virtualr6   r   rC   r(   r	   rD   r   )rL   r<   Zsourcesr(   rR   rT   r5   Zvirt_dspaceZvspaceZfpathdsetZ
src_dspacerY   r!   r!   r"   make_new_virtual_dset   s"    

r^   c               @   s(   e Zd ZdZdd Zdd Zdd ZdS )	AstypeContextzQ
        Context manager which allows changing the type read from a dataset.
    c             C   s   || _ t|| _d S )N)_dsetr'   r(   rW   )selfr]   r(   r!   r!   r"   __init__   s    zAstypeContext.__init__c             C   s   | j | jj_d S )N)rW   r`   _localastype)ra   r!   r!   r"   	__enter__   s    zAstypeContext.__enter__c             G   s   d | j j_d S )N)r`   rc   rd   )ra   argsr!   r!   r"   __exit__   s    zAstypeContext.__exit__N)__name__
__module____qualname____doc__rb   re   rg   r!   r!   r!   r"   r_      s   r_   c               @   s(   e Zd ZdZdd Zdd Zdd ZdS )	CollectiveContextz$ Manages collective I/O in MPI mode c             C   s
   || _ d S )N)r`   )ra   r]   r!   r!   r"   rb      s    zCollectiveContext.__init__c             C   s   | j jtj d S )N)r`   _dxplset_dxpl_mpior   ZMPIO_COLLECTIVE)ra   r!   r!   r"   re      s    zCollectiveContext.__enter__c             G   s   | j jtj d S )N)r`   rm   rn   r   ZMPIO_INDEPENDENT)ra   rf   r!   r!   r"   rg      s    zCollectiveContext.__exit__N)rh   ri   rj   rk   rb   re   rg   r!   r!   r!   r"   rl      s   rl   c               @   s  e Zd ZdZdd Zer(eedd Zedd Z	eedd	 Z
eed
d Zejedd Zeedd Zeedd Zeedd Zeedd Zeedd Zeedd Zeedd Zeedd Zeedd Zedd  Zeed!d" Zeed#d$ Zed%d& ZdFd(d)Zed*d+ Zd,d- Zed.d/ Zed0d1 Zed2d3 ZdGd4d5Z dHd6d7Z!edId8d9Z"ed:d; Z#e$e%j&d<red=d> Z'e$e%j&d?red@dA Z(e)redBdC Z*dDdE Z+d'S )JDatasetz(
        Represents an HDF5 dataset
    c             C   s
   t | |S )z Get a context manager allowing you to perform reads to a
        different destination type, e.g.:

        >>> with dataset.astype('f8'):
        ...     double_precision = dataset[0:100:2]
        )r_   )ra   r(   r!   r!   r"   rd      s    zDataset.astypec             C   s   t | S )z3 Context manager for MPI collective reads & writes )rl   )ra   r!   r!   r"   
collective  s    zDataset.collectivec          	   C   s$   ddl m} t
 || S Q R X dS )z3 Access dimension scales attached to this dataset. r   )DimensionManagerN)dimsrq   r   )ra   rq   r!   r!   r"   rr     s    zDataset.dimsc             C   s   | j jS )z5Numpy-style attribute giving the number of dimensions)rC   rank)ra   r!   r!   r"   ndim  s    zDataset.ndimc             C   s   | j jS )z1Numpy-style shape tuple giving dataset dimensions)rC   r<   )ra   r!   r!   r"   r<     s    zDataset.shapec             C   s   |  | d S )N)resize)ra   r<   r!   r!   r"   r<      s    c             C   s    t | jrdS tj| jtjdS )z3Numpy-style attribute giving the total dataset sizeN)r(   )r   rC   r'   Zprodr<   Zintp)ra   r!   r!   r"   size&  s    
zDataset.sizec             C   s   | j jS )z%Numpy dtype representing the datatype)rC   r(   )ra   r!   r!   r"   r(   .  s    zDataset.dtypec             C   s   t dt | d S )z  Alias for dataset[()] z;dataset.value has been deprecated. Use dataset[()] instead.r!   )r   r   )ra   r!   r!   r"   value4  s    zDataset.valuec             C   s    | j }| tjkr| S dS )zDataset chunks (or None)N)_dcpl
get_layoutr   ZCHUNKEDZ	get_chunk)ra   r5   r!   r!   r"   rN   <  s    zDataset.chunksc             C   s    xdD ]}|| j kr|S qW dS )zCompression strategy (or None))r2   ZlzfZszipN)_filters)ra   xr!   r!   r"   rO   E  s    

zDataset.compressionc             C   s   | j | jdS )z< Compression setting.  Int(0-9) for gzip, 2-tuple for szip. N)rz   getrO   )ra   r!   r!   r"   rS   N  s    zDataset.compression_optsc             C   s
   d| j kS )zShuffle filter present (T/F)rP   )rz   )ra   r!   r!   r"   rP   T  s    zDataset.shufflec             C   s
   d| j kS )z"Fletcher32 filter is present (T/F)rQ   )rz   )ra   r!   r!   r"   rQ   Z  s    zDataset.fletcher32c             C   s(   y| j d d S  tk
r"   dS X dS )a  Scale/offset filter settings. For integer data types, this is
        the number of bits stored, or 0 for auto-detected. For floating
        point data types, this is the number of decimal places retained.
        If the scale/offset filter is not in use, this is None.rU   r   N)rz   KeyError)ra   r!   r!   r"   rU   `  s    zDataset.scaleoffsetc             C   sX   | j  }|dkrdS t }x6t|D ]*}| j |\}}}|t|||f q&W |S )zExternal file settings. Returns a list of tuples of
        (name, offset, size) for each external file entry, or returns None
        if no external files are used.r   N)rx   Zget_external_countlistr   Zget_externalappendr   )ra   countZext_listr{   r   offsetrv   r!   r!   r"   rV   l  s    
zDataset.externalc             C   s&   | j  }|d}tdd |D S )zcShape up to which this dataset can be resized.  Axes with value
        None have no resize limit. Tc             s   s    | ]}|t jkr|nd V  qd S )N)r   r3   )r   r{   r!   r!   r"   r/     s    z#Dataset.maxshape.<locals>.<genexpr>)rC   	get_spaceZget_simple_extent_dimsr=   )ra   r\   rr   r!   r!   r"   rR   z  s    

zDataset.maxshapec             C   s$   t jd| jd}| j| |d S )z*Fill value for this dataset (0 by default))r   )r(   r   )r'   ndarrayr(   rx   Zget_fill_value)ra   arrr!   r!   r"   rT     s    zDataset.fillvaluec             C   s`   t |tjstd| t| | | j | _t	
t	j| _t| j| _t | _d| j_dS )zJ Create a new Dataset object by binding to a low-level DatasetID.
        z%s is not a DatasetIDN)r6   r   	DatasetIDr&   r   rb   rC   Zget_create_plistrx   r   rF   ZDATASET_XFERrm   r   Zget_filtersrz   r   rc   rd   )ra   Zbindr!   r!   r"   rb     s    zDataset.__init__Nc          	   C   s   t  | jdkrtd|dk	r|dkr4|| jjk sHtd| jjd  yt|}W n tk
rp   tdY nX t| j}|||< t	|}| j
| W dQ R X dS )aE   Resize the dataset, or the specified axis.

        The dataset must be stored in chunked format; it can be resized up to
        the "maximum shape" (keyword maxshape) specified at creation time.
        The rank of the dataset cannot be changed.

        "Size" should be a shape tuple, or if an axis is specified, an integer.

        BEWARE: This functions differently than the NumPy resize() method!
        The data is not "reshuffled" to fit in the new shape; each axis is
        grown or shrunk independently.  The coordinates of existing data are
        fixed.
        Nz$Only chunked datasets can be resizedr   zInvalid axis (0 to %s allowed)r   z2Argument must be a single int if axis is specified)r   rN   r;   rC   rs   r&   intr~   r<   r=   Z
set_extent)ra   rv   ZaxisZnewlenr!   r!   r"   ru     s    

zDataset.resizec             C   s   |   }|tjkrtd|S )z The size of the first axis.  TypeError if scalar.

        Limited to 2**32 on 32-bit systems; Dataset.len() is preferred.
        z>Value too big for Python's __len__; use Dataset.len() instead.)r$   sysmaxsizeOverflowError)ra   rv   r!   r!   r"   __len__  s    
zDataset.__len__c          	   C   s2   t $ | j}t|dkr td|d S Q R X dS )z The size of the first axis.  TypeError if scalar.

        Use of this method is preferred to len(dset), as Python's built-in
        len() cannot handle values greater then 2**32 on 32-bit systems.
        r   z'Attempt to take len() of scalar datasetN)r   r<   r$   r;   )ra   r<   r!   r!   r"   r$     s
    zDataset.lenc             c   s>   | j }t|dkrtdxt|d D ]}| | V  q(W dS )z Iterate over the first axis.  TypeError if scalar.

        BEWARE: Modifications to the yielded data are *NOT* written to file.
        r   z#Can't iterate over a scalar datasetN)r<   r$   r;   r   )ra   r<   ir!   r!   r"   __iter__  s
    zDataset.__iter__c             C   sF  t |tr|n|f}t| jrD|t ks:|tfks:tdt| jS tdd |D }tdd |D }tj	rtdd |D }t
| jdd}|dk	rt||}nt| jj|}t|}t|dkrt |d	 tjrt|d	 | j}|| jkrtd
t|d	 | j}t|}|dkr2tjd|dS tj|tjdd	krVtj||dS tj||d}t|}	|	  | j|	||| |S tj| jtjdd	kr|tfks|t krtj| j|dS | jdkrV| j  }
t!"|
|}tj#|j$|d}x$|D ]\}}
| j||
|| qW t|dkr>||d	  }|j$dkrR|d S |S tj%| j|| jd}|j&d	krtj#|j$|dS |j$dk}|rdn|j$}tj#||dd}t|t| jk rdt| jt|  | }t|}|j}
| jj||
||| j'd t|dkr ||d	  }|jdkr4|( }|rB|d	 }|S )a   Read a slice from the HDF5 dataset.

        Takes slices and recarray-style field names (more than one is
        allowed!) in any order.  Obeys basic NumPy rules, including
        broadcasting.

        Also supports:

        * Boolean "mask" array indexing
        zEmpty datasets cannot be slicedc             s   s   | ]}t |tjr|V  qd S )N)r6   sixstring_types)r   r{   r!   r!   r"   r/     s    z&Dataset.__getitem__.<locals>.<genexpr>c             s   s   | ]}t |tjs|V  qd S )N)r6   r   r   )r   r{   r!   r!   r"   r/     s    c             s   s(   | ] }t |tjr|d n|V  qdS )zutf-8N)r6   r   	text_typeencode)r   r{   r!   r!   r"   r/     s    rd   Nr   r   z+Region reference must point to this dataset)r   )r(   r!   )dsid)r   r,   )r-   )dxpl))r6   r=   r   rC   Ellipsisr&   r   r(   r   PY2getattrrc   r)   r	   rD   r$   r
   ZRegionReferenceZdereferenceZ
get_regionselZguess_shaper'   rH   r>   r?   emptyr   rJ   Z
select_allreadr<   r   sel2Zselect_readr   mshapeselectnselectrm   item)ra   rf   r%   Z	new_dtypemtypeobjrZ   r   outZsid_outfspace	selectionr   mspaceZsingle_elementr!   r!   r"   __getitem__  sz    


 






zDataset.__getitem__c                s(  t |tr|n|f}tdd |D  tdd |D }tjrPtdd  D  tjjddk	rjttjfkrjyt	j
|d}W nL tk
r   y"t	jfdd	|D jd}W n tk
r   Y nX Y nX |jkrx|jd
krJt	j|jdd td}dd	 |t	j|jdd t	jd|jd fD | dd< nt	jdgtd}||d< |}njjdksjjdkrjt |t	jr|jjdkrjjjdkrjt d
krjjdk	r d jjkrtd d  jj d  d }d}n
j}d}t	j
||jdd}|rx|t	 d |fg}||jdt|jt|j  }nt	j
|dd}jjdk	rjjd
 }|jt| d }||krtd||f tt	|j|f}|jdt|jt|  }	n<t dkr(|j}	jjdkr tdfdd	 D }
t|
dkr`ddd |
D }
td|
 t d
kr|jjdkrt|j}ttj |! }|"# d d| nr fdd	|jj$D }ttj |jj%}xT|D ]@}t|jj| d }|jj| d
 }|"#||| qW n
|j}	d}t&j'j|j(d}|j)dkrVdS |	dkr|j*dkrjjdk	rtd t	j|j*d |jd}||d!< |}|j}	t|	tjk rd"tjt|	  |	 }n|	}t+,|t+j-ft| }x,|.|	D ]}j(j/||||j0d# qW dS )$z Write to the HDF5 dataset from a Numpy array.

        NumPy's broadcasting rules are honored, for "simple" indexing
        (slices and integers).  For advanced indexing, the shapes must
        match.
        c             s   s   | ]}t |tjr|V  qd S )N)r6   r   r   )r   r{   r!   r!   r"   r/   S  s    z&Dataset.__setitem__.<locals>.<genexpr>c             s   s   | ]}t |tjs|V  qd S )N)r6   r   r   )r   r{   r!   r!   r"   r/   T  s    c             s   s(   | ] }t |tjr|d n|V  qdS )zutf-8N)r6   r   r   r   )r   r{   r!   r!   r"   r/   V  s    )vlenN)r(   c                s   g | ]}t j| d qS ))r(   )r'   rH   )r   r{   )r   r!   r"   r#   `  s   z'Dataset.__setitem__.<locals>.<listcomp>r   )r<   r(   c             S   s   g | ]}|qS r!   r!   )r   r   r!   r!   r"   r#   g  s    r   OVzNo such field for indexing: %sTFr,   )r(   r-   )r-   zWWhen writing to array types, last N dimensions have to match (got %s, but should be %s)z1Illegal slicing argument (not a compound dataset)c                s   g | ]}| j jkr|qS r!   )r(   r   )r   r{   )ra   r!   r"   r#     s    z, c             s   s   | ]}d | V  qdS )z"%s"Nr!   )r   r{   r!   r!   r"   r/     s    z8Illegal slicing argument (fields %s not in dataset type)c                s   g | ]}| kr|qS r!   r!   )r   r{   )r%   r!   r"   r#     s    )r   r!   z5Scalar broadcasting is not supported for array dtypes.)r   )r   )1r6   r=   r   r   r	   Zcheck_dtyper(   bytesr   r'   r:   r&   rH   rt   r   r<   objectZreshaper>   r?   Zravelr8   r   Zsubdtyper$   r   r*   Zviewr;   rD   joinrF   ZCOMPOUNDZget_sizeinsertZ_er%   r9   r   r   rC   r   r   r   rJ   r3   	broadcastrK   rm   )ra   rf   valZtmpr(   Zcast_compoundZshpZvalshpr   r   ZmismatchZsubtypeZ
fieldnamesZ	fieldnamer   r   Zval2Z
mshape_padr   r   r!   )r%   ra   r   r"   __setitem__H  s    
8&
 
zDataset.__setitem__c          	   C   s   t  t| jrtd|dkr.t| j}nt| j|| j}|j}|dkr\t|j}nt|j|| j}x*||j	D ]}| jj
|||| jd q|W W dQ R X dS )z Read data directly from HDF5 into an existing NumPy array.

        The destination array must be C-contiguous and writable.
        Selections must be the output of numpy.s_[<args>].

        Broadcasting is supported for simple indexing.
        z+Empty datasets have no numpy representationN)r   )r   r   rC   r;   r   SimpleSelectionr<   r   r   r   r   rm   )ra   dest
source_seldest_selr   r   r!   r!   r"   read_direct  s    
zDataset.read_directc          	   C   s   t  t| jrtd|dkr.t|j}nt|j|| j}|j}|dkr\t| j}nt| j|| j}x*||j	D ]}| jj
|||| jd q|W W dQ R X dS )z Write data directly to HDF5 from a NumPy array.

        The source array must be C-contiguous.  Selections must be
        the output of numpy.s_[<args>].

        Broadcasting is supported for simple indexing.
        z#Empty datasets cannot be written toN)r   )r   r   rC   r;   r   r   r<   r   r   r   rK   rm   )ra   sourcer   r   r   r   r!   r!   r"   write_direct  s    
zDataset.write_directc             C   sF   t j| j|dkr| jn|d}t j| jt jddkr8|S | | |S )z Create a Numpy array containing the whole dataset.  DON'T THINK
        THIS MEANS DATASETS ARE INTERCHANGEABLE WITH ARRAYS.  For one thing,
        you have to read the whole dataset every time this method is called.
        N)r(   r   )r'   r   r<   r(   r>   r?   r   )ra   r(   r   r!   r!   r"   	__array__  s
    
zDataset.__array__c             C   sh   | s
d}nJ| j d krd}n&tt| j }d|dkr:|nd }d|| j| jjf }tjrd|	dS |S )Nz<Closed HDF5 dataset>z("anonymous")z"%s"r7   /z&<HDF5 dataset %s: shape %s, type "%s">utf8)
r   ppbasenamenormpathr<   r(   strr   r   r   )ra   rZnamestrr   r!   r!   r"   __repr__  s    

zDataset.__repr__refreshc             C   s   | j   dS )z Refresh the dataset metadata by reloading from the file.

            This is part of the SWMR features and only exist when the HDF5
            library version >=1.9.178
            N)_idr   )ra   r!   r!   r"   r     s    zDataset.refreshflushc             C   s   | j   dS )a    Flush the dataset data and metadata to the file.
            If the dataset is chunked, raw data chunks are written to the file.

            This is part of the SWMR features and only exist when the HDF5
            library version >=1.9.178
            N)r   r   )ra   r!   r!   r"   r   #  s    zDataset.flushc             C   s   | j  tjkS )N)rx   ry   r   ZVIRTUAL)ra   r!   r!   r"   
is_virtual.  s    zDataset.is_virtualc                s.   | j std| j  fddt  D S )NzNot a virtual datasetc          	      s2   g | ]*}t  | | | |qS r!   )r   Zget_virtual_vspaceZget_virtual_filenameZget_virtual_dsetnameZget_virtual_srcspace)r   j)r5   r!   r"   r#   7  s   z+Dataset.virtual_sources.<locals>.<listcomp>)r   RuntimeErrorrx   rangeZget_virtual_count)ra   r!   )r5   r"   virtual_sources2  s
    
zDataset.virtual_sources)N)NN)NN)N),rh   ri   rj   rk   rd   MPIpropertyr   rp   rr   rt   r<   setterrv   r(   rw   rN   rO   rS   rP   rQ   rU   rV   rR   rT   rb   ru   r   r$   r   r   r   r   r   r   r   hasattrr   r   r   r   r   r   r   r!   r!   r!   r"   ro      sl   	
 l~

	
ro   )NNNNNNNNNNNNNNN)NNN)7rk   Z
__future__r   	posixpathr   r   warningsr   Z	threadingr   r   Z	six.movesr   r'   r7   r   r   r	   r
   r   r   r   r*   r   r   r   r   r   r   r   r   r   r   Zdatatyper   compatr   Zvdsr   r   Zh5py_warningsr   	frozensetr   rE   Z
get_configZmpir   r)   r[   r^   r   r_   rl   ro   r!   r!   r!   r"   <module>   s>   $
    
r 
"