B
    &]\bM                 @   s  d dl mZmZmZ d dlZd dlZd dlZd dlmZ d dl	Z
d dlmZ ddddgZed	Zed
ZedZedZedZedZedZedZedZG dd deZG dd deZdd Zdd Zdd Zdd Zdd Zdd Z d d! Z!d"d# Z"d$d% Z#d&d' Z$d(d) Z%d*d+ Z&d,d- Z'd.d/ Z(d0d1 Z)G d2d de*Z+d3d Z,d4d5 Z-d6d7 Z.d8d9 Z/d:d; Z0d<e0_1e2d=krd dl3Z3e3j4d> Z5e0e5 dS )?    )divisionprint_functionabsolute_importN)partial)nextMetaDataloadarff	ArffErrorParseArffErrorz^\s*@z^%z^\s+$z^@\S*z^@[Dd][Aa][Tt][Aa]z*^@[Rr][Ee][Ll][Aa][Tt][Ii][Oo][Nn]\s*(\S*)z/^@[Aa][Tt][Tt][Rr][Ii][Bb][Uu][Tt][Ee]\s*(..*$)z'(..+)'\s+(..+$)z(\S+)\s+(..+$)c               @   s   e Zd ZdS )r	   N)__name__
__module____qualname__ r   r   5lib/python3.7/site-packages/scipy/io/arff/arffread.pyr	   6   s   c               @   s   e Zd ZdS )r
   N)r   r   r   r   r   r   r   r
   :   s   c             C   s   |    }|d dkrdS |dtd dkr4dS |dtd dkrLdS |dtd dkrddS |dtd dkr|dS |dtd	 d	krd	S |dtd
 d
krd
S td| dS )z`Given an arff attribute value (meta data), returns its type.

    Expect the value to be a name.r   {nominalNrealnumericintegerstring
relationaldatezunknown attribute %s)lowerstriplenr
   )ZattrtypeZ
uattributer   r   r   
parse_typeD   s     r   c             C   s
   |  dS )z5If attribute is nominal, returns a list of the values,)split)	attributer   r   r   get_nominal[   s    r   c             C   s>   t | g}|d  d dkr&td|dd | D  |S )z4Read each line of the iterable and put it in a list.r   r   z0This looks like a sparse ARFF: not supported yetc             S   s   g | ]}|qS r   r   ).0ir   r   r   
<listcomp>e   s    z"read_data_list.<locals>.<listcomp>)r   r   
ValueErrorextend)ofiledatar   r   r   read_data_list`   s
    
r'   c             C   sD   t | g}d}|d  d dkr*tdx| D ]}|d7 }q0W |S )z5Read the whole file to get number of data attributes.   r   r   z0This looks like a sparse ARFF: not supported yet)r   r   r#   )r%   r&   Zlocr!   r   r   r   	get_ndatai   s    

r)   c             C   s   t | }tdd |D S )a
  Given a string containing a nominal type definition, returns the
    string len of the biggest component.

    A nominal type is defined as seomthing framed between brace ({}).

    Parameters
    ----------
    atrv : str
       Nominal type definition

    Returns
    -------
    slen : int
       length of longest component

    Examples
    --------
    maxnomlen("{floup, bouga, fl, ratata}") returns 6 (the size of
    ratata, the longest nominal value).

    >>> maxnomlen("{floup, bouga, fl, ratata}")
    6
    c             s   s   | ]}t |V  qd S )N)r   )r    r!   r   r   r   	<genexpr>   s    zmaxnomlen.<locals>.<genexpr>)get_nom_valmax)atrvZnomtpr   r   r   	maxnomlent   s    r.   c             C   sB   t d}|| }|r6tdd |ddD S tddS )a  Given a string containing a nominal type, returns a tuple of the
    possible values.

    A nominal type is defined as something framed between braces ({}).

    Parameters
    ----------
    atrv : str
       Nominal type definition

    Returns
    -------
    poss_vals : tuple
       possible values

    Examples
    --------
    >>> get_nom_val("{floup, bouga, fl, ratata}")
    ('floup', 'bouga', 'fl', 'ratata')
    z{(.+)}c             s   s   | ]}|  V  qd S )N)r   )r    r!   r   r   r   r*      s    zget_nom_val.<locals>.<genexpr>r(   r   z(This does not look like a nominal stringN)recompilematchtuplegroupr   r#   )r-   Z	r_nominalmr   r   r   r+      s
    

r+   c             C   s  t d}|| }|r|d }d }d|krD|dd}d}n|dd}d}d|krl|dd	}d
}d|kr|dd}d}d|kr|dd}d}d|kr|dd}d}d|kr|dd}d}d|ksd|krtd|d krtd||fS tdd S )Nz%[Dd][Aa][Tt][Ee]\s+[\"']?(.+?)[\"']?$r(   Zyyyyz%YYZyyz%yZMMz%mMZddz%dDZHHz%HhZmmz%Mr4   Zssz%SszZz6Date type attributes with time zone not supported, yetz"Invalid or unsupported date formatzInvalid or no date format)r/   r0   r1   r3   r   replacer#   )r-   Zr_dater4   patterndatetime_unitr   r   r   get_date_format   s>    

r?   c             C   s   t dd | S )zWSkip header.

    the first next() call of the returned iterator will be the @data linec             S   s   t |  S )N)
r_datametar1   )xr   r   r   <lambda>   s    zgo_data.<locals>.<lambda>)	itertools	dropwhile)r%   r   r   r   go_data   s    rE   c             C   s   |  }t|}|rj|d}t|r@t|\}}t| }qvt|r`t|\}}t| }qvt	dnt	d| |dkrt	d|||fS )ah  Parse a raw string in header (eg starts by @attribute).

    Given a raw string attribute, try to get the name and type of the
    attribute. Constraints:

    * The first line must start with @attribute (case insensitive, and
      space like characters before @attribute are allowed)
    * Works also if the attribute is spread on multilines.
    * Works if empty lines or comments are in between

    Parameters
    ----------
    attribute : str
       the attribute string.

    Returns
    -------
    name : str
       name of the attribute
    value : str
       value of the attribute
    next : str
       next line to be parsed

    Examples
    --------
    If attribute is a string defined in python as r"floupi real", will
    return floupi as name, and real as value.

    >>> iterable = iter([0] * 10) # dummy iterator
    >>> tokenize_attribute(iterable, r"@attribute floupi real")
    ('floupi', 'real', 0)

    If attribute is r"'floupi 2' real", will return 'floupi 2' as name,
    and real as value.

    >>> tokenize_attribute(iterable, r"  @attribute 'floupi 2' real   ")
    ('floupi 2', 'real', 0)

    r(   zmulti line not supported yetzFirst line unparsable: %sr   z'relational attributes not supported yet)
r   r_attributer1   r3   r_comattrvaltokenize_single_commar   r_wcomattrvaltokenize_single_wcommar#   )iterabler   ZsattrZmattrr-   nametypeZ	next_itemr   r   r   tokenize_attribute   s    )






rN   c             C   sb   t | }|rNy |d }|d }W qZ tk
rJ   tdY qZX ntd|  ||fS )Nr(      z Error while tokenizing attributez Error while tokenizing single %s)rG   r1   r3   r   
IndexErrorr#   )valr4   rL   rM   r   r   r   rH      s    
rH   c             C   sb   t | }|rNy |d }|d }W qZ tk
rJ   tdY qZX ntd|  ||fS )Nr(   rO   z Error while tokenizing attributez Error while tokenizing single %s)rI   r1   r3   r   rP   r#   )rQ   r4   rL   rM   r   r   r   rJ   /  s    
rJ   c       	      C   s   t | }xt|rt | }q
W d}g }xt|st|}|rt|}|rpt| |\}}}|||f qt|}|r|	d}nt
d| t | }q*t | }q*W ||fS )z&Read the header of the iterable ofile.Nr(   zError parsing line %s)r   	r_commentr1   r@   r_headerlinerF   rN   append
r_relationr3   r#   )	r%   r!   ZrelationZ
attributesr4   ZisattrrL   rM   Zisrelr   r   r   read_header>  s&    



rV   c             C   s   d| krt jS t| S dS )av  given a string x, convert it to a float. If the stripped string is a ?,
    return a Nan (missing value).

    Parameters
    ----------
    x : str
       string to convert

    Returns
    -------
    f : float
       where float can be nan

    Examples
    --------
    >>> safe_float('1')
    1.0
    >>> safe_float('1\n')
    1.0
    >>> safe_float('?\n')
    nan
    ?N)npnanfloat)rA   r   r   r   
safe_float`  s    r[   c             C   s<   |   }||kr|S |dkr |S tdt|t|f d S )NrW   z%s value not in %s)r   r#   str)valuepvalueZsvaluer   r   r   safe_nominal}  s    r_   c             C   sN   |    d d}|dkr(td|S tj||}t|d| S d S )N'"rW   ZNaTzdatetime64[%s])r   rX   Z
datetime64datetimeZstrptimeZastype)r]   date_formatr>   Zdate_strZdtr   r   r   	safe_date  s
    rd   c               @   s@   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dS )r   ay  Small container to keep useful information on a ARFF dataset.

    Knows about attributes names and types.

    Examples
    --------
    ::

        data, meta = loadarff('iris.arff')
        # This will print the attributes names of the iris.arff dataset
        for i in meta:
            print(i)
        # This works too
        meta.names()
        # Getting attribute type
        types = meta.types()

    Notes
    -----
    Also maintains the list of attributes in order, i.e. doing for i in
    meta, where meta is an instance of MetaData, will return the
    different attribute names in the order they were defined.
    c             C   s   || _ i | _g | _xn|D ]f\}}t|}| j| |dkrP|t|f| j|< q|dkrp|t|d f| j|< q|d f| j|< qW d S )Nr   r   r   )rL   _attributes
_attrnamesr   rT   r+   r?   )selfrelattrrL   r]   tpr   r   r   __init__  s    zMetaData.__init__c             C   sp   d}|d| j  7 }xX| jD ]N}|d|| j| d f 7 }| j| d r`|dt| j| d  7 }|d7 }qW |S )N zDataset: %s
z	%s's type is %sr   r(   z, range is %s
)rL   rf   re   r\   )rg   msgr!   r   r   r   __repr__  s    zMetaData.__repr__c             C   s
   t | jS )N)iterrf   )rg   r   r   r   __iter__  s    zMetaData.__iter__c             C   s
   | j | S )N)re   )rg   keyr   r   r   __getitem__  s    zMetaData.__getitem__c             C   s   | j S )z#Return the list of attribute names.)rf   )rg   r   r   r   names  s    zMetaData.namesc                s    fdd j D }|S )z#Return the list of attribute types.c                s   g | ]} j | d  qS )r   )re   )r    rL   )rg   r   r   r"     s    z"MetaData.types.<locals>.<listcomp>)rf   )rg   Z
attr_typesr   )rg   r   types  s    zMetaData.typesN)
r   r   r   __doc__rk   ro   rq   rs   rt   ru   r   r   r   r   r     s   
c             C   s:   t | dr| }n
t| d}zt|S || k	r4|  X dS )a   
    Read an arff file.

    The data is returned as a record array, which can be accessed much like
    a dictionary of numpy arrays.  For example, if one of the attributes is
    called 'pressure', then its first 10 data points can be accessed from the
    ``data`` record array like so: ``data['pressure'][0:10]``


    Parameters
    ----------
    f : file-like or str
       File-like object to read from, or filename to open.

    Returns
    -------
    data : record array
       The data of the arff file, accessible by attribute names.
    meta : `MetaData`
       Contains information about the arff file such as name and
       type of attributes, the relation (name of the dataset), etc...

    Raises
    ------
    ParseArffError
        This is raised if the given file is not ARFF-formatted.
    NotImplementedError
        The ARFF file has an attribute which is not supported yet.

    Notes
    -----

    This function should be able to read most arff files. Not
    implemented functionality include:

    * date type attributes
    * string type attributes

    It can read files with numeric and nominal attributes.  It cannot read
    files with sparse data ({} in the file).  However, this function can
    read files with missing data (? in the file), representing the data
    points as NaNs.

    Examples
    --------
    >>> from scipy.io import arff
    >>> from io import StringIO
    >>> content = """
    ... @relation foo
    ... @attribute width  numeric
    ... @attribute height numeric
    ... @attribute color  {red,green,blue,yellow,black}
    ... @data
    ... 5.0,3.25,blue
    ... 4.5,3.75,green
    ... 3.0,4.00,red
    ... """
    >>> f = StringIO(content)
    >>> data, meta = arff.loadarff(f)
    >>> data
    array([(5.0, 3.25, 'blue'), (4.5, 3.75, 'green'), (3.0, 4.0, 'red')],
          dtype=[('width', '<f8'), ('height', '<f8'), ('color', '|S6')])
    >>> meta
    Dataset: foo
    	width's type is numeric
    	height's type is numeric
    	color's type is nominal, range is ('red', 'green', 'blue', 'yellow', 'black')

    readZrtN)hasattropen	_loadarffclose)fr%   r   r   r   r     s    F

c          
      s  yt | \}}W n6 tk
rF } zdt| }t|W d d }~X Y nX d}x$|D ]\}}t|}|dkrRd}qRW t||}	tttd}
tttd}g }g  |sVx|D ]\}}t|}|dkrt|\}}|	|d| f  	t
t||d q|d	kr4t|}|	|d
| f t|} 	t
t|d q|	||
| f  	t qW ntdt d fdd	}|| }t||}||	fS )Nz'Error while parsing header, error was: Fr   T)r   r   r   r   zdatetime64[%s])rc   r>   r   zS%d)r^   z*String attributes not supported yet, sorryr   c             3   sX   t t}xF| D ]>}t|st|r,q|| t fdd|D V  qW d S )Nc                s   g | ]} | | qS r   r   )r    r!   )
convertorsrowr   r   r"   q  s    z0_loadarff.<locals>.generator.<locals>.<listcomp>)listrangerR   r1   r_emptyr   r2   )Zrow_iterZdelimZelemsraw)r}   ni)r~   r   	generatorZ  s    

z_loadarff.<locals>.generator)r   )rV   r#   r\   r
   r   r   rZ   r[   r?   rT   r   rd   r.   r+   r_   NotImplementedErrorr   rX   Zfromiter)r%   rh   ri   ern   hasstrrL   r]   rM   metaZ
acls2dtypeZ	acls2convZdescrrc   r>   nr^   r   ar&   r   )r}   r   r   rz   #  sL    
	

rz   c             C   s<   | j d | j d  }t| t| t| t| | fS )Ng      ?r(   )sizerX   ZnanminZnanmaxmeanstd)r&   Znbfacr   r   r   basic_stats|  s    r   c       
      C   s   |d }|dks |dks |dkrJt |\}}}}td| |||||f  nZ| d }x0tt|d d D ]}	||d |	 d 7 }qhW ||d d	 7 }|d
7 }t| d S )Nr   r   r   r   z%s,%s,%f,%f,%f,%fz,{r(   r   })r   printr   r   )
rL   rj   r&   rM   minr,   r   r   rn   r!   r   r   r   print_attribute  s    r   c             C   sJ   t | \}}tt|j t|j x |D ]}t||| ||  q*W d S )N)r   r   r   Zdtyper   r   )filenamer&   r   r!   r   r   r   	test_weka  s
    

r   F__main__r(   )6Z
__future__r   r   r   r/   rC   rb   	functoolsr   ZnumpyrX   Zscipy._lib.sixr   __all__r0   Zr_metarR   r   rS   r@   rU   rF   rG   rI   IOErrorr	   r
   r   r   r'   r)   r.   r+   r?   rE   rN   rH   rJ   rV   r[   r_   rd   objectr   r   rz   r   r   r   Z__test__r   sysargvr   r   r   r   r   <module>   sX   









	(
A"
	BQY	

