B
    îq\°  ã               @   sJ   d Z ddlZddlmZ ddgZG dd„ dƒZdadd	d„Zdd
d„ZdS )zG
This file contains routines to verify the correctness of UCD strings.
é    N)ÚdataÚ	parse_ucdÚ	check_ucdc               @   s8   e Zd ZdZdd„ Zdd„ Zdd„ Zdd	„ Zd
d„ ZdS )ÚUCDWordsz£
    Manages a list of acceptable UCD words.

    Works by reading in a data file exactly as provided by IVOA.  This
    file resides in data/ucd1p-words.txt.
    c          	   C   sª   t ƒ | _t ƒ | _i | _i | _tjdddv}xn| ¡ D ]b}dd„ | d¡D ƒ\}}}| 	¡ }|dkrp| j 
|¡ |dkr„| j 
|¡ || j|< || j|< q6W W d Q R X d S )	Nzdata/ucd1p-words.txtÚascii)Úencodingc             S   s   g | ]}|  ¡ ‘qS © )Ústrip)Ú.0Úxr   r   ú5lib/python3.7/site-packages/astropy/io/votable/ucd.pyú
<listcomp>"   s    z%UCDWords.__init__.<locals>.<listcomp>ú|ZQPEVZQSEV)ÚsetÚ_primaryÚ
_secondaryÚ_descriptionsÚ_capitalizationr   Zget_pkg_data_fileobjÚ	readlinesÚsplitÚlowerÚadd)ÚselfÚfdÚlineÚtypeÚnameZdescrZ
name_lowerr   r   r   Ú__init__   s    
zUCDWords.__init__c             C   s   |  ¡ | jkS )zA
        Returns True if *name* is a valid primary name.
        )r   r   )r   r   r   r   r   Ú
is_primary+   s    zUCDWords.is_primaryc             C   s   |  ¡ | jkS )zC
        Returns True if *name* is a valid secondary name.
        )r   r   )r   r   r   r   r   Úis_secondary1   s    zUCDWords.is_secondaryc             C   s   | j | ¡  S )z[
        Returns the official English description of the given UCD
        *name*.
        )r   r   )r   r   r   r   r   Úget_description7   s    zUCDWords.get_descriptionc             C   s   | j | ¡  S )zM
        Returns the standard capitalization form of the given name.
        )r   r   )r   r   r   r   r   Únormalize_capitalization>   s    z!UCDWords.normalize_capitalizationN)	Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r   r   r    r!   r   r   r   r   r      s   r   Fc          	   C   sÂ  t dkrtƒ a |r t d| ¡}nt d| ¡}|dk	rJtd | d¡| ¡ƒ‚d}d ||¡}|  d¡}g }xRt|ƒD ]D\}}	|	 	d	¡}
|
d
krÄ|	 d	d
¡\}}	t 
||¡sºtd |¡ƒ‚| ¡ }n|
d
krÜtd |	¡ƒ‚nd}t 
||	¡sútd |	¡ƒ‚|dkr‚|r‚|dkrLt  |	¡s‚t  |	¡r<td |	¡ƒ‚ntd |	¡ƒ‚n6t  |	¡s‚t  |	¡rttd |	¡ƒ‚ntd |	¡ƒ‚yt  |	¡}W n tk
rª   |	}Y nX | ||f¡ qtW |S )a-  
    Parse the UCD into its component parts.

    Parameters
    ----------
    ucd : str
        The UCD string

    check_controlled_vocabulary : bool, optional
        If `True`, then each word in the UCD will be verified against
        the UCD1+ controlled vocabulary, (as required by the VOTable
        specification version 1.2), otherwise not.

    has_colon : bool, optional
        If `True`, the UCD may contain a colon (as defined in earlier
        versions of the standard).

    Returns
    -------
    parts : list
        The result is a list of tuples of the form:

            (*namespace*, *word*)

        If no namespace was explicitly specified, *namespace* will be
        returned as ``'ivoa'`` (i.e., the default namespace).

    Raises
    ------
    ValueError : *ucd* is invalid
    Nz[^A-Za-z0-9_.:;\-]z[^A-Za-z0-9_.;\-]z&UCD has invalid character '{}' in '{}'r   z[A-Za-z0-9][A-Za-z0-9\-_]*z	{}(\.{})*ú;ú:é   zInvalid namespace '{}'zToo many colons in '{}'ZivoazInvalid word '{}'z2Secondary word '{}' is not valid as a primary wordzUnknown word '{}'z2Primary word '{}' is not valid as a secondary word)Ú_ucd_singletonr   ÚreÚsearchÚ
ValueErrorÚformatÚgroupr   Ú	enumerateÚcountÚmatchr   r   r   r!   ÚKeyErrorÚappend)ÚucdÚcheck_controlled_vocabularyÚ	has_colonÚmZword_component_reZword_reÚpartsZwordsÚiZwordZcolon_countÚnsZnormalized_wordr   r   r   r   H   sX    !






c             C   s8   | dkrdS yt | ||d W n tk
r2   dS X dS )a5  
    Returns False if *ucd* is not a valid `unified content descriptor`_.

    Parameters
    ----------
    ucd : str
        The UCD string

    check_controlled_vocabulary : bool, optional
        If `True`, then each word in the UCD will be verified against
        the UCD1+ controlled vocabulary, (as required by the VOTable
        specification version 1.2), otherwise not.

    has_colon : bool, optional
        If `True`, the UCD may contain a colon (as defined in earlier
        versions of the standard).

    Returns
    -------
    valid : bool
    NT)r5   r6   F)r   r,   )r4   r5   r6   r   r   r   r   £   s    )FF)FF)	r%   r*   Zastropy.utilsr   Ú__all__r   r)   r   r   r   r   r   r   Ú<module>   s   5
[