B
    P?ð[$  ã               @   sd   d Z ddlmZmZ ddlmZ ddlmZ ddlm	Z	 eeƒG dd„ de
ƒƒZG dd	„ d	eƒZd
S )z
Tokenizer Interface
é    )ÚABCMetaÚabstractmethod)Úadd_metaclass)Ú
overridden)Ústring_span_tokenizec               @   s4   e Zd ZdZedd„ ƒZdd„ Zdd„ Zdd	„ Zd
S )Ú
TokenizerIz†
    A processing interface for tokenizing a string.
    Subclasses must define ``tokenize()`` or ``tokenize_sents()`` (or both).
    c             C   s   t | jƒr|  |g¡d S dS )zN
        Return a tokenized copy of *s*.

        :rtype: list of str
        r   N)r   Útokenize_sents)ÚselfÚs© r   ú0lib/python3.7/site-packages/nltk/tokenize/api.pyÚtokenize   s    
zTokenizerI.tokenizec             C   s
   t ƒ ‚dS )z·
        Identify the tokens using integer offsets ``(start_i, end_i)``,
        where ``s[start_i:end_i]`` is the corresponding token.

        :rtype: iter(tuple(int, int))
        N)ÚNotImplementedError)r	   r
   r   r   r   Úspan_tokenize%   s    zTokenizerI.span_tokenizec                s   ‡ fdd„|D ƒS )z«
        Apply ``self.tokenize()`` to each element of ``strings``.  I.e.:

            return [self.tokenize(s) for s in strings]

        :rtype: list(list(str))
        c                s   g | ]}ˆ   |¡‘qS r   )r   )Ú.0r
   )r	   r   r   ú
<listcomp>6   s    z-TokenizerI.tokenize_sents.<locals>.<listcomp>r   )r	   Ústringsr   )r	   r   r   .   s    zTokenizerI.tokenize_sentsc             c   s"   x|D ]}t |  |¡ƒV  qW dS )zÁ
        Apply ``self.span_tokenize()`` to each element of ``strings``.  I.e.:

            return [self.span_tokenize(s) for s in strings]

        :rtype: iter(list(tuple(int, int)))
        N)Úlistr   )r	   r   r
   r   r   r   Úspan_tokenize_sents8   s    
zTokenizerI.span_tokenize_sentsN)	Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r   r   r   r   r   r   r   r   r      s
   
	
r   c               @   s    e Zd ZdZdd„ Zdd„ ZdS )ÚStringTokenizerzxA tokenizer that divides a string into substrings by splitting
    on the specified string (defined in subclasses).
    c             C   s   |  | j¡S )N)ÚsplitÚ_string)r	   r
   r   r   r   r   I   s    zStringTokenizer.tokenizec             c   s    xt || jƒD ]
}|V  qW d S )N)r   r   )r	   r
   Úspanr   r   r   r   L   s    zStringTokenizer.span_tokenizeN)r   r   r   r   r   r   r   r   r   r   r   D   s   r   N)r   Úabcr   r   Zsixr   Znltk.internalsr   Znltk.tokenize.utilr   Úobjectr   r   r   r   r   r   Ú<module>   s   /