ó
ù`]c           @   sˆ   d  Z  d d l m Z m Z d d l m Z d d l m Z d d l m	 Z	 e e ƒ d e
 f d „  ƒ  Yƒ Z d e f d	 „  ƒ  YZ d
 S(   s   
Tokenizer Interface
iÿÿÿÿ(   t   ABCMetat   abstractmethod(   t   add_metaclass(   t
   overridden(   t   string_span_tokenizet
   TokenizerIc           B   s8   e  Z d  Z e d „  ƒ Z d „  Z d „  Z d „  Z RS(   s†   
    A processing interface for tokenizing a string.
    Subclasses must define ``tokenize()`` or ``tokenize_sents()`` (or both).
    c         C   s'   t  |  j ƒ r# |  j | g ƒ d Sd S(   sN   
        Return a tokenized copy of *s*.

        :rtype: list of str
        i    N(   R   t   tokenize_sents(   t   selft   s(    (    s0   lib/python2.7/site-packages/nltk/tokenize/api.pyt   tokenize   s    c         C   s   t  ƒ  ‚ d S(   s·   
        Identify the tokens using integer offsets ``(start_i, end_i)``,
        where ``s[start_i:end_i]`` is the corresponding token.

        :rtype: iter(tuple(int, int))
        N(   t   NotImplementedError(   R   R   (    (    s0   lib/python2.7/site-packages/nltk/tokenize/api.pyt   span_tokenize%   s    c         C   s    g  | D] } |  j  | ƒ ^ q S(   s«   
        Apply ``self.tokenize()`` to each element of ``strings``.  I.e.:

            return [self.tokenize(s) for s in strings]

        :rtype: list(list(str))
        (   R	   (   R   t   stringsR   (    (    s0   lib/python2.7/site-packages/nltk/tokenize/api.pyR   .   s    c         c   s)   x" | D] } t  |  j | ƒ ƒ Vq Wd S(   sÁ   
        Apply ``self.span_tokenize()`` to each element of ``strings``.  I.e.:

            return [self.span_tokenize(s) for s in strings]

        :rtype: iter(list(tuple(int, int)))
        N(   t   listR   (   R   R   R   (    (    s0   lib/python2.7/site-packages/nltk/tokenize/api.pyt   span_tokenize_sents8   s    (   t   __name__t
   __module__t   __doc__R   R	   R   R   R   (    (    (    s0   lib/python2.7/site-packages/nltk/tokenize/api.pyR      s
   
			
t   StringTokenizerc           B   s    e  Z d  Z d „  Z d „  Z RS(   sx   A tokenizer that divides a string into substrings by splitting
    on the specified string (defined in subclasses).
    c         C   s   | j  |  j ƒ S(   N(   t   splitt   _string(   R   R   (    (    s0   lib/python2.7/site-packages/nltk/tokenize/api.pyR	   I   s    c         c   s&   x t  | |  j ƒ D] } | Vq Wd  S(   N(   R   R   (   R   R   t   span(    (    s0   lib/python2.7/site-packages/nltk/tokenize/api.pyR   L   s    (   R   R   R   R	   R   (    (    (    s0   lib/python2.7/site-packages/nltk/tokenize/api.pyR   D   s   	N(   R   t   abcR    R   t   sixR   t   nltk.internalsR   t   nltk.tokenize.utilR   t   objectR   R   (    (    (    s0   lib/python2.7/site-packages/nltk/tokenize/api.pyt   <module>   s   /