ó
ù`]c           @  s\   d  d l  m Z d  d l Z d  d l m Z d  d l m Z e d e f d „  ƒ  Yƒ Z d S(   iÿÿÿÿ(   t   unicode_literalsN(   t   StemmerI(   t   python_2_unicode_compatiblet   RegexpStemmerc           B  s,   e  Z d  Z d d „ Z d „  Z d „  Z RS(   uä  
    A stemmer that uses regular expressions to identify morphological
    affixes.  Any substrings that match the regular expressions will
    be removed.

        >>> from nltk.stem import RegexpStemmer
        >>> st = RegexpStemmer('ing$|s$|e$|able$', min=4)
        >>> st.stem('cars')
        'car'
        >>> st.stem('mass')
        'mas'
        >>> st.stem('was')
        'was'
        >>> st.stem('bee')
        'bee'
        >>> st.stem('compute')
        'comput'
        >>> st.stem('advisable')
        'advis'

    :type regexp: str or regexp
    :param regexp: The regular expression that should be used to
        identify morphological affixes.
    :type min: int
    :param min: The minimum length of string to stem
    i    c         C  s7   t  | d ƒ s! t j | ƒ } n  | |  _ | |  _ d  S(   Nu   pattern(   t   hasattrt   ret   compilet   _regexpt   _min(   t   selft   regexpt   min(    (    s/   lib/python2.7/site-packages/nltk/stem/regexp.pyt   __init__-   s    	c         C  s0   t  | ƒ |  j k  r | S|  j j d | ƒ Sd  S(   Nu    (   t   lenR   R   t   sub(   R	   t   word(    (    s/   lib/python2.7/site-packages/nltk/stem/regexp.pyt   stem4   s    c         C  s   d j  |  j j ƒ S(   Nu   <RegexpStemmer: {!r}>(   t   formatR   t   pattern(   R	   (    (    s/   lib/python2.7/site-packages/nltk/stem/regexp.pyt   __repr__:   s    (   t   __name__t
   __module__t   __doc__R   R   R   (    (    (    s/   lib/python2.7/site-packages/nltk/stem/regexp.pyR      s   	(   t
   __future__R    R   t   nltk.stem.apiR   t   nltk.compatR   R   (    (    (    s/   lib/python2.7/site-packages/nltk/stem/regexp.pyt   <module>	   s
   