B
    P?š['  ć               @   s<   d dl mZmZ d dlmZ d dlmZ G dd deZdS )é    )Śprint_functionŚunicode_literals)Śload)ŚStemmerIc               @   s0   e Zd ZdZdd Zdd Zdd Zdd	 Zd
S )ŚRSLPStemmeruF  
    A stemmer for Portuguese.

        >>> from nltk.stem import RSLPStemmer
        >>> st = RSLPStemmer()
        >>> # opening lines of Erico Verissimo's "MĆŗsica ao Longe"
        >>> text = '''
        ... Clarissa risca com giz no quadro-negro a paisagem que os alunos
        ... devem copiar . Uma casinha de porta e janela , em cima duma
        ... coxilha .'''
        >>> for token in text.split():
        ...     print(st.stem(token))
        clariss risc com giz no quadro-negr a pais que os alun dev copi .
        uma cas de port e janel , em cim dum coxilh .
    c             C   s   g | _ | j  |  d”” | j  |  d”” | j  |  d”” | j  |  d”” | j  |  d”” | j  |  d”” | j  |  d”” d S )Nzstep0.ptzstep1.ptzstep2.ptzstep3.ptzstep4.ptzstep5.ptzstep6.pt)Ś_modelŚappendŚ	read_rule)Śself© r   ś-lib/python3.7/site-packages/nltk/stem/rslp.pyŚ__init__8   s    zRSLPStemmer.__init__c             C   sŌ   t d| dd d”}| d”}dd |D }dd |D }d	d |D }g }x|D ]x}g }| d
”}| |d dd ” | t|d ” | |d dd ” | dd |d  d”D ” | |” qTW |S )Nznltk:stemmers/rslp/Śraw)ŚformatŚutf8Ś
c             S   s   g | ]}|d kr|qS )Ś r   )Ś.0Śliner   r   r   ś
<listcomp>G   s    z)RSLPStemmer.read_rule.<locals>.<listcomp>c             S   s   g | ]}|d  dkr|qS )r   ś#r   )r   r   r   r   r   r   H   s    c             S   s   g | ]}|  d d”qS )z		ś	)Śreplace)r   r   r   r   r   r   K   s    r   r   é   é’’’’é   c             S   s   g | ]}|d d qS )r   r   r   )r   Śtokenr   r   r   r   ]   s    é   ś,)r   ŚdecodeŚsplitr   Śint)r
   ŚfilenameŚrulesŚlinesr   ŚruleŚtokensr   r   r   r	   C   s    


zRSLPStemmer.read_rulec             C   s   |  ” }|d dkr |  |d”}|d dkr8|  |d”}|  |d”}|  |d”}|}|  |d”}||kr|}|  |d	”}||kr|  |d
”}|S )Nr   Śsr   Śar   r   r   é   é   é   )ŚlowerŚ
apply_rule)r
   ŚwordZ	prev_wordr   r   r   Śstemd   s    zRSLPStemmer.stemc             C   sv   | j | }xf|D ]^}t|d }|| d  |d krt|||d  kr||d kr|d |  |d  }P qW |S )Nr   r   r   r   )r   Ślen)r
   r.   Z
rule_indexr#   r%   Zsuffix_lengthr   r   r   r-      s    

zRSLPStemmer.apply_ruleN)Ś__name__Ś
__module__Ś__qualname__Ś__doc__r   r	   r/   r-   r   r   r   r   r   '   s
   !r   N)Z
__future__r   r   Z	nltk.datar   Znltk.stem.apir   r   r   r   r   r   Ś<module>!   s   