B
    P?[K              C   @   s  d Z ddlZddlZddlZddlmZ ddlZddlm	Z	 dZ
dZdZd	Zed
eejZdddddddddddddddddgZddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdPdQdRdSdTdUdVh;Ze
e
e
e
e
e
e
e
e
e
e
e
e
e
e
e
e
e
e
e
e
e
e
e
e
e
e
e
e
e
e
e
e
e
e
e
e
e
e
e
e
e
e
e
e
e
eeeeeeeeeeeeeeeeeeeedWBZdXdXdYdZd[d\dZd]Zdld_d`ZdmdbdcZddde Zdfdg ZG dhdi dieZG djdk dkeZdS )na  
If you use the VADER sentiment analysis tools, please cite:

Hutto, C.J. & Gilbert, E.E. (2014). VADER: A Parsimonious Rule-based Model for
Sentiment Analysis of Social Media Text. Eighth International Conference on
Weblogs and Social Media (ICWSM-14). Ann Arbor, MI, June 2014.
    N)product   )pairwisegn?gnҿg~jt?gGzz[{0}].!?,;:-'"z!!z!!!z??z???z?!?z!?!z?!?!z!?!?ZaintZarentZcannotZcantZcouldntZdarentZdidntZdoesntzain'tzaren'tzcan'tzcouldn'tzdaren'tzdidn'tzdoesn'tZdontZhadntZhasntZhaventZisntZmightntZmustntZneitherzdon'tzhadn'tzhasn'tzhaven'tzisn'tzmightn'tzmustn'tZneedntzneedn'tneverZnoneZnopeZnornotZnothingZnowhereZoughtntZshantZshouldntZuhuhZwasntZwerentzoughtn'tzshan'tz	shouldn'tzuh-uhzwasn'tzweren'tZwithoutZwontZwouldntzwon'tzwouldn'tZrarelyZseldomZdespite)BZ
absolutelyZ	amazinglyZawfullyZ
completelyZconsiderablyZ	decidedlyZdeeplyZeffingZ
enormouslyZentirelyZ
especiallyZexceptionallyZ	extremelyZ
fabulouslyZflippingZflippinZfrickingZfrickinZfriggingZfrigginZfullyZfuckingZgreatlyZhellaZhighlyZhugelyZ
incrediblyZ	intenselyZmajorlyZmoreZmostZparticularlyZpurelyZquiteZreallyZ
remarkablysoZsubstantiallyZ
thoroughlyZtotallyZtremendouslyZuberZunbelievablyZ	unusuallyZutterlyveryZalmostZbarelyZhardlyzjust enoughzkind ofZkindaZkindofzkind-ofZlesslittleZ
marginallyZoccasionallyZpartlyZscarcelyZslightlyZsomewhatzsort ofZsortaZsortofzsort-of   g      ?   g      )zthe shitzthe bombzbad assz
yeah rightzcut the mustardzkiss of deathzhand to mouthTc                sn   t  t fdd| D rdS |r8tdd | D r8dS x0t| D ]$\}}| dkrB| dkrBdS qBW dS )z4
    Determine if input contains negation words
    c             3   s   | ]}|   kV  qd S )N)lower).0word)	neg_words 3lib/python3.7/site-packages/nltk/sentiment/vader.py	<genexpr>   s    znegated.<locals>.<genexpr>Tc             s   s   | ]}d |  kV  qdS )zn'tN)r   )r   r   r   r   r   r      s    leastatF)NEGATEanyr   r   )Zinput_wordsZ
include_ntfirstsecondr   )r   r   negated   s    r#      c             C   s   | t | |  |  }|S )zp
    Normalize the score to be between -1 and 1 using an alpha that
    approximates the max expected value
    )mathZsqrt)ZscoreZalphaZ
norm_scorer   r   r   	normalize   s    r&   c             C   sV   d}d}x| D ]}|  r|d7 }qW t| | }d|  k rJt| k rRn nd}|S )z
    Check whether just some words in the input are ALL CAPS

    :param list words: The words to inspect
    :returns: `True` if some but not all items in `words` are ALL CAPS
    Fr   r   T)isupperlen)ZwordsZis_differentZallcap_wordsr   Zcap_differentialr   r   r   allcap_differential   s    
r)   c             C   sV   d}|   }|tkrRt| }|dk r,|d9 }|  rR|rR|dkrJ|t7 }n|t8 }|S )z\
    Check if the preceding words increase, decrease, or negate/nullify the
    valence
    g        r   )r   BOOSTER_DICTr'   C_INCR)r   valenceis_cap_diffZscalarZ
word_lowerr   r   r   scalar_inc_dec  s    
r/   c               @   s(   e Zd ZdZdd Zdd Zdd ZdS )		SentiTextzL
    Identify sentiment-relevant string-level properties of input text.
    c             C   s8   t |tst|d}|| _|  | _t| j| _d S )Nzutf-8)
isinstancestrencodetext_words_and_emoticonswords_and_emoticonsr)   r.   )selfr4   r   r   r   __init__  s
    

zSentiText.__init__c             C   sb   t d| j}| }tdd |D }dd tt|D }dd t|tD }|}|| |S )zt
        Returns mapping of form:
        {
            'cat,': 'cat',
            ',cat': 'cat',
        }
         c             s   s   | ]}t |d kr|V  qdS )r   N)r(   )r   wr   r   r   r   /  s    z-SentiText._words_plus_punc.<locals>.<genexpr>c             S   s   i | ]}|d  d |qS )r   r9   )join)r   pr   r   r   
<dictcomp>1  s    z.SentiText._words_plus_punc.<locals>.<dictcomp>c             S   s   i | ]}|d  d |qS )r   r9   )r;   )r   r<   r   r   r   r=   2  s    )REGEX_REMOVE_PUNCTUATIONsubr4   splitsetr   	PUNC_LISTupdate)r7   Zno_punc_textZ
words_onlyZpunc_beforeZ
punc_afterwords_punc_dictr   r   r   _words_plus_punc#  s    
zSentiText._words_plus_puncc             C   sN   | j  }|  }dd |D }x(t|D ]\}}||kr*|| ||< q*W |S )z
        Removes leading and trailing puncutation
        Leaves contractions and most emoticons
            Does not preserve punc-plus-letter emoticons (e.g. :D)
        c             S   s   g | ]}t |d kr|qS )r   )r(   )r   wer   r   r   
<listcomp>?  s    z2SentiText._words_and_emoticons.<locals>.<listcomp>)r4   r@   rE   	enumerate)r7   ZwesrD   irF   r   r   r   r5   7  s    
zSentiText._words_and_emoticonsN)__name__
__module____qualname____doc__r8   rE   r5   r   r   r   r   r0     s   	r0   c               @   sz   e Zd ZdZdddZdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd ZdS )SentimentIntensityAnalyzerz8
    Give a sentiment intensity score to sentences.
    ;sentiment/vader_lexicon.zip/vader_lexicon/vader_lexicon.txtc             C   s   t j|| _|  | _d S )N)nltkdataloadlexicon_filemake_lex_dictlexicon)r7   rS   r   r   r   r8   K  s    z#SentimentIntensityAnalyzer.__init__c             C   sD   i }x:| j dD ]*}| ddd \}}t|||< qW |S )z6
        Convert lexicon file to a dictionary
        
	r   r   )rS   r@   stripfloat)r7   Zlex_dictliner   Zmeasurer   r   r   rT   Q  s
    z(SentimentIntensityAnalyzer.make_lex_dictc             C   s   t |}g }|j}xt|D ]l}d}||}|t|d k rZ| dkrZ||d   dksf| tkrr|| q| |||||}qW | ||}| 	||S )z
        Return a float for sentiment strength based on the input text.
        Positive values are positive valence, negative value are negative
        valence.
        r   r   ZkindZof)
r0   r6   indexr(   r   r+   appendsentiment_valence
_but_checkscore_valence)r7   r4   	sentitext
sentimentsr6   itemr-   rI   r   r   r   polarity_scores[  s    


z*SentimentIntensityAnalyzer.polarity_scoresc             C   s  |j }|j}| }|| jkr
| j| }| rP|rP|dkrH|t7 }n|t8 }xtddD ]}	||	kr\|||	d    | jkr\t|||	d   ||}
|	dkr|
dkr|
d }
|	dkr|
dkr|
d }
||
 }| |||	|}|	dkr\| 	|||}q\W | 
|||}|| |S )Nr   r   r   gffffff?r   g?)r.   r6   r   rU   r'   r,   ranger/   _never_check_idioms_check_least_checkr\   )r7   r-   r`   rb   rI   ra   r.   r6   Zitem_lowercasestart_isr   r   r   r]   w  s6    

	
z,SentimentIntensityAnalyzer.sentiment_valencec             C   s   |dkrd||d    | jkrd||d    dkrd||d    dkr||d    dkr|t }n:|dkr||d    | jkr||d    dkr|t }|S )Nr   r   r   r   r   r   )r   rU   N_SCALAR)r7   r-   r6   rI   r   r   r   rg     s    
z'SentimentIntensityAnalyzer._least_checkc             C   s   d|ksd|kry| d}W n tk
r<   | d}Y nX x\|D ]T}| |}||k rv|| |||d  qD||krD|| |||d  qDW |S )NZbutZBUTg      ?g      ?)r[   
ValueErrorpopinsert)r7   r6   ra   ZbiZ	sentimentZsir   r   r   r^     s    



z%SentimentIntensityAnalyzer._but_checkc             C   sh  d ||d  || }d ||d  ||d  || }d ||d  ||d  }d ||d  ||d  ||d  }d ||d  ||d  }|||||g}	x|	D ]}
|
tkrt|
 }P qW t|d |krd || ||d  }|tkrt| }t|d |d krHd || ||d  ||d  }|tkrHt| }|tks\|tkrd|t }|S )Nz{0} {1}r   z{0} {1} {2}r   r   )formatSPECIAL_CASE_IDIOMSr(   r+   B_DECR)r7   r-   r6   rI   ZonezeroZ
twoonezeroZtwooneZthreetwooneZthreetwo	sequencesseqZzerooneZ
zeroonetwor   r   r   rf     sB    







z(SentimentIntensityAnalyzer._idioms_checkc             C   s
  |dkr"t ||d  gr"|t }|dkr||d  dkrd||d  dksZ||d  dkrd|d }nt |||d   gr|t }|dkr||d  dkr||d  dks||d  dks||d  dks||d  dkr|d	 }n t |||d   gr|t }|S )
Nr   r   r   r   r   thisg      ?r   g      ?)r#   rj   )r7   r-   r6   rh   rI   r   r   r   re      s(    


z'SentimentIntensityAnalyzer._never_checkc             C   s    |  |}| |}|| }|S )N)_amplify_ep_amplify_qm)r7   sum_sr4   ep_amplifierqm_amplifierpunct_emph_amplifierr   r   r   _punctuation_emphasis  s    

z0SentimentIntensityAnalyzer._punctuation_emphasisc             C   s"   | d}|dkrd}|d }|S )Nr      g㥛 ?)count)r7   r4   Zep_countrw   r   r   r   rt   $  s
    
z&SentimentIntensityAnalyzer._amplify_epc             C   s0   | d}d}|dkr,|dkr(|d }nd}|S )Nr   r   r   r   g
ףp=
?gQ?)r|   )r7   r4   Zqm_countrx   r   r   r   ru   .  s    

z&SentimentIntensityAnalyzer._amplify_qmc             C   sd   d}d}d}xL|D ]D}|dkr.|t |d 7 }|dk rF|t |d 7 }|dkr|d7 }qW |||fS )Ng        r   r   )rY   )r7   ra   pos_sumneg_sum	neu_countZsentiment_scorer   r   r   _sift_sentiment_scores;  s    
z1SentimentIntensityAnalyzer._sift_sentiment_scoresc             C   s   |rt t|}| ||}|dkr.||7 }n|dk r>||8 }t|}| |\}}}|t|krn||7 }n|t|k r||8 }|t| | }	t||	 }
t||	 }t||	 }nd}d}
d}d}t|dt|dt|
dt|dd}|S )Nr   g        r   r{   )negneuposcompound)rY   sumrz   r&   r   r%   Zfabsround)r7   ra   r4   rv   ry   r   r}   r~   r   Ztotalr   r   r   Zsentiment_dictr   r   r   r_   M  s4    

z(SentimentIntensityAnalyzer.score_valenceN)rO   )rJ   rK   rL   rM   r8   rT   rc   r]   rg   r^   rf   re   rz   rt   ru   r   r_   r   r   r   r   rN   F  s   

22
rN   )T)r$   )rM   r%   restring	itertoolsr   Z	nltk.datarP   utilr   ZB_INCRrp   r,   rj   compilern   escapeZpunctuationr>   rB   r   r+   ro   r#   r&   r)   r/   objectr0   rN   r   r   r   r   <module>   sL  

	1