B
    A!p\:                 @   s   d Z ddlmZ ddlZddlmZ ddlmZ ddlZddl	m
Z
 ddlmZ ddlmZmZ dd	lmZ G d
d dZdd ZG dd dZddiZdd ZG dd deeZdd ZdddZdddZedkre  dS )zGa similarities / code duplication command line tool and pylint checker
    )print_functionN)defaultdict)groupby)decoding_stream)IRawChecker)BaseCheckertable_lines_from_stats)Tablec               @   sL   e Zd ZdZdddZdddZd	d
 Zdd Zdd Zdd Z	dd Z
dS )Similarz,finds copy-pasted lines of code in a project   Fc             C   s"   || _ || _|| _|| _g | _d S )N)	min_linesignore_commentsignore_docstringsignore_importslinesets)selfr   r   r   r    r   6lib/python3.7/site-packages/pylint/checkers/similar.py__init__$   s
    zSimilar.__init__Nc             C   sZ   |dkr|j }nt||j }y$| jt|| | j| j| j W n tk
rT   Y nX dS )z(append a file to search for similaritiesN)		readlinesr   r   appendLineSetr   r   r   UnicodeDecodeError)r   Zstreamidstreamencodingr   r   r   r   append_stream1   s    zSimilar.append_streamc             C   s   |  |   dS )z<start looking for similarities and display results on stdoutN)_display_sims_compute_sims)r   r   r   r   runD   s    zSimilar.runc             C   s   t t}x||  D ]p\}}}}}|| }xX|D ]:}||f|ksJ||f|kr.|||f |||f P q.W |||f||fh qW g }	x0| D ]$\}}
x|
D ]}|	||f qW qW |	  |	  |	S )z&compute similarities in appended files)r   list
_iter_simsaddr   itemssortreverse)r   Zno_duplicatesnumlineset1Zidx1lineset2Zidx2Z	duplicatecouplessimsZ	ensemblesr   r   r   r   H   s     

zSimilar._compute_simsc       	      C   s   d}x|D ]\}}t   t |dt|d t|}x"|D ]\}}t d|j|f  q8W x(|j|||  D ]}t d|  qjW ||t|d  7 }q
W tdd | jD }t d	|||d
 | f  dS )z'display computed similarities on stdoutr   zsimilar lines infilesz==%s:%sz     c             S   s   g | ]}t |qS r   )len).0linesetr   r   r   
<listcomp>i   s    z)Similar._display_sims.<locals>.<listcomp>z)TOTAL lines=%s duplicates=%s percent=%.2fg      Y@N)printr,   sortedname_real_linesrstripsumr   )	r   r)   Znb_lignes_dupliqueesr%   r(   r.   idxlineZnb_total_lignesr   r   r   r   \   s     zSimilar._display_simsc             c   s   |j }|j }|j}d}| j}x|t|k rd}d}	x||| D ]}
d}xtt||||
D ]L\}	\\}}\}}||kr||kr|	||||
fV  t||	}P |r`|d7 }q`W |	d7 }	||kr|	||||
fV  t||	}q@W ||7 }qW dS )z+find similarities in the two given linesetsr   r+   N)enumerate_strippedfindr   r,   	enumeratezipmax)r   r&   r'   Zlines1Zlines2r9   Zindex1r   skipr%   Zindex2Z	non_blank_Zline1Zline2r   r   r   _find_commons   s0    (
zSimilar._find_commonc             c   s\   xVt | jdd D ]@\}}x6| j|d d D ] }x| ||D ]
}|V  qBW q0W qW dS )zWiterate on similarities among all files, by making a cartesian
        product
        Nr+   )r:   r   r?   )r   r6   r.   r'   simr   r   r   r       s    zSimilar._iter_sims)r   FFF)N)__name__
__module____qualname____doc__r   r   r   r   r   r?   r    r   r   r   r   r
   !   s      

r
   c             C   s   |rBt d| }dd |jD }dd t|dd dD }d	}g }d
}	xt| ddD ]\}
}| }|r|	s|ds|dr|d
d }	|dd
 }|	r||	rd
}	d}|r|	|
|}|rd}|r|
ddd  }|| qXW |S )z\return lines with leading/trailing whitespace and any ignored code
    features removed
     c             s   s&   | ]}|j t|tjtjffV  qd S )N)lineno
isinstanceastroidZImportZ
ImportFrom)r-   noder   r   r   	<genexpr>   s   z!stripped_lines.<locals>.<genexpr>c             S   s$   i | ]\}}t d d |D |qS )c             s   s   | ]\}}|V  qd S )Nr   )r-   r>   Z	is_importr   r   r   rK      s    z,stripped_lines.<locals>.<dictcomp>.<genexpr>)all)r-   rG   Znode_is_import_groupr   r   r   
<dictcomp>   s   z"stripped_lines.<locals>.<dictcomp>c             S   s   | d S )Nr   r   )xr   r   r   <lambda>   s    z stripped_lines.<locals>.<lambda>)keyFNr+   )startz"""z'''   #r   )rI   parsejoinZbodyr   r:   strip
startswithendswithgetsplitr   )linesr   r   r   ZtreeZnode_is_import_by_linenoZline_begins_importZcurrent_line_is_importZstrippedlinesZ	docstringrG   r7   r   r   r   stripped_lines   s:    

r\   c               @   s\   e Zd ZdZdddZdd Zdd Zd	d
 Zdd Zdd Z	dddZ
dd Zdd ZdS )r   z7Holds and indexes all the lines of a single source fileFc             C   s*   || _ || _t||||| _|  | _d S )N)r2   r3   r\   _stripped_lines	_mk_index_index)r   r2   r[   r   r   r   r   r   r   r      s
    zLineSet.__init__c             C   s
   d| j  S )Nz<Lineset for %s>)r2   )r   r   r   r   __str__   s    zLineSet.__str__c             C   s
   t | jS )N)r,   r3   )r   r   r   r   __len__   s    zLineSet.__len__c             C   s
   | j | S )N)r]   )r   indexr   r   r   __getitem__   s    zLineSet.__getitem__c             C   s   | j |j k S )N)r2   )r   otherr   r   r   __lt__   s    zLineSet.__lt__c             C   s   t | S )N)id)r   r   r   r   __hash__   s    zLineSet.__hash__r   c             c   sB   |}|r| j |d }n| j }x|D ]}||fV  |d7 }q$W dS )zgreturn an iterator on stripped lines, starting from a given index
        if specified, else 0
        Nr+   )r]   )r   Zstart_atr6   r[   r7   r   r   r   r8      s    

zLineSet.enumerate_strippedc             C   s   | j |dS )z7return positions of the given stripped line in this setr   )r_   rY   )r   Zstripped_liner   r   r   r9      s    zLineSet.findc             C   s6   t t}x(t| jD ]\}}|r|| | qW |S )zcreate the index for this set)r   r   r:   r]   r   )r   rb   Zline_nor7   r   r   r   r^      s
    zLineSet._mk_indexN)FFF)r   )rB   rC   rD   rE   r   r`   ra   rc   re   rg   r8   r9   r^   r   r   r   r   r      s     
	
r   R0801)zSimilar lines in %s files
%szduplicate-codezIndicates that a set of similar lines has been detected among multiple file. This usually means that the code should be refactored to avoid this duplication.c             C   s6   ddddg}|t ||d7 }| t|dddd d	S )
z/make a layout with some stats about duplicationrF   ZnowZprevious
difference)nb_duplicated_linespercent_duplicated_linesr   r+   )ZchildrenZcolsZrheadersZcheadersN)r   r   r	   )ZsectstatsZ	old_statsr[   r   r   r   report_similarities  s    rm   c            	   @   s   e Zd ZdZefZdZeZddddddfd	d
ddddfdd
ddddfddddddffZ	dde
ffZd ddZd!ddZdd Zdd Zdd ZdS )"SimilarCheckerzchecks for similarities and duplicated code. This computation may be
    memory / CPU intensive, so you should disable it if you experiment some
    problems.
    Zsimilaritieszmin-similarity-linesr   intz<int>z%Minimum lines number of a similarity.)defaulttypemetavarhelpzignore-commentsTZynz<y or n>z,Ignore comments when computing similarities.zignore-docstringsz.Ignore docstrings when computing similarities.zignore-importsFz+Ignore imports when computing similarities.ZRP0801ZDuplicationNc             C   s(   t | | tj| dddd d | _d S )Nr   T)r   r   r   )r   r   r
   rl   )r   linterr   r   r   r   N  s    zSimilarChecker.__init__c             C   sd   t | |||| |dkr&| jj| _n:|dkr:| jj| _n&|dkrN| jj| _n|dkr`| jj| _dS )zmethod called to set an option (registered in the options list)

        overridden to report options setting to Similar
        zmin-similarity-lineszignore-commentszignore-docstringszignore-importsN)r   
set_optionZconfigZmin_similarity_linesr   r   r   r   )r   ZoptnamevalueactionZoptdictr   r   r   ru   U  s    zSimilarChecker.set_optionc             C   s   g | _ | jjddd| _dS )z<init the checkers: reset linesets and statistics informationr   )rj   rk   N)r   rt   Z	add_statsrl   )r   r   r   r   opend  s    zSimilarChecker.openc          	   C   s,   |  }| | jj||j W dQ R X dS )zprocess a module

        the module's content is accessible via the stream object

        stream must implement the readlines method
        N)r   r   rt   Zcurrent_namefile_encoding)r   rJ   r   r   r   r   process_modulek  s    
zSimilarChecker.process_modulec       
      C   s   t dd | jD }d}| j}x|  D ]\}}g }x$|D ]\}}|d|j|f  q:W |  x(|j|||  D ]}	||	  qvW | j	dt
|d|fd ||t
|d  7 }q(W ||d	< |o|d
 | |d< dS )zAcompute and display similarities on closing (i.e. end of parsing)c             s   s   | ]}t |V  qd S )N)r,   )r-   r.   r   r   r   rK   w  s    z'SimilarChecker.close.<locals>.<genexpr>r   z==%s:%srh   
)argsr+   rj   g      Y@rk   N)r5   r   rl   r   r   r2   r#   r3   r4   Zadd_messager,   rU   )
r   ZtotalZ
duplicatedrl   r%   r(   msgr.   r6   r7   r   r   r   closeu  s    zSimilarChecker.close)N)NN)rB   rC   rD   rE   r   Z__implements__r2   MSGSZmsgsZoptionsrm   Zreportsr   ru   rx   rz   r~   r   r   r   r   rn     s<   


rn   c             C   s   |  t|  dS )z.required method to auto register this checker N)Zregister_checkerrn   )rt   r   r   r   register  s    r   c             C   s$   t d t   t d t|  dS )z&display command line usage informationz*finds copy pasted blocks in a set of filesz~Usage: symilar [-d|--duplicates min_duplicated_lines] [-i|--ignore-comments] [--ignore-docstrings] [--ignore-imports] file1...N)r0   sysexit)Zstatusr   r   r   usage  s
    r   c          
   C   s  | dkrt jdd } ddlm} d}d}d}d}d}d}|| ||\}}	xZ|D ]R\}
}|
d	krjt|}qP|
d
krzt  qP|
dkrd}qP|
dkrd}qP|
dkrPd}qPW |	std t||||}x,|	D ]$}t|}||| W dQ R X qW |  t 	d dS )z$standalone command line access pointNr+   r   )getoptZhdi)rs   zduplicates=zignore-commentszignore-importszignore-docstringsr   F)z-dz--duplicates)z-hz--help)z-iz--ignore-commentsT)z--ignore-docstrings)z--ignore-imports)
r   argvr   ro   r   r
   rx   r   r   r   )r   r   Zs_optsZl_optsr   r   r   r   Zoptsr|   ZoptvalrA   filenamer   r   r   r   Run  s:    


r   __main__)r   )N)rE   Z
__future__r   r   collectionsr   	itertoolsr   rI   Zpylint.utilsr   Zpylint.interfacesr   Zpylint.checkersr   r   Zpylint.reporters.ureports.nodesr	   r
   r\   r   r   rm   rn   r   r   r   rB   r   r   r   r   <module>   s*   z,=	
p

(