B
    j9C\6  ã               @   s  d Z dZddlZddlmZ ddlmZ ddlZddlmZm	Z	 ddl
mZ ddlZddlZddlZddlZddlZddlZddlZddlZdd	„ Zd#dd„ZG dd„ deƒZdd„ ZdZdZd$dd„Zd%dd„Zd&dd„Zd'dd„Zd(d d!„Zed"kreej  ¡ ƒ dS ))z=Diagnostic functions, mainly for use when doing tech support.ZMITé    N)ÚStringIO)Ú
HTMLParser)ÚBeautifulSoupÚ__version__)Úbuilder_registryc             C   sT  t dt ƒ t dtj ƒ dddg}x>|D ]6}x0tjD ]}||jkr6P q6W | |¡ t d| ƒ q*W d|krÌ| d¡ y*dd	l	m
} t d
d tt|jƒ¡ ƒ W n* tk
rÊ } zt dƒ W dd}~X Y nX d|kryddl}t d|j ƒ W n, tk
r } zt dƒ W dd}~X Y nX t| dƒr4|  ¡ } nˆ|  d¡sL|  d¡rdt d|  ƒ t dƒ dS y:tj | ¡rœt d|  ƒ t| ƒ}| ¡ } W dQ R X W n tk
r´   Y nX t ƒ  x’|D ]Š}t d| ƒ d}	yt| |d}
d}	W n8 tk
r" } zt d| ƒ t ¡  W dd}~X Y nX |	rBt d| ƒ t |
 ¡ ƒ t dƒ qÂW dS )z/Diagnostic suite for isolating common problems.z'Diagnostic running on Beautiful Soup %szPython version %szhtml.parserÚhtml5libÚlxmlz;I noticed that %s is not installed. Installing it may help.zlxml-xmlr   )ÚetreezFound lxml version %sÚ.z.lxml is not installed or couldn't be imported.NzFound html5lib version %sz2html5lib is not installed or couldn't be imported.Úreadzhttp:zhttps:z<"%s" looks like a URL. Beautiful Soup is not an HTTP client.zpYou need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.z7"%s" looks like a filename. Reading data from the file.z#Trying to parse your markup with %sF)ÚfeaturesTz%s could not parse the markup.z#Here's what %s did with the markup:zP--------------------------------------------------------------------------------)Úprintr   ÚsysÚversionr   Zbuildersr   ÚremoveÚappendr   r	   ÚjoinÚmapÚstrZLXML_VERSIONÚImportErrorr   Úhasattrr   Ú
startswithÚosÚpathÚexistsÚopenÚ
ValueErrorr   Ú	ExceptionÚ	tracebackÚ	print_excZprettify)ÚdataZbasic_parsersÚnameZbuilderr	   Úer   ÚfpÚparserÚsuccessÚsoup© r'   ú+lib/python3.7/site-packages/bs4/diagnose.pyÚdiagnose   sj    








r)   Tc             K   sN   ddl m} x<|jt| ƒfd|i|—ŽD ]\}}td||j|jf ƒ q(W dS )z—Print out the lxml events that occur during parsing.

    This lets you see how lxml parses a document when no Beautiful
    Soup code is running.
    r   )r	   Úhtmlz%s, %4s, %sN)r   r	   Z	iterparser   r   ÚtagÚtext)r    r*   Úkwargsr	   ZeventÚelementr'   r'   r(   Ú
lxml_traceY   s    $r/   c               @   s`   e Zd ZdZdd„ Zdd„ Zdd„ Zdd	„ Zd
d„ Zdd„ Z	dd„ Z
dd„ Zdd„ Zdd„ ZdS )ÚAnnouncingParserz?Announces HTMLParser parse events, without doing anything else.c             C   s   t |ƒ d S )N)r   )ÚselfÚsr'   r'   r(   Ú_pf   s    zAnnouncingParser._pc             C   s   |   d| ¡ d S )Nz%s START)r3   )r1   r!   Zattrsr'   r'   r(   Úhandle_starttagi   s    z AnnouncingParser.handle_starttagc             C   s   |   d| ¡ d S )Nz%s END)r3   )r1   r!   r'   r'   r(   Úhandle_endtagl   s    zAnnouncingParser.handle_endtagc             C   s   |   d| ¡ d S )Nz%s DATA)r3   )r1   r    r'   r'   r(   Úhandle_datao   s    zAnnouncingParser.handle_datac             C   s   |   d| ¡ d S )Nz
%s CHARREF)r3   )r1   r!   r'   r'   r(   Úhandle_charrefr   s    zAnnouncingParser.handle_charrefc             C   s   |   d| ¡ d S )Nz%s ENTITYREF)r3   )r1   r!   r'   r'   r(   Úhandle_entityrefu   s    z!AnnouncingParser.handle_entityrefc             C   s   |   d| ¡ d S )Nz
%s COMMENT)r3   )r1   r    r'   r'   r(   Úhandle_commentx   s    zAnnouncingParser.handle_commentc             C   s   |   d| ¡ d S )Nz%s DECL)r3   )r1   r    r'   r'   r(   Úhandle_decl{   s    zAnnouncingParser.handle_declc             C   s   |   d| ¡ d S )Nz%s UNKNOWN-DECL)r3   )r1   r    r'   r'   r(   Úunknown_decl~   s    zAnnouncingParser.unknown_declc             C   s   |   d| ¡ d S )Nz%s PI)r3   )r1   r    r'   r'   r(   Ú	handle_pi   s    zAnnouncingParser.handle_piN)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r'   r'   r'   r(   r0   c   s   r0   c             C   s   t ƒ }| | ¡ dS )z£Print out the HTMLParser events that occur during parsing.

    This lets you see how HTMLParser parses a document when no
    Beautiful Soup code is running.
    N)r0   Zfeed)r    r$   r'   r'   r(   Úhtmlparser_trace„   s    rA   ZaeiouZbcdfghjklmnpqrstvwxyzé   c             C   s>   d}x4t | ƒD ](}|d dkr$t}nt}|t |¡7 }qW |S )z#Generate a random word-like string.Ú é   r   )ÚrangeÚ_consonantsÚ_vowelsÚrandomÚchoice)Úlengthr2   ÚiÚtr'   r'   r(   Úrword   s    rM   é   c             C   s   d  dd„ t| ƒD ƒ¡S )z'Generate a random sentence-like string.ú c             s   s   | ]}t t d d¡ƒV  qdS )rN   é	   N)rM   rH   Úrandint)Ú.0rK   r'   r'   r(   ú	<genexpr>   s    zrsentence.<locals>.<genexpr>)r   rE   )rJ   r'   r'   r(   Ú	rsentence›   s    rT   éè  c             C   s¨   dddddddg}g }x~t | ƒD ]r}t dd	¡}|dkrRt |¡}| d
| ¡ q |dkrr| tt dd¡ƒ¡ q |dkr t |¡}| d| ¡ q W dd |¡ d S )z+Randomly generate an invalid HTML document.ÚpZdivÚspanrK   ÚbZscriptÚtabler   é   z<%s>é   rN   rD   z</%s>z<html>Ú
z</html>)rE   rH   rQ   rI   r   rT   r   )Únum_elementsZ	tag_namesÚelementsrK   rI   Ztag_namer'   r'   r(   ÚrdocŸ   s    

r_   é † c       
      C   s(  t dt ƒ t| ƒ}t dt|ƒ ƒ xŽdddgddgD ]z}d}y"t ¡ }t||ƒ}t ¡ }d}W n6 tk
r– } zt d	| ƒ t ¡  W d
d
}~X Y nX |r6t d||| f ƒ q6W ddl	m
} t ¡ }| |¡ t ¡ }t d||  ƒ dd
l}	|	 ¡ }t ¡ }| |¡ t ¡ }t d||  ƒ d
S )z.Very basic head-to-head performance benchmark.z1Comparative parser benchmark on Beautiful Soup %sz3Generated a large invalid HTML document (%d bytes).r   r*   r   zhtml.parserFTz%s could not parse the markup.Nz"BS4+%s parsed the markup in %.2fs.r   )r	   z$Raw lxml parsed the markup in %.2fs.z(Raw html5lib parsed the markup in %.2fs.)r   r   r_   ÚlenÚtimer   r   r   r   r   r	   ZHTMLr   r   Úparse)
r]   r    r$   r%   Úar&   rX   r"   r	   r   r'   r'   r(   Úbenchmark_parsers±   s4    


re   r   c             C   sX   t  ¡ }|j}t| ƒ}tt||d}t d|||¡ t 	|¡}| 
d¡ | dd¡ d S )N)Úbs4r    r$   zbs4.BeautifulSoup(data, parser)Z
cumulativez_html5lib|bs4é2   )ÚtempfileZNamedTemporaryFiler!   r_   Údictrf   ÚcProfileZrunctxÚpstatsZStatsZ
sort_statsZprint_stats)r]   r$   Z
filehandleÚfilenamer    ÚvarsZstatsr'   r'   r(   ÚprofileÑ   s    

rn   Ú__main__)T)rB   )rN   )rU   )r`   )r`   r   )!r@   Z__license__rj   Úior   Zhtml.parserr   rf   r   r   Zbs4.builderr   r   rk   rH   rh   rb   r   r   r)   r/   r0   rA   rG   rF   rM   rT   r_   re   rn   r=   Ústdinr   r'   r'   r'   r(   Ú<module>   s8   C

!	



 

