ó
­V]c           @   so  d  Z  d Z d d l Z d d l m Z d d l m Z d d l Z d d l m Z m Z d d l m	 Z	 d d l
 Z
 d d l Z d d l Z d d l Z d d l Z d d l Z d d l Z d d l Z d   Z e d	  Z d
 e f d     YZ d   Z d Z d Z d d  Z d d  Z d d  Z d d  Z d d d  Z e d k rke e j j    n  d S(   s=   Diagnostic functions, mainly for use when doing tech support.t   MITi˙˙˙˙N(   t   StringIO(   t
   HTMLParser(   t   BeautifulSoupt   __version__(   t   builder_registryc         C   sV  d t  GHd t j GHd d d g } xK | D]C } x: t j D] } | | j k r; Pq; q; W| j |  d | GHq+ Wd | k rŰ | j d  y2 d d	 l m	 } d
 d j
 t t | j   GHWqŰ t k
 r× } d GHqŰ Xn  d | k r!y d d l } d | j  GHWq!t k
 r} d GHq!Xn  t |  d  r?|  j   }  n |  j d  s]|  j d  rod |  GHd GHd SyC t j j |   rąd |  GHt |    } | j   }  Wd QXn  Wn t k
 rĹn XHx | D] } d | GHt }	 y t |  d | }
 t }	 Wn& t k
 r'} d | GHt j   n X|	 rEd | GH|
 j   GHn  d d GHqÎWd S(   s/   Diagnostic suite for isolating common problems.s'   Diagnostic running on Beautiful Soup %ss   Python version %ss   html.parsert   html5libt   lxmls;   I noticed that %s is not installed. Installing it may help.s   lxml-xmli˙˙˙˙(   t   etrees   Found lxml version %st   .s.   lxml is not installed or couldn't be imported.Ns   Found html5lib version %ss2   html5lib is not installed or couldn't be imported.t   reads   http:s   https:s<   "%s" looks like a URL. Beautiful Soup is not an HTTP client.sp   You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.s7   "%s" looks like a filename. Reading data from the file.s#   Trying to parse your markup with %st   featuress   %s could not parse the markup.s#   Here's what %s did with the markup:t   -iP   (   R   t   syst   versionR   t   buildersR   t   removet   appendR   R   t   joint   mapt   strt   LXML_VERSIONt   ImportErrorR   t   hasattrR
   t
   startswitht   ost   patht   existst   opent
   ValueErrort   FalseR   t   Truet	   Exceptiont	   tracebackt	   print_exct   prettify(   t   datat   basic_parserst   namet   builderR   t   eR   t   fpt   parsert   successt   soup(    (    s+   lib/python2.7/site-packages/bs4/diagnose.pyt   diagnose   sd    	
"			
		c         K   s[   d d l  m } xD | j t |   d | | D]$ \ } } d | | j | j f GHq/ Wd S(   s   Print out the lxml events that occur during parsing.

    This lets you see how lxml parses a document when no Beautiful
    Soup code is running.
    i˙˙˙˙(   R   t   htmls   %s, %4s, %sN(   R   R   t	   iterparseR   t   tagt   text(   R$   R.   t   kwargsR   t   eventt   element(    (    s+   lib/python2.7/site-packages/bs4/diagnose.pyt
   lxml_traceY   s    +t   AnnouncingParserc           B   sh   e  Z d  Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z	 d   Z
 d	   Z d
   Z RS(   s?   Announces HTMLParser parse events, without doing anything else.c         C   s	   | GHd  S(   N(    (   t   selft   s(    (    s+   lib/python2.7/site-packages/bs4/diagnose.pyt   _pf   s    c         C   s   |  j  d |  d  S(   Ns   %s START(   R9   (   R7   R&   t   attrs(    (    s+   lib/python2.7/site-packages/bs4/diagnose.pyt   handle_starttagi   s    c         C   s   |  j  d |  d  S(   Ns   %s END(   R9   (   R7   R&   (    (    s+   lib/python2.7/site-packages/bs4/diagnose.pyt   handle_endtagl   s    c         C   s   |  j  d |  d  S(   Ns   %s DATA(   R9   (   R7   R$   (    (    s+   lib/python2.7/site-packages/bs4/diagnose.pyt   handle_datao   s    c         C   s   |  j  d |  d  S(   Ns
   %s CHARREF(   R9   (   R7   R&   (    (    s+   lib/python2.7/site-packages/bs4/diagnose.pyt   handle_charrefr   s    c         C   s   |  j  d |  d  S(   Ns   %s ENTITYREF(   R9   (   R7   R&   (    (    s+   lib/python2.7/site-packages/bs4/diagnose.pyt   handle_entityrefu   s    c         C   s   |  j  d |  d  S(   Ns
   %s COMMENT(   R9   (   R7   R$   (    (    s+   lib/python2.7/site-packages/bs4/diagnose.pyt   handle_commentx   s    c         C   s   |  j  d |  d  S(   Ns   %s DECL(   R9   (   R7   R$   (    (    s+   lib/python2.7/site-packages/bs4/diagnose.pyt   handle_decl{   s    c         C   s   |  j  d |  d  S(   Ns   %s UNKNOWN-DECL(   R9   (   R7   R$   (    (    s+   lib/python2.7/site-packages/bs4/diagnose.pyt   unknown_decl~   s    c         C   s   |  j  d |  d  S(   Ns   %s PI(   R9   (   R7   R$   (    (    s+   lib/python2.7/site-packages/bs4/diagnose.pyt	   handle_pi   s    (   t   __name__t
   __module__t   __doc__R9   R;   R<   R=   R>   R?   R@   RA   RB   RC   (    (    (    s+   lib/python2.7/site-packages/bs4/diagnose.pyR6   c   s   									c         C   s   t    } | j |   d S(   sŁ   Print out the HTMLParser events that occur during parsing.

    This lets you see how HTMLParser parses a document when no
    Beautiful Soup code is running.
    N(   R6   t   feed(   R$   R*   (    (    s+   lib/python2.7/site-packages/bs4/diagnose.pyt   htmlparser_trace   s    	t   aeiout   bcdfghjklmnpqrstvwxyzi   c         C   sS   d } xF t  |   D]8 } | d d k r2 t } n t } | t j |  7} q W| S(   s#   Generate a random word-like string.t    i   i    (   t   ranget   _consonantst   _vowelst   randomt   choice(   t   lengthR8   t   it   t(    (    s+   lib/python2.7/site-packages/bs4/diagnose.pyt   rword   s    	i   c         C   s   d j  d   t |   D  S(   s'   Generate a random sentence-like string.t    c         s   s'   |  ] } t  t j d  d   Vq d S(   i   i	   N(   RT   RO   t   randint(   t   .0RR   (    (    s+   lib/python2.7/site-packages/bs4/diagnose.pys	   <genexpr>   s    (   R   RL   (   RQ   (    (    s+   lib/python2.7/site-packages/bs4/diagnose.pyt	   rsentence   s    ič  c         C   së   d d d d d d d g } g  } x˛ t  |   D]¤ } t j d d	  } | d k ru t j |  } | j d
 |  q. | d k rŁ | j t t j d d    q. | d k r. t j |  } | j d |  q. q. Wd d j |  d S(   s+   Randomly generate an invalid HTML document.t   pt   divt   spanRR   t   bt   scriptt   tablei    i   s   <%s>i   i   i   s   </%s>s   <html>s   
s   </html>(   RL   RO   RV   RP   R   RX   R   (   t   num_elementst	   tag_namest   elementsRR   RP   t   tag_name(    (    s+   lib/python2.7/site-packages/bs4/diagnose.pyt   rdoc   s    "i  c   
      C   sS  d t  GHt |   } d t |  GHx d d d g d d g D] } t } y1 t j   } t | |  } t j   } t } Wn& t k
 r˘ } d | GHt j	   n X| r= d | | | f GHq= q= Wd	 d
 l
 m } t j   } | j |  t j   } d | | GHd	 d l }	 |	 j   } t j   } | j |  t j   } d | | GHd S(   s.   Very basic head-to-head performance benchmark.s1   Comparative parser benchmark on Beautiful Soup %ss3   Generated a large invalid HTML document (%d bytes).R   R.   R   s   html.parsers   %s could not parse the markup.s"   BS4+%s parsed the markup in %.2fs.i˙˙˙˙(   R   s$   Raw lxml parsed the markup in %.2fs.Ns(   Raw html5lib parsed the markup in %.2fs.(   R   Rc   t   lenR   t   timeR   R   R    R!   R"   R   R   t   HTMLR   R   t   parse(
   R_   R$   R*   R+   t   aR,   R\   R(   R   R   (    (    s+   lib/python2.7/site-packages/bs4/diagnose.pyt   benchmark_parsersą   s4    	
	R   c         C   s   t  j   } | j } t |   } t d t d | d |  } t j d | | |  t j	 |  } | j
 d  | j d d  d  S(   Nt   bs4R$   R*   s   bs4.BeautifulSoup(data, parser)t
   cumulatives   _html5lib|bs4i2   (   t   tempfilet   NamedTemporaryFileR&   Rc   t   dictRj   t   cProfilet   runctxt   pstatst   Statst
   sort_statst   print_stats(   R_   R*   t
   filehandlet   filenameR$   t   varst   stats(    (    s+   lib/python2.7/site-packages/bs4/diagnose.pyt   profileŃ   s    	t   __main__(    RF   t   __license__Ro   R   R   Rj   R   R   t   bs4.builderR   R   Rq   RO   Rl   Re   R!   R   R-   R   R5   R6   RH   RN   RM   RT   RX   Rc   Ri   Ry   RD   t   stdinR
   (    (    (    s+   lib/python2.7/site-packages/bs4/diagnose.pyt   <module>   s8   	C
!		 