B
    j9C\X              
   @   sJ  d Z yddlmZ W n. ek
rB Z zddlmZ W ddZ[X Y nX ddlZddlZddlZyddl	Z	W n0 ek
r Z zdZ	e
d W ddZ[X Y nX ddlmZ dZejd dkZedZed	Zd
d ZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG d d! d!eZ G d"d# d#eZ!G d$d% d%ee!Z"G d&d' d'e"Z#G d(d) d)e#Z$G d*d+ d+e#Z%G d,d- d-e%Z&G d.d/ d/e#Z'G d0d1 d1e#Z(G d2d3 d3e#Z)G d4d5 d5e!Z*G d6d7 d7eZ+G d8d9 d9e,Z-dS ):ZMIT    )CallableNzEThe soupsieve package is not installed. CSS selectors cannot be used.)EntitySubstitutionzutf-8   z\S+z\s+c                s&   t  fdd}|j fdd}|S )z>Alias one attribute name to another for backward compatibilityc                s
   t |  S )N)getattr)self)attr *lib/python3.7/site-packages/bs4/element.pyalias    s    z_alias.<locals>.aliasc                s
   t |  S )N)setattr)r   )r   r   r	   r
   $   s    )propertysetter)r   r
   r   )r   r	   _alias   s    r   c               @   s   e Zd ZdddZdS )NamespacedAttributeNc             C   sV   |d krt | |}n*|d kr,t | |}nt | |d | }||_||_||_|S )N:)str__new__prefixname	namespace)clsr   r   r   objr   r   r	   r   ,   s    zNamespacedAttribute.__new__)N)__name__
__module____qualname__r   r   r   r   r	   r   *   s   r   c               @   s   e Zd ZdZdS )%AttributeValueWithCharsetSubstitutionz=A stand-in object for a character encoding specified in HTML.N)r   r   r   __doc__r   r   r   r	   r   9   s   r   c               @   s    e Zd ZdZdd Zdd ZdS )CharsetMetaAttributeValuezA generic stand-in for the value of a meta tag's 'charset' attribute.

    When Beautiful Soup parses the markup '<meta charset="utf8">', the
    value of the 'charset' attribute will be one of these objects.
    c             C   s   t | |}||_|S )N)r   r   original_value)r   r   r   r   r   r	   r   C   s    z!CharsetMetaAttributeValue.__new__c             C   s   |S )Nr   )r   encodingr   r   r	   encodeH   s    z CharsetMetaAttributeValue.encodeN)r   r   r   r   r   r    r   r   r   r	   r   <   s   r   c               @   s.   e Zd ZdZedejZdd Zdd Z	dS )ContentMetaAttributeValuea  A generic stand-in for the value of a meta tag's 'content' attribute.

    When Beautiful Soup parses the markup:
     <meta http-equiv="content-type" content="text/html; charset=utf8">

    The value of the 'content' attribute will be one of these objects.
    z((^|;)\s*charset=)([^;]*)c             C   s6   | j |}|d kr tt|S t| |}||_|S )N)
CHARSET_REsearchr   r   r   )r   r   matchr   r   r   r	   r   W   s    z!ContentMetaAttributeValue.__new__c                s    fdd}| j || jS )Nc                s   |  d  S )N   )group)r$   )r   r   r	   rewriteb   s    z1ContentMetaAttributeValue.encode.<locals>.rewrite)r"   subr   )r   r   r'   r   )r   r	   r    a   s    z ContentMetaAttributeValue.encodeN)
r   r   r   r   recompileMr"   r   r    r   r   r   r	   r!   L   s   
r!   c               @   sV   e Zd ZdZeddgZedgZeddgZedd Z	edd	 Z
ed
d ZdS )HTMLAwareEntitySubstitutiona%  Entity substitution rules that are aware of some HTML quirks.

    Specifically, the contents of <script> and <style> tags should not
    undergo entity substitution.

    Incoming NavigableString objects are checked to see if they're the
    direct children of a <script> or <style> tag.
    ZscriptZstylepreZtextareac             C   s.   t |tr&|jd k	r&|jj| jkr&|S ||S )N)
isinstanceNavigableStringparentr   cdata_containing_tags)r   nsfr   r   r	   _substitute_if_appropriatew   s
    

z6HTMLAwareEntitySubstitution._substitute_if_appropriatec             C   s   |  |tjS )N)r4   r   substitute_html)r   r2   r   r   r	   r5      s    z+HTMLAwareEntitySubstitution.substitute_htmlc             C   s   |  |tjS )N)r4   r   substitute_xml)r   r2   r   r   r	   r6      s    z*HTMLAwareEntitySubstitution.substitute_xmlN)r   r   r   r   setr1   Zpreformatted_tagspreserve_whitespace_tagsclassmethodr4   r5   r6   r   r   r   r	   r,   f   s   	

r,   c               @   s   e Zd ZdZdZdd ZdS )	Formatterz6Contains information about how to format a parse tree./c             O   s
   t  dS )z1Transform certain characters into named entities.N)NotImplementedError)r   argskwargsr   r   r	   substitute_entities   s    zFormatter.substitute_entitiesN)r   r   r   r   void_element_close_prefixr?   r   r   r   r	   r:      s   r:   c               @   s   e Zd ZdZdd ZdS )HTMLFormatterzThe default HTML formatter.c             O   s   t j||S )N)r,   r5   )r   r=   r>   r   r   r	   
substitute   s    zHTMLFormatter.substituteN)r   r   r   r   rB   r   r   r   r	   rA      s   rA   c               @   s   e Zd ZdZdd ZdS )MinimalHTMLFormatterzA minimal HTML formatter.c             O   s   t j||S )N)r,   r6   )r   r=   r>   r   r   r	   rB      s    zMinimalHTMLFormatter.substituteN)r   r   r   r   rB   r   r   r   r	   rC      s   rC   c               @   s   e Zd ZdZdZdS )HTML5Formatterz5An HTML formatter that omits the slash in a void tag.N)r   r   r   r   r@   r   r   r   r	   rD      s   rD   c               @   s   e Zd ZdZdd ZdS )XMLFormatterz+Substitute only the essential XML entities.c             O   s   t j||S )N)r   r6   )r   r=   r>   r   r   r	   rB      s    zXMLFormatter.substituteN)r   r   r   r   rB   r   r   r   r	   rE      s   rE   c               @   s   e Zd ZdZdd ZdS )HTMLXMLFormatterzFormat XML using HTML rules.c             O   s   t j||S )N)r,   r5   )r   r=   r>   r   r   r	   rB      s    zHTMLXMLFormatter.substituteN)r   r   r   r   rB   r   r   r   r	   rF      s   rF   c               @   s8  e Zd ZdZe e e ddZe e	 ddZ
dUddZedd	 Zd
d ZdVddZedZedZdd ZeZdd ZeZeZdd Zdd ZdWddZeZdd Zdd Zdd  Zd!d" Zd#d$ Z di dfd%d&Z!e!Z"di ddfd'd(Z#e#Z$di dfd)d*Z%e%Z&di ddfd+d,Z'e'Z(e'Z)di dfd-d.Z*e*Z+di ddfd/d0Z,e,Z-e,Z.di dfd1d2Z/e/Z0di ddfd3d4Z1e1Z2e1Z3di fd5d6Z4e4Z5di dfd7d8Z6e6Z7e6Z8ed9d: Z9ed;d< Z:d=d> Z;d?d@ Z<edAdB Z=edCdD Z>edEdF Z?edGdH Z@edIdJ ZAdKdL ZBdMdN ZCdOdP ZDdQdR ZEdSdT ZFdS )XPageElementzeContains the navigational information for some part of the page
    (either a tag or a piece of text)N)htmlZhtml5minimalN)rH   rI   NrI   c             C   sD   t |tr| |}|dkr"|}nt |tr6||}n
||}|S )z2Format the given string using the given formatter.N)r.   r   _formatter_for_namer   rB   )r   s	formatteroutputr   r   r	   format_string   s    




zPageElement.format_stringc             C   s.   | j dk	r| j S | jdkr&t| ddS | jjS )aJ  Is this element part of an XML tree or an HTML tree?

        This is used when mapping a formatter name ("minimal") to an
        appropriate function (one that performs entity-substitution on
        the contents of <script> and <style> tags, or not). It can be
        inefficient, but it should be called very rarely.
        Nis_xmlF)	known_xmlr0   r   _is_xml)r   r   r   r	   rQ      s
    	

zPageElement._is_xmlc             C   s*   | j r| j|t S | j|t S dS )z<Look up a formatter function based on its name and the tree.N)rQ   XML_FORMATTERSgetrE   HTML_FORMATTERSrA   )r   r   r   r   r	   rJ      s    zPageElement._formatter_for_namec             C   s   || _ || _|dk	r| | j_|| _| jdk	r4| | j_|| _| jdk	rL| | j_|dkrr| j dk	rr| j jrr| j jd }|| _|dk	r| | j_dS )zNSets up the initial relations between this element and
        other elements.N)r0   previous_elementnext_elementnext_siblingprevious_siblingcontents)r   r0   rV   rW   rY   rX   r   r   r	   setup  s     

zPageElement.setuprX   rY   c             C   sZ   | j d krtd|| krd S || j kr0td| j }| j | }|   ||| | S )Nz]Cannot replace one element with another when theelement to be replaced is not part of a tree.z%Cannot replace a Tag with its parent.)r0   
ValueErrorindexextractinsert)r   replace_withZ
old_parentmy_indexr   r   r	   r`     s    

zPageElement.replace_withc             C   sX   | j }| j d krtd| j | }|   x&t| jd d  D ]}||| q@W | S )NzSCannot replace an element with its contents when thatelement is not part of a tree.)r0   r\   r]   r^   reversedrZ   r_   )r   Z	my_parentra   childr   r   r	   unwrap/  s    
zPageElement.unwrapc             C   s   |  |}|| |S )N)r`   append)r   Zwrap_insidemer   r   r	   wrap=  s    

zPageElement.wrapc             C   s   | j dk	r| j j| j | = |  }|j}| jdk	rF| j|k	rF|| j_|dk	r`|| jk	r`| j|_d| _d|_d| _ | jdk	r| j| jk	r| j| j_| jdk	r| j| jk	r| j| j_d | _| _| S )z0Destructively rips this element out of the tree.N)r0   rZ   r]   _last_descendantrW   rV   rY   rX   )r   
last_childrW   r   r   r	   r^   B  s(    






zPageElement.extractTc             C   sP   |r| j dk	r| j j}n$| }xt|tr:|jr:|jd }qW |sL|| krLd}|S )z8Finds the last element beneath this object to be parsed.NrU   )rX   rV   r.   TagrZ   )r   Zis_initializedZaccept_selfri   r   r   r	   rh   _  s    
zPageElement._last_descendantc             C   s  |d krt d|| kr t dt|tr<t|ts<t|}ddlm} t||r~x&t|jD ]}| || |d7 }q^W d S t	|t
| j}t|dr|jd k	r|j| kr| |}||k r|d8 }|  | |_d }|dkrd |_| |_n(| j|d  }||_||j_|d|_|jd k	r*||j_|d}|t
| jkrd |_| }d }	x2|	d kr|d k	r|j}	|j}|	d k	rTP qTW |	d k	r|	|_nd |_n*| j| }
|
|_|jd k	r||j_|
|_|jd k	r||j_| j|| d S )NzCannot insert None into a tag.z Cannot insert a tag into itself.r   )BeautifulSoupr%   r0   F)r\   r.   r   r/   Zbs4rk   listrZ   r_   minlenhasattrr0   r]   r^   rY   rV   rX   rh   rW   )r   Zposition	new_childrk   ZsubchildZcurrent_indexZprevious_childZnew_childs_last_elementr0   Zparents_next_siblingZ
next_childr   r   r	   r_   m  sh    








zPageElement.insertc             C   s   |  t| j| dS )z2Appends the given tag to the contents of this tag.N)r_   rn   rZ   )r   tagr   r   r	   re     s    zPageElement.appendc             C   s   x|D ]}|  | qW dS )z3Appends the given tags to the contents of this tag.N)re   )r   Ztagsrq   r   r   r	   extend  s    
zPageElement.extendc                sn    j }|dkrtdt fdd|D r4tdx4|D ],}t|trP|  | }||| q:W dS )zMakes the given element(s) the immediate predecessor of this one.

        The elements will have the same parent, and the given elements
        will be immediately before this one.
        Nz2Element has no parent, so 'before' has no meaning.c             3   s   | ]}| kV  qd S )Nr   ).0x)r   r   r	   	<genexpr>  s    z,PageElement.insert_before.<locals>.<genexpr>z&Can't insert an element before itself.)r0   r\   anyr.   rG   r^   r]   r_   )r   r=   r0   Zpredecessorr]   r   )r   r	   insert_before  s    


zPageElement.insert_beforec                s    j }|dkrtdt fdd|D r4tdd}xD|D ]<}t|trT|  | }||d | | |d7 }q>W dS )zMakes the given element(s) the immediate successor of this one.

        The elements will have the same parent, and the given elements
        will be immediately after this one.
        Nz1Element has no parent, so 'after' has no meaning.c             3   s   | ]}| kV  qd S )Nr   )rs   rt   )r   r   r	   ru     s    z+PageElement.insert_after.<locals>.<genexpr>z%Can't insert an element after itself.r   r%   )r0   r\   rv   r.   rG   r^   r]   r_   )r   r=   r0   offsetZ	successorr]   r   )r   r	   insert_after  s    


zPageElement.insert_afterc             K   s   | j | j|||f|S )zjReturns the first item that matches the given criteria and
        appears after this Tag in the document.)	_find_onefind_all_next)r   r   attrstextr>   r   r   r	   	find_next  s    zPageElement.find_nextc             K   s   | j ||||| jf|S )zbReturns all items that match the given criteria and appear
        after this Tag in the document.)	_find_allnext_elements)r   r   r|   r}   limitr>   r   r   r	   r{     s    zPageElement.find_all_nextc             K   s   | j | j|||f|S )z{Returns the closest sibling to this Tag that matches the
        given criteria and appears after this Tag in the document.)rz   find_next_siblings)r   r   r|   r}   r>   r   r   r	   find_next_sibling  s    zPageElement.find_next_siblingc             K   s   | j ||||| jf|S )zqReturns the siblings of this Tag that match the given
        criteria and appear after this Tag in the document.)r   next_siblings)r   r   r|   r}   r   r>   r   r   r	   r     s    zPageElement.find_next_siblingsc             K   s   | j | j|||f|S )zkReturns the first item that matches the given criteria and
        appears before this Tag in the document.)rz   find_all_previous)r   r   r|   r}   r>   r   r   r	   find_previous
  s    zPageElement.find_previousc             K   s   | j ||||| jf|S )zcReturns all items that match the given criteria and appear
        before this Tag in the document.)r   previous_elements)r   r   r|   r}   r   r>   r   r   r	   r     s    zPageElement.find_all_previousc             K   s   | j | j|||f|S )z|Returns the closest sibling to this Tag that matches the
        given criteria and appears before this Tag in the document.)rz   find_previous_siblings)r   r   r|   r}   r>   r   r   r	   find_previous_sibling  s    z!PageElement.find_previous_siblingc             K   s   | j ||||| jf|S )zrReturns the siblings of this Tag that match the given
        criteria and appear before this Tag in the document.)r   previous_siblings)r   r   r|   r}   r   r>   r   r   r	   r   !  s    z"PageElement.find_previous_siblingsc             K   s&   d}| j ||df|}|r"|d }|S )zOReturns the closest parent of this Tag that matches the given
        criteria.Nr%   r   )find_parents)r   r   r|   r>   rlr   r   r	   find_parent*  s
    zPageElement.find_parentc             K   s   | j ||d|| jf|S )zFReturns the parents of this Tag that match the given
        criteria.N)r   parents)r   r   r|   r   r>   r   r   r	   r   6  s    zPageElement.find_parentsc             C   s   | j S )N)rW   )r   r   r   r	   next?  s    zPageElement.nextc             C   s   | j S )N)rV   )r   r   r   r	   previousC  s    zPageElement.previousc             K   s&   d }||||df|}|r"|d }|S )Nr%   r   r   )r   methodr   r|   r}   r>   r   r   r   r   r	   rz   I  s
    zPageElement._find_onec                s0  |dkrd|kr|d }|d= t tr.}nt||f|}|dkr|s|s|s̈dksbdkrzdd |D }t||S t tr̈ddkrdd\ nd  fdd|D }t||S t|}	xVyt|}
W n tk
r   P Y nX |
r||
}|r|		| |rt
|	|krP qW |	S )	z8Iterates over a generator looking for things that match.NstringTc             s   s   | ]}t |tr|V  qd S )N)r.   rj   )rs   elementr   r   r	   ru   _  s    z(PageElement._find_all.<locals>.<genexpr>r   r%   c             3   sB   | ]:}t |tr|jks6|j krd ks6|jkr|V  qd S )N)r.   rj   r   r   )rs   r   )
local_namer   r   r   r	   ru   l  s
    


)r.   SoupStrainer	ResultSetr   countsplitr   StopIterationr#   re   rn   )r   r   r|   r}   r   	generatorr>   ZstrainerresultZresultsifoundr   )r   r   r   r	   r   P  s<    


	


zPageElement._find_allc             c   s$   | j }x|d k	r|V  |j }qW d S )N)rW   )r   r   r   r   r	   r     s    
zPageElement.next_elementsc             c   s$   | j }x|d k	r|V  |j }qW d S )N)rX   )r   r   r   r   r	   r     s    
zPageElement.next_siblingsc             c   s$   | j }x|d k	r|V  |j }qW d S )N)rV   )r   r   r   r   r	   r     s    
zPageElement.previous_elementsc             c   s$   | j }x|d k	r|V  |j }qW d S )N)rY   )r   r   r   r   r	   r     s    
zPageElement.previous_siblingsc             c   s$   | j }x|d k	r|V  |j }qW d S )N)r0   )r   r   r   r   r	   r     s    
zPageElement.parentsc             C   s   | j S )N)r   )r   r   r   r	   nextGenerator  s    zPageElement.nextGeneratorc             C   s   | j S )N)r   )r   r   r   r	   nextSiblingGenerator  s    z PageElement.nextSiblingGeneratorc             C   s   | j S )N)r   )r   r   r   r	   previousGenerator  s    zPageElement.previousGeneratorc             C   s   | j S )N)r   )r   r   r   r	   previousSiblingGenerator  s    z$PageElement.previousSiblingGeneratorc             C   s   | j S )N)r   )r   r   r   r	   parentGenerator  s    zPageElement.parentGenerator)rI   )NNNNN)TT)Gr   r   r   r   rA   rD   rC   rT   rF   rE   rR   rN   r   rQ   rJ   r[   r   ZnextSiblingZpreviousSiblingr`   ZreplaceWithrd   Zreplace_with_childrenZreplaceWithChildrenrg   r^   rh   Z_lastRecursiveChildr_   re   rr   rw   ry   r~   ZfindNextr{   ZfindAllNextr   ZfindNextSiblingr   ZfindNextSiblingsZfetchNextSiblingsr   ZfindPreviousr   ZfindAllPreviousZfetchPreviousr   ZfindPreviousSiblingr   ZfindPreviousSiblingsZfetchPreviousSiblingsr   Z
findParentr   ZfindParentsZfetchParentsr   r   rz   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r	   rG      s   
 

J
6	rG   c               @   s\   e Zd ZdZdZdZdd Zdd Zdd Zd	d
 Z	dddZ
edd Zejdd ZdS )r/    Nc             C   s2   t |trt| |}nt| |t}|  |S )a-  Create a new NavigableString.

        When unpickling a NavigableString, this method is called with
        the string in DEFAULT_OUTPUT_ENCODING. That encoding needs to be
        passed in to the superclass's __new__ or the superclass won't know
        how to handle non-ASCII characters.
        )r.   r   r   DEFAULT_OUTPUT_ENCODINGr[   )r   valueur   r   r	   r     s
    
zNavigableString.__new__c             C   s   t | | S )zA copy of a NavigableString has the same contents and class
        as the original, but it is not connected to the parse tree.
        )type)r   r   r   r	   __copy__  s    zNavigableString.__copy__c             C   s
   t | fS )N)r   )r   r   r   r	   __getnewargs__  s    zNavigableString.__getnewargs__c             C   s$   |dkr| S t d| jj|f dS )ztext.string gives you text. This is for backwards
        compatibility for Navigable*String, but for CData* it lets you
        get the string without the CData wrapper.r   z!'%s' object has no attribute '%s'N)AttributeError	__class__r   )r   r   r   r   r	   __getattr__  s
    zNavigableString.__getattr__rI   c             C   s   |  | |}| j| | j S )N)rN   PREFIXSUFFIX)r   rL   rM   r   r   r	   output_ready  s    zNavigableString.output_readyc             C   s   d S )Nr   )r   r   r   r	   r     s    zNavigableString.namec             C   s   t dd S )Nz)A NavigableString cannot be given a name.)r   )r   r   r   r   r	   r     s    )rI   )r   r   r   r   r   rP   r   r   r   r   r   r   r   r   r   r   r   r	   r/     s   
r/   c               @   s   e Zd ZdZdddZdS )PreformattedStringzA NavigableString not subject to the normal formatting rules.

    The string will be passed into the formatter (to trigger side effects),
    but the return value will be ignored.
    rI   c             C   s   |  | | | j|  | j S )zUCData strings are passed into the formatter.
        But the return value is ignored.)rN   r   r   )r   rL   r   r   r	   r     s    zPreformattedString.output_readyN)rI   )r   r   r   r   r   r   r   r   r	   r     s   r   c               @   s   e Zd ZdZdZdS )CDataz	<![CDATA[z]]>N)r   r   r   r   r   r   r   r   r	   r     s   r   c               @   s   e Zd ZdZdZdZdS )ProcessingInstructionzA SGML processing instruction.z<?>N)r   r   r   r   r   r   r   r   r   r	   r     s   r   c               @   s   e Zd ZdZdZdZdS )XMLProcessingInstructionzAn XML processing instruction.z<?z?>N)r   r   r   r   r   r   r   r   r   r	   r     s   r   c               @   s   e Zd ZdZdZdS )Commentz<!--z-->N)r   r   r   r   r   r   r   r   r	   r     s   r   c               @   s   e Zd ZdZdZdS )Declarationz<?z?>N)r   r   r   r   r   r   r   r   r	   r     s   r   c               @   s    e Zd Zedd ZdZdZdS )Doctypec             C   sN   |pd}|d k	r2|d| 7 }|d k	rF|d| 7 }n|d k	rF|d| 7 }t |S )Nr   z PUBLIC "%s"z "%s"z SYSTEM "%s")r   )r   r   Zpub_idZ	system_idr   r   r   r	   for_name_and_ids  s    zDoctype.for_name_and_idsz
<!DOCTYPE z>
N)r   r   r   r9   r   r   r   r   r   r   r	   r     s   r   c            	   @   s2  e Zd ZdZddddZedZdd Zedd	 Z	e	Z
ed
d Zejdd ZdeeffddZeeZedd ZddeeffddZeZeeZdd ZdeddZdd ZdfddZdgddZdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, Z d-d. Z!d/d0 Z"d1d2 Z#d3d4 Z$d5d6 Z%d7d8 Z&dhd:d;Z'd<d= Z(d>d? Z)e*rTe( Z)Z'e+dd@dAfdBdCZ,dDdE Z-de+d@fdFdGZ.didHdIZ/de+d@fdJdKZ0de+d@fdLdMZ1e+ddNfdOdPZ2di dQdfdRdSZ3e3Z4di dQddfdTdUZ5e5Z6e5Z7edVdW Z8edXdY Z9djdZd[Z:dkd\d]Z;d^d_ Z<d`da Z=dbdc Z>dS )lrj   z=Represents a found HTML tag with its attributes and contents.Nc
             C   s   |dkrd| _ n|j| _ |dkr(td|| _|| _|| _|dk	rJ|j}
n|	rTg }
ntj}
|
| _|dkrni }n4|r|dk	r|jr|	| j|}qt
|}nt
|}|r|j| _n|	| _|| _g | _| || d| _|dk	r||  ||| _nd| _dS )zBasic constructor.Nz%No value provided for new tag's name.F)parser_classr   r\   r   r   r   r8   r,   Zcdata_list_attributesZ$_replace_cdata_list_attribute_valuesdictrO   rP   r|   rZ   r[   hiddenZset_up_substitutionscan_be_empty_element)r   parserbuilderr   r   r   r|   r0   r   rO   r8   r   r   r	   __init__2  sB    


zTag.__init__r   c          	   C   sh   t | d| j| j| j| j| j| jd}xdD ]}t||t| | q,W x| j	D ]}|
|  qNW |S )zA copy of a Tag is a new Tag, unconnected to the parse tree.
        Its contents are a copy of the old Tag's contents.
        N)rO   )r   r   )r   r   r   r   r   r|   rQ   r   r   rZ   re   r   )r   Zcloner   rc   r   r   r	   r   i  s    
zTag.__copy__c             C   s   t | jdko| jS )a7  Is this tag an empty-element tag? (aka a self-closing tag)

        A tag that has contents is never an empty-element tag.

        A tag that has no contents may or may not be an empty-element
        tag. It depends on the builder used to create the tag. If the
        builder has a designated list of empty-element tags, then only
        a tag whose name shows up in that list is considered an
        empty-element tag.

        If the builder has no designated list of empty-element tags,
        then any tag with no contents is an empty-element tag.
        r   )rn   rZ   r   )r   r   r   r	   is_empty_elementu  s    zTag.is_empty_elementc             C   s0   t | jdkrdS | jd }t|tr*|S |jS )aq  Convenience property to get the single string within this tag.

        :Return: If this tag has a single string child, return value
         is that string. If this tag has no children, or more than one
         child, return value is None. If this tag has one child tag,
         return value is the 'string' attribute of the child tag,
         recursively.
        r%   Nr   )rn   rZ   r.   r/   r   )r   rc   r   r   r	   r     s    


z
Tag.stringc             C   s   |    | || d S )N)clearre   r   )r   r   r   r   r	   r     s    Fc             c   s\   xV| j D ]L}|dkrt|tr|dk	r4t||kr4q|rN| }t|dkrNq|V  qW dS )zYield all strings of certain classes, possibly stripping them.

        By default, yields only NavigableString and CData objects. So
        no comments, processing instructions, etc.
        Nr   )descendantsr.   r/   r   striprn   )r   r   typesZ
descendantr   r   r	   _all_strings  s    zTag._all_stringsc             c   s   x|  dD ]
}|V  qW d S )NT)r   )r   r   r   r   r	   stripped_strings  s    zTag.stripped_stringsr   c             C   s   | dd | j||dD S )zP
        Get all child strings, concatenated using the given separator.
        c             S   s   g | ]}|qS r   r   )rs   rK   r   r   r	   
<listcomp>  s    z Tag.get_text.<locals>.<listcomp>)r   )joinr   )r   Z	separatorr   r   r   r   r	   get_text  s    zTag.get_textc             C   s8   |    | }x&|dk	r2|j}|j  g |_|}qW dS )z/Recursively destroys the contents of this tree.N)r^   rW   __dict__r   rZ   )r   r   r   r   r   r	   	decompose  s    

zTag.decomposec             C   s^   |r:xT| j dd D ] }t|tr,|  q|  qW n x| j dd D ]}|  qJW dS )zP
        Extract all children. If decompose is True, decompose instead.
        N)rZ   r.   rj   r   r^   )r   r   r   r   r   r	   r     s    

z	Tag.clearc             C   s0   x"t | jD ]\}}||kr|S qW tddS )z
        Find the index of a child by identity, not value. Avoids issues with
        tag.contents.index(element) getting the index of equal elements.
        zTag.index: element not in tagN)	enumeraterZ   r\   )r   r   r   rc   r   r   r	   r]     s    z	Tag.indexc             C   s   | j ||S )zReturns the value of the 'key' attribute for the tag, or
        the value given for 'default' if it doesn't have that
        attribute.)r|   rS   )r   keydefaultr   r   r	   rS     s    zTag.getc             C   s    |  ||}t|ts|g}|S )z-The same as get(), but always returns a list.)rS   r.   rl   )r   r   r   r   r   r   r	   get_attribute_list  s    
zTag.get_attribute_listc             C   s
   || j kS )N)r|   )r   r   r   r   r	   has_attr  s    zTag.has_attrc             C   s   t |  S )N)r   __hash__)r   r   r   r	   r     s    zTag.__hash__c             C   s
   | j | S )zqtag[key] returns the value of the 'key' attribute for the tag,
        and throws an exception if it's not there.)r|   )r   r   r   r   r	   __getitem__  s    zTag.__getitem__c             C   s
   t | jS )z0Iterating over a tag iterates over its contents.)iterrZ   )r   r   r   r	   __iter__  s    zTag.__iter__c             C   s
   t | jS )z:The length of a tag is the length of its list of contents.)rn   rZ   )r   r   r   r	   __len__  s    zTag.__len__c             C   s
   || j kS )N)rZ   )r   rt   r   r   r	   __contains__  s    zTag.__contains__c             C   s   dS )z-A tag is non-None even if it has no contents.Tr   )r   r   r   r	   __bool__  s    zTag.__bool__c             C   s   || j |< dS )zKSetting tag[key] sets the value of the 'key' attribute for the
        tag.N)r|   )r   r   r   r   r   r	   __setitem__	  s    zTag.__setitem__c             C   s   | j |d dS )z;Deleting tag[key] deletes all 'key' attributes for the tag.N)r|   pop)r   r   r   r   r	   __delitem__  s    zTag.__delitem__c             O   s   | j ||S )zCalling a tag like a function is the same as calling its
        find_all() method. Eg. tag('a') returns a list of all the A tags
        found within this tag.)find_all)r   r=   r>   r   r   r	   __call__  s    zTag.__call__c             C   sr   t |dkr@|dr@|d d }tdt|d  | |S |ds\|dks\| |S td| j|f d S )	N   rj   z.%(name)sTag is deprecated, use .find("%(name)s") instead. If you really were looking for a tag called %(name)sTag, use .find("%(name)sTag"))r   __rZ   z!'%s' object has no attribute '%s')	rn   endswithwarningswarnr   find
startswithr   r   )r   rq   Ztag_namer   r   r	   r     s    

zTag.__getattr__c             C   s   | |krdS t |drRt |drRt |drR| j|jksR| j|jksRt| t|krVdS x(t| jD ]\}}||j| krbdS qbW dS )zReturns true iff this tag has the same name, the same attributes,
        and the same contents (recursively) as the given tag.Tr   r|   rZ   F)ro   r   r|   rn   r   rZ   )r   otherr   Zmy_childr   r   r	   __eq__)  s    


z
Tag.__eq__c             C   s
   | |k S )zZReturns true iff this tag is not identical to the other tag,
        as defined in __eq__.r   )r   r   r   r   r	   __ne__:  s    z
Tag.__ne__unicode-escapec             C   s   t r|  S | |S dS )zRenders this tag as a string.N)PY3Kdecoder    )r   r   r   r   r	   __repr__?  s    zTag.__repr__c             C   s   |   S )N)r   )r   r   r   r	   __unicode__J  s    zTag.__unicode__c             C   s   t r|  S |  S d S )N)r   r   r    )r   r   r   r	   __str__M  s    zTag.__str__rI   xmlcharrefreplacec             C   s   |  |||}|||S )N)r   r    )r   r   indent_levelrL   errorsr   r   r   r	   r    V  s    z
Tag.encodec             C   s   |dk	o| j | jkS )z"Should this tag be pretty-printed?N)r   r8   )r   r   r   r   r	   _should_pretty_print^  s    zTag._should_pretty_printc             C   sP  t |tst |ts| |}g }| jrxt| j D ]\}}|dkrN|}nrt |tsbt |trnd	|}n0t |t
st
|}nt |tr|dk	r||}| ||}t
|d t| }|| q8W d}	d}
d}| jr| jd }| jrd}	t |tr|jp|	}	nd|| jf }
| |}d}d}|dk	rFd|d  }|rZ|}|d }nd}| |||}| jrz|}ng }d}|rdd	| }|dk	r|| |d|| j||	f  |r|d	 || |r |r |d
 d	kr |d	 |r|
r|| ||
 |dk	rB|
rB| jrB|d	 d	|}|S )a  Returns a Unicode representation of this tag and its contents.

        :param eventual_encoding: The tag is destined to be
           encoded into this encoding. This method is _not_
           responsible for performing that encoding. This information
           is passed in so that it can be substituted in if the
           document contains a <META> tag that mentions the document's
           encoding.
        N =r   r   z</%s%s>r%   z
<%s%s%s%s>
rU   )r.   r:   r   rJ   r|   sorteditemsrl   tupler   r   r   r    rN   r   Zquoted_attribute_valuere   r   r   r@   r   r   decode_contentsr   rX   )r   r   eventual_encodingrL   r|   r   valZdecodedr}   closeZcloseTagr   pretty_printZspaceZindent_spaceZindent_contentsrZ   rK   Zattribute_stringr   r   r	   r   f  sx    


















z
Tag.decodec             C   s*   |d kr| j d|dS | j|d|dS d S )NT)rL   )r   r    )r   r   rL   r   r   r	   prettify  s    zTag.prettifyc             C   s   t |tst |ts| |}|dk	}g }x| D ]}d}t |trN||}nt |trl||||| |r|r| j	dks|
 }|r0|r| j	dks|d|d   || |r0| j	dks0|d q0W d|S )av  Renders the contents of this tag as a Unicode string.

        :param indent_level: Each line of the rendering will be
           indented this many spaces.

        :param eventual_encoding: The tag is destined to be
           encoded into this encoding. This method is _not_
           responsible for performing that encoding. This information
           is passed in so that it can be substituted in if the
           document contains a <META> tag that mentions the document's
           encoding.

        :param formatter: The output formatter responsible for converting
           entities to Unicode characters.
        Nr-   r   r%   r   r   )r.   r:   r   rJ   r/   r   rj   re   r   r   r   r   )r   r   r   rL   r   rK   cr}   r   r   r	   r     s(    




zTag.decode_contentsc             C   s   |  |||}||S )ac  Renders the contents of this tag as a bytestring.

        :param indent_level: Each line of the rendering will be
           indented this many spaces.

        :param eventual_encoding: The bytestring will be in this encoding.

        :param formatter: The output formatter responsible for converting
           entities to Unicode characters.
        )r   r    )r   r   r   rL   rZ   r   r   r	   encode_contents  s    zTag.encode_contentsr   c             C   s   |sd }| j ||dS )N)r   r   )r   )r   r   ZprettyPrintZindentLevelr   r   r	   renderContents  s    zTag.renderContentsTc             K   s*   d}| j ||||df|}|r&|d }|S )zLReturn only the first child of this Tag matching the given
        criteria.Nr%   r   )r   )r   r   r|   	recursiver}   r>   r   r   r   r   r	   r     s
    zTag.findc             K   s&   | j }|s| j}| j|||||f|S )a  Extracts a list of Tag objects that match the given
        criteria.  You can specify the name of the Tag and any
        attributes you want the Tag to have.

        The value of a key-value pair in the 'attrs' map can be a
        string, a list of strings, a regular expression object, or a
        callable that takes a string and returns whether or not the
        string matches for some custom definition of 'matches'. The
        same is true of the tag name.)r   childrenr   )r   r   r|   r  r}   r   r>   r   r   r   r	   r     s    zTag.find_allc             C   s
   t | jS )N)r   rZ   )r   r   r   r	   r  .  s    zTag.childrenc             c   s@   t | jsd S |  j}| jd }x||k	r:|V  |j}q$W d S )Nr   )rn   rZ   rh   rW   )r   ZstopNodeZcurrentr   r   r	   r   3  s    



zTag.descendantsc             K   s"   | j ||df|}|r|d S dS )z9Perform a CSS selection operation on the current element.r%   r   N)select)r   selector
namespacesr>   r   r   r   r	   
select_one>  s    zTag.select_onec             K   s>   |dkr| j }|dkrd}tdkr*tdtj|| ||f|S )a:  Perform a CSS selection operation on the current element.

        This uses the SoupSieve library.

        :param selector: A string containing a CSS selector.

        :param namespaces: A dictionary mapping namespace prefixes
        used in the CSS selector to namespace URIs. By default,
        Beautiful Soup will use the prefixes it encountered while
        parsing the document.

        :param limit: After finding this number of results, stop looking.

        :param kwargs: Any extra arguments you'd like to pass in to
        soupsieve.select().
        Nr   zLCannot execute CSS selectors because the soupsieve package is not installed.)Z_namespaces	soupsiever<   r  )r   r  r  r   r>   r   r   r	   r  E  s    z
Tag.selectc             C   s   | j S )N)r  )r   r   r   r	   childGeneratorc  s    zTag.childGeneratorc             C   s   | j S )N)r   )r   r   r   r	   recursiveChildGeneratorf  s    zTag.recursiveChildGeneratorc             C   s   t d|  | |S )zThis was kind of misleading because has_key() (attributes)
        was different from __in__ (contents). has_key() is gone in
        Python 3, anyway.z2has_key is deprecated. Use has_attr("%s") instead.)r   r   r   )r   r   r   r   r	   has_keyi  s    zTag.has_key)	NNNNNNNNN)F)N)N)r   )NrI   )N)NN)?r   r   r   r   r   r   ZparserClassr   r   r   ZisSelfClosingr   r   r/   r   r   Zstringsr   r   ZgetTextr}   r   r   r]   rS   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r   r   r   r   r   r  r   Z	findChildr   ZfindAllZfindChildrenr  r   r  r  r	  r
  r  r   r   r   r	   rj   .  s     
3





\
)	

rj   c               @   sT   e Zd ZdZdi dfddZdd Zdd Zdi fd	d
ZeZdd Z	dddZ
dS )r   zMEncapsulates a number of ways of matching a markup element (tag or
    text).Nc             K   s   |  || _t|ts"||d< d }d|kr<|d |d< |d= |r\|rX| }|| n|}i }x&t| D ]\}}|  |||< qnW || _|  || _	d S )NclassZclass_)
_normalize_search_valuer   r.   r   copyupdaterl   r   r|   r}   )r   r   r|   r}   r>   Znormalized_attrsr   r   r   r   r	   r   v  s"    
zSoupStrainer.__init__c             C   s   t |ts0t |ts0t|ds0t |ts0|d kr4|S t |trH|dS t|drg }xF|D ]>}t|drt |tst |ts|| q\|| | q\W |S tt|S )Nr$   utf8r   )	r.   r   r   ro   boolbytesr   re   r  )r   r   Z	new_valuevr   r   r	   r    s    




z$SoupStrainer._normalize_search_valuec             C   s    | j r| j S d| j| jf S d S )Nz%s|%s)r}   r   r|   )r   r   r   r	   r     s    zSoupStrainer.__str__c             C   s  d }d }t |tr|}|}t | jto0t |t }| jr`|s`|rN| || js`|s| || jr|rr| ||}nrd}d }xht| j D ]V\}}	|st|dr|}ni }x|D ]\}
}|||
< qW |	|}| ||	sd}P qW |r|r|}n|}|r| j
r| |j| j
sd }|S )NTrS   F)r.   rj   r   r   _matchesrl   r|   r   ro   rS   r}   r   )r   Zmarkup_nameZmarkup_attrsr   markupZcall_function_with_tag_datar$   Zmarkup_attr_mapr   match_againstkr  Z
attr_valuer   r   r	   
search_tag  sB    


 zSoupStrainer.search_tagc             C   s   d }t |drFt|ttfsFx|D ]}t|tr"| |r"|}P q"W njt|trn| jrb| jsb| jr| 	|}nBt|tst|tr| js| js| 
|| jr|}ntd|j |S )Nr   z&I don't know how to match against a %s)ro   r.   rj   r   r/   r#   r}   r   r|   r  r  	Exceptionr   )r   r  r   r   r   r   r	   r#     s"    





zSoupStrainer.searchc       	      C   s~  d}t |tst |trPx|D ]}| ||rdS qW | d||rLdS dS |dkr`|d k	S t |trr||S |}t |tr|j}| |}|d kr| S t	|drt |t
s|st }xL|D ]@}|jr|}nt|}||krqq|| | |||rdS qW dS d}|s,t |t
r,||k}|sHt	|drH||S |szt |trz|jrz| |jd |j |S |S )NFTr   r   r#   r   )r.   rl   r   r  r   r   rj   r   r  ro   r   r7   r   idaddr#   r   )	r   r  r  Zalready_triedr   itemZoriginal_markupr   r$   r   r   r	   r    sV    






zSoupStrainer._matches)N)r   r   r   r   r   r  r   r  Z	searchTagr#   r  r   r   r   r	   r   r  s   'r   c                   s*   e Zd ZdZd fdd	Zdd Z  ZS )r   zTA ResultSet is just a list that keeps track of the SoupStrainer
    that created it.r   c                s   t t| | || _d S )N)superr   r   source)r   r  r   )r   r   r	   r   N  s    zResultSet.__init__c             C   s   t d| d S )NzResultSet object has no attribute '%s'. You're probably treating a list of items like a single item. Did you call find_all() when you meant to call find()?)r   )r   r   r   r   r	   r   R  s    zResultSet.__getattr__)r   )r   r   r   r   r   r   __classcell__r   r   )r   r	   r   K  s   r   ).Z__license__Zcollections.abcr   ImportErrorecollectionsr)   sysr   r  r   Z
bs4.dammitr   r   version_infor   r*   Znonwhitespace_reZwhitespace_rer   r   r   r   r   r!   r,   objectr:   rA   rC   rD   rE   rF   rG   r/   r   r   r   r   r   r   r   rj   r   rl   r   r   r   r   r	   <module>   sf   

%
    9    H Z