B
    j9C\\A              
   @   s  d Z dgZddlZddlZddlmZmZmZmZ ddl	m
Z
mZ ddlZddlmZmZ ddl	mZmZmZmZ yddlmZ d	ZW n2 ek
r Z zdd
lmZ dZW ddZ[X Y nX G dd deZG dd dejZG dd deZG dd dej Z!G dd de!Z"dS )ZMITHTML5TreeBuilder    N)
PERMISSIVEHTMLHTML_5HTMLTreeBuilder)NamespacedAttributenonwhitespace_re)
namespacesprefixes)CommentDoctypeNavigableStringTag)_baseF)baseTc               @   sB   e Zd ZdZdZeeeegZdddZ	dd Z
dd	 Zd
d ZdS )r   zUse html5lib to build a tree.html5libNc             c   s&   || _ |rtd |d d dfV  d S )NzjYou provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.F)user_specified_encodingwarningswarn)selfmarkupr   Zdocument_declared_encodingZexclude_encodings r   4lib/python3.7/site-packages/bs4/builder/_html5lib.pyprepare_markup0   s    
zHTML5TreeBuilder.prepare_markupc             C   s   | j jd k	rtd tj| jd}t }t|t	sNt
rD| j|d< n
| j|d< |j|f|}t|t	rnd |_n$|jjjd }t|t	s|j}||_d S )NzYou provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.)ZtreeZoverride_encodingencodingr   )soupZ
parse_onlyr   r   r   Z
HTMLParsercreate_treebuilderdict
isinstancestrnew_html5libr   parseoriginal_encodingZ	tokenizerstreamZcharEncodingname)r   r   parserZextra_kwargsdocr"   r   r   r   feed=   s    




zHTML5TreeBuilder.feedc             C   s   t || j| _| jS )N)TreeBuilderForHtml5libr   Zunderlying_builder)r   namespaceHTMLElementsr   r   r   r   X   s    z#HTML5TreeBuilder.create_treebuilderc             C   s   d| S )zSee `TreeBuilder`.z)<html><head></head><body>%s</body></html>r   )r   Zfragmentr   r   r   test_fragment_to_document]   s    z*HTML5TreeBuilder.test_fragment_to_document)NN)__name__
__module____qualname____doc__NAMEr   r   r   Zfeaturesr   r'   r   r*   r   r   r   r   r   )   s   
c                   sf   e Zd Zd fdd	Zdd Zdd Zdd	 Zd
d Zdd Zdd Z	dd Z
dd Zdd Z  ZS )r(   Nc                s8   |r|| _ nddlm} |dd| _ tt| | d S )Nr   )BeautifulSoup zhtml.parser)r   bs4r0   superr(   __init__)r   r)   r   r0   )	__class__r   r   r4   d   s
    zTreeBuilderForHtml5lib.__init__c             C   s   | j   t| j | j d S )N)r   resetElement)r   r   r   r   documentClassl   s    
z$TreeBuilderForHtml5lib.documentClassc             C   s6   |d }|d }|d }t |||}| j| d S )Nr$   publicIdsystemId)r   Zfor_name_and_idsr   object_was_parsed)r   tokenr$   r9   r:   Zdoctyper   r   r   insertDoctypep   s
    z$TreeBuilderForHtml5lib.insertDoctypec             C   s   | j ||}t|| j |S )N)r   new_tagr7   )r   r$   	namespacetagr   r   r   elementClassx   s    z#TreeBuilderForHtml5lib.elementClassc             C   s   t t|| jS )N)TextNoder   r   )r   datar   r   r   commentClass|   s    z#TreeBuilderForHtml5lib.commentClassc             C   s0   ddl m} |dd| _d| j_t| j| jd S )Nr   )r0   r1   zhtml.parserz[document_fragment])r2   r0   r   r$   r7   )r   r0   r   r   r   fragmentClass   s    z$TreeBuilderForHtml5lib.fragmentClassc             C   s   | j |j d S )N)r   appendelement)r   noder   r   r   appendChild   s    z"TreeBuilderForHtml5lib.appendChildc             C   s   | j S )N)r   )r   r   r   r   getDocument   s    z"TreeBuilderForHtml5lib.getDocumentc             C   s   t j| jS )N)treebuilder_baseTreeBuildergetFragmentrG   )r   r   r   r   rM      s    z"TreeBuilderForHtml5lib.getFragmentc                sB   ddl m  g tdd fdd	|d dS )Nr   )r0   z8^(.*?)(?: PUBLIC "(.*?)"(?: "(.*?)")?| SYSTEM "(.*?)")?$c       	         s  t |  r
t | tr| }|r|d}|jdkrx|dpBd}|dpZ|dpZd}dd| |||f  qdd| |f  nd	d| f  nHt | tr̈d
d| | f  n$t | trdd| | f  n | jrdt	| j | j
f }n| j
}dd| |f  | jrg }x`t| j D ]N\}}t |trndt	|j |j
f }t |trd|}|||f qFW x2t|D ]&\}}dd|d  ||f  qW |d7 }x| jD ]}|| qW d S )N      r1         z|%s<!DOCTYPE %s "%s" "%s"> z|%s<!DOCTYPE %s>z|%s<!DOCTYPE >z|%s<!-- %s -->z|%s"%s"z%s %sz|%s<%s>z
|%s%s="%s")r   r   matchgroup	lastindexrF   r   r   r?   r
   r$   attrslistitemsr   joinsortedZchildren)	rG   indentmr$   r9   r:   
attributesvaluechild)r0   
doctype_rervserializeElementr   r   rb      sD    








"z?TreeBuilderForHtml5lib.testSerializer.<locals>.serializeElement
)r   )r2   r0   recompilerY   )r   rG   r   )r0   r`   ra   rb   r   testSerializer   s    
)
z%TreeBuilderForHtml5lib.testSerializer)N)r+   r,   r-   r4   r8   r=   rA   rD   rE   rI   rJ   rM   rf   __classcell__r   r   )r5   r   r(   b   s   r(   c               @   sL   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dS )AttrListc             C   s   || _ t| j j| _d S )N)rG   r   rV   )r   rG   r   r   r   r4      s    zAttrList.__init__c             C   s   t | j  S )N)rW   rV   rX   __iter__)r   r   r   r   ri      s    zAttrList.__iter__c             C   sP   t j}||d ks.| jj|krB||| jj krBt|tsBt|}|| j|< d S )N*)r   Zcdata_list_attributesrG   r$   r   rW   r   findall)r   r$   r^   Z	list_attrr   r   r   __setitem__   s    

zAttrList.__setitem__c             C   s   t | j S )N)rW   rV   rX   )r   r   r   r   rX      s    zAttrList.itemsc             C   s   t | j S )N)rW   rV   keys)r   r   r   r   rm      s    zAttrList.keysc             C   s
   t | jS )N)lenrV   )r   r   r   r   __len__   s    zAttrList.__len__c             C   s
   | j | S )N)rV   )r   r$   r   r   r   __getitem__   s    zAttrList.__getitem__c             C   s   |t | j kS )N)rW   rV   rm   )r   r$   r   r   r   __contains__   s    zAttrList.__contains__N)r+   r,   r-   r4   ri   rl   rX   rm   ro   rp   rq   r   r   r   r   rh      s   rh   c               @   sx   e Zd Zdd Zdd Zdd Zdd ZeeeZdd
dZ	dd Z
dd Zdd Zdd Zdd Zdd ZeeZd	S )r7   c             C   s&   t j| |j || _|| _|| _d S )N)rK   Noder4   r$   rG   r   r?   )r   rG   r   r?   r   r   r   r4      s    zElement.__init__c             C   s*  d  }}t |tr| }}n:t |tr,|}n*|jjtkrJ|j }}| |_n|j}| |_t |tst|jd k	rt|j  |d k	r| jjr| jjd jtkr| jjd }| j	
|| }|| || j	_n`t |tr| j	
|}| jjr| jd}n | jjd k	r| j	 }n| j}| j	j|| j|d d S )NF)parentmost_recent_element)r   r   r   rG   r5   r   rt   extractcontentsr   
new_stringreplace_withZ_most_recent_element_last_descendantnext_elementr;   )r   rH   Zstring_childr_   Zold_elementZnew_elementru   r   r   r   rI      s8    







zElement.appendChildc             C   s   t | jtri S t| jS )N)r   rG   r   rh   )r   r   r   r   getAttributes  s    zElement.getAttributesc             C   s   |d k	rt |dkrg }x8t| D ](\}}t|tr&t| }||= |||< q&W | jj| j	| x"t| D ]\}}|| j
|< qrW | jj| j
 d S )Nr   )rn   rW   rX   r   tupler   r   ZbuilderZ$_replace_cdata_list_attribute_valuesr$   rG   Zset_up_substitutions)r   r]   Zconverted_attributesr$   r^   new_namer   r   r   setAttributes!  s    

zElement.setAttributesNc             C   s4   t | j|| j}|r&| || n
| | d S )N)rB   r   rx   insertBeforerI   )r   rC   r   textr   r   r   
insertText9  s    zElement.insertTextc             C   s   | j |j }|j jtkrf| j jrf| j j|d  jtkrf| j j|d  }| j||j  }|| n| j ||j  | |_	d S )NrN   )
rG   indexr5   r   rw   r   rx   ry   insertrt   )r   rH   ZrefNoder   Zold_nodeZnew_strr   r   r   r   @  s    zElement.insertBeforec             C   s   |j   d S )N)rG   rv   )r   rH   r   r   r   removeChildL  s    zElement.removeChildc             C   s   | j }|j }|j}|dd}t|jdkr>|jd }|j}n
d}|j}|j}t|dkr|d }	|dk	rr||	_n||	_||	_|dk	r|	|_n|	|_|dk	r|	|_|d dd}
||
_|dk	r|
|_d|
_x|D ]}||_|j	| qW g |_||_dS )z1Move all of this tag's children into another tag.Fr   rs   NT)
rG   Znext_siblingrz   rn   rw   r{   Zprevious_elementZprevious_siblingrt   rF   )r   Z
new_parentrG   Znew_parent_elementZfinal_next_elementZnew_parents_last_descendantZnew_parents_last_childZ(new_parents_last_descendant_next_elementZ	to_appendZfirst_childZlast_childs_last_descendantr_   r   r   r   reparentChildrenO  s>    

zElement.reparentChildrenc             C   sF   | j | jj| j}t|| j | j}x| jD ]\}}||j|< q,W |S )N)r   r>   rG   r$   r?   r7   r]   )r   r@   rH   keyr^   r   r   r   	cloneNode  s
    zElement.cloneNodec             C   s   | j jS )N)rG   rw   )r   r   r   r   
hasContent  s    zElement.hasContentc             C   s(   | j d krtd | jfS | j | jfS d S )NZhtml)r?   r	   r$   )r   r   r   r   getNameTuple  s    
zElement.getNameTuple)N)r+   r,   r-   r4   rI   r|   r   propertyr]   r   r   r   r   r   r   r   Z	nameTupler   r   r   r   r7      s   6

Br7   c               @   s   e Zd Zdd Zdd ZdS )rB   c             C   s   t j| d  || _|| _d S )N)rK   rr   r4   rG   r   )r   rG   r   r   r   r   r4     s    zTextNode.__init__c             C   s   t d S )N)NotImplementedError)r   r   r   r   r     s    zTextNode.cloneNodeN)r+   r,   r-   r4   r   r   r   r   r   rB     s   rB   )#Z__license____all__r   rd   Zbs4.builderr   r   r   r   Zbs4.elementr   r   r   Zhtml5lib.constantsr	   r
   r   r   r   r   Zhtml5lib.treebuildersr   rK   r    ImportErrorer   r   rL   r(   objectrh   rr   r7   rB   r   r   r   r   <module>   s(   9_ E