U
    lufJ                  
   @   s  d Z dgZddlZddlZddlmZmZmZmZm	Z	 ddl
mZmZ ddlZddlmZmZ ddl
mZmZmZmZ zddlmZ d	ZW n2 ek
r Z zdd
lmZ dZW 5 dZ[X Y nX G dd de	ZG dd dejZG dd deZ G dd dej!Z"G dd de"Z#dS )MITHTML5TreeBuilder    N)DetectsXMLParsedAsHTML
PERMISSIVEHTMLHTML_5HTMLTreeBuilder)NamespacedAttributenonwhitespace_re)
namespacesprefixes)CommentDoctypeNavigableStringTag)_baseF)baseTc                   @   sF   e Zd ZdZdZeeeegZdZ	dddZ
dd Zd	d
 Zdd ZdS )r   a  Use html5lib to build a tree.

    Note that this TreeBuilder does not support some features common
    to HTML TreeBuilders. Some of these features could theoretically
    be implemented, but at the very least it's quite difficult,
    because html5lib moves the parse tree around as it's being built.

    * This TreeBuilder doesn't use different subclasses of NavigableString
      based on the name of the tag in which the string was found.

    * You can't use a SoupStrainer to parse only part of a document.
    html5libTNc                 c   s8   || _ |rtjddd tj|dd |d d dfV  d S )NzjYou provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.   
stacklevelF)user_specified_encodingwarningswarnr   Zwarn_if_markup_looks_like_xml)selfmarkupr   Zdocument_declared_encodingZexclude_encodings r   9/tmp/pip-unpacked-wheel-pg4vfqbd/bs4/builder/_html5lib.pyprepare_markup@   s     zHTML5TreeBuilder.prepare_markupc                 C   s   | j jd k	rtjddd tj| jd}|| j_t	 }t
|tsZtrP| j|d< n
| j|d< |j|f|}t
|trzd |_n$|jjjd }t
|ts|j}||_d | j_d S )NzYou provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.   r   )treeoverride_encodingencodingr   )soupZ
parse_onlyr   r   r   
HTMLParsercreate_treebuilderunderlying_builderparserdict
isinstancestrnew_html5libr   parseoriginal_encoding	tokenizerstreamcharEncodingname)r   r   r'   extra_kwargsdocr-   r   r   r   feedW   s(    



zHTML5TreeBuilder.feedc                 C   s   t || j| jd| _| jS )N)store_line_numbers)TreeBuilderForHtml5libr#   r5   r&   )r   namespaceHTMLElementsr   r   r   r%   v   s     z#HTML5TreeBuilder.create_treebuilderc                 C   s   d| S )zSee `TreeBuilder`.z)<html><head></head><body>%s</body></html>r   )r   fragmentr   r   r   test_fragment_to_document}   s    z*HTML5TreeBuilder.test_fragment_to_document)NN)__name__
__module____qualname____doc__NAMEr   r   r   featuresZTRACKS_LINE_NUMBERSr   r4   r%   r9   r   r   r   r   r   *   s      
c                       sf   e Zd Zd fdd	Zdd Zdd Zd	d
 Zdd Zdd Zdd Z	dd Z
dd Zdd Z  ZS )r6   NTc                    sL   |r|| _ n ddlm} |dd|i|| _ tt| | d | _|| _d S )Nr   BeautifulSoup html.parserr5   )rB   rC   )r#   bs4rA   superr6   __init__r'   r5   )r   r7   r#   r5   kwargsrA   	__class__r   r   rF      s       zTreeBuilderForHtml5lib.__init__c                 C   s   | j   t| j | j d S N)r#   resetElementr   r   r   r   documentClass   s    
z$TreeBuilderForHtml5lib.documentClassc                 C   s6   |d }|d }|d }t |||}| j| d S )Nr1   publicIdsystemId)r   Zfor_name_and_idsr#   object_was_parsed)r   tokenr1   rO   rP   doctyper   r   r   insertDoctype   s
    z$TreeBuilderForHtml5lib.insertDoctypec                 C   sV   i }| j r6| jr6| j jj \}}||d< |d |d< | jj||f|}t|| j|S )N
sourceline   	sourcepos)r'   r5   r.   r/   positionr#   new_tagrL   )r   r1   	namespacerG   rU   rW   tagr   r   r   elementClass   s    z#TreeBuilderForHtml5lib.elementClassc                 C   s   t t|| jS rJ   )TextNoder   r#   )r   datar   r   r   commentClass   s    z#TreeBuilderForHtml5lib.commentClassc                 C   s0   ddl m} |dd| _d| j_t| j| jd S )Nr   r@   rB   rC   z[document_fragment])rD   rA   r#   r1   rL   )r   rA   r   r   r   fragmentClass   s    z$TreeBuilderForHtml5lib.fragmentClassc                 C   s   | j |j d S rJ   )r#   appendelementr   noder   r   r   appendChild   s    z"TreeBuilderForHtml5lib.appendChildc                 C   s   | j S rJ   )r#   rM   r   r   r   getDocument   s    z"TreeBuilderForHtml5lib.getDocumentc                 C   s   t j| jS rJ   )treebuilder_baseTreeBuildergetFragmentrb   rM   r   r   r   ri      s    z"TreeBuilderForHtml5lib.getFragmentc                    sB   ddl m  g tdd fdd	|d dS )Nr   r@   z8^(.*?)(?: PUBLIC "(.*?)"(?: "(.*?)")?| SYSTEM "(.*?)")?$c           	         s  t |  r
t | tr| }|r|d}|jdkrx|dpBd}|dpZ|dpZd}dd| |||f  qdd| |f  nd	d| f  n:t | tr̈d
d| | f  nt | trdd| | f  n| jrdt	| j | j
f }n| j
}dd| |f  | jrg }t| j D ]N\}}t |trjdt	|j |j
f }t |trd|}|||f qBt|D ]&\}}dd|d  ||f  q|d7 }| jD ]}|| qd S )NrV      rB   r   r   z|%s<!DOCTYPE %s "%s" "%s"> z|%s<!DOCTYPE %s>z|%s<!DOCTYPE >z|%s<!-- %s -->z|%s"%s"z%s %sz|%s<%s>z
|%s%s="%s")r)   r   matchgroup	lastindexra   r   r   rZ   r   r1   attrslistitemsr	   joinsortedchildren)	rb   indentmr1   rO   rP   
attributesvaluechildrA   Z
doctype_rervserializeElementr   r   r|      sH    








 
z?TreeBuilderForHtml5lib.testSerializer.<locals>.serializeElement
)r   )rD   rA   recompilerr   r   rb   r   rz   r   testSerializer   s    
)
z%TreeBuilderForHtml5lib.testSerializer)NT)r:   r;   r<   rF   rN   rT   r\   r_   r`   re   rf   ri   r   __classcell__r   r   rH   r   r6      s     r6   c                   @   sL   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dS )AttrListc                 C   s   || _ t| j j| _d S rJ   )rb   r(   ro   r   r   r   r   rF      s    zAttrList.__init__c                 C   s   t | j  S rJ   )rp   ro   rq   __iter__rM   r   r   r   r      s    zAttrList.__iter__c                 C   s^   | j jp
i }||dg ks<| j j|krP||| j jg krPt|tsPt|}|| j |< d S )N*)rb   Zcdata_list_attributesgetr1   r)   rp   r
   findall)r   r1   rx   Z	list_attrr   r   r   __setitem__   s    


zAttrList.__setitem__c                 C   s   t | j S rJ   )rp   ro   rq   rM   r   r   r   rq     s    zAttrList.itemsc                 C   s   t | j S rJ   rp   ro   keysrM   r   r   r   r     s    zAttrList.keysc                 C   s
   t | jS rJ   )lenro   rM   r   r   r   __len__  s    zAttrList.__len__c                 C   s
   | j | S rJ   )ro   r   r1   r   r   r   __getitem__  s    zAttrList.__getitem__c                 C   s   |t | j kS rJ   r   r   r   r   r   __contains__  s    zAttrList.__contains__N)r:   r;   r<   rF   r   r   rq   r   r   r   r   r   r   r   r   r      s   r   c                   @   sx   e Zd Zdd Zdd Zdd Zdd ZeeeZdd
dZ	dd Z
dd Zdd Zdd Zdd Zdd ZeeZd	S )rL   c                 C   s&   t j| |j || _|| _|| _d S rJ   )rg   NoderF   r1   rb   r#   rZ   )r   rb   r#   rZ   r   r   r   rF     s    zElement.__init__c                 C   s*  d  }}t |tr| }}n:t |tr,|}n*|jjtkrJ|j }}| |_n|j}| |_t |tst|jd k	rt|j  |d k	r| jjr| jjd jtkr| jjd }| j	
|| }|| || j	_n`t |tr| j	
|}| jjr| jd}n | jjd k	r| j	 }n| j}| j	j|| j|d d S )NF)parentmost_recent_element)r)   r*   r   rb   rI   r   r   extractcontentsr#   
new_stringreplace_withZ_most_recent_element_last_descendantnext_elementrQ   )r   rd   Zstring_childry   Zold_elementZnew_elementr   r   r   r   re     s>    







 zElement.appendChildc                 C   s   t | jtri S t| jS rJ   )r)   rb   r   r   rM   r   r   r   getAttributesU  s    zElement.getAttributesc                 C   s   |d k	rt |dkrg }t| D ](\}}t|tr$t| }||= |||< q$| jj| j	| t| D ]\}}|| j
|< ql| jj| j
 d S )Nr   )r   rp   rq   r)   tupler	   r#   ZbuilderZ$_replace_cdata_list_attribute_valuesr1   rb   Zset_up_substitutions)r   rw   Zconverted_attributesr1   rx   new_namer   r   r   setAttributesZ  s    

 zElement.setAttributesNc                 C   s4   t | j|| j}|r&| || n
| | d S rJ   )r]   r#   r   insertBeforere   )r   r^   r   textr   r   r   
insertTextp  s    zElement.insertTextc                 C   s   | j |j }|j jtkrf| j jrf| j j|d  jtkrf| j j|d  }| j||j  }|| n| j ||j  | |_	d S )NrV   )
rb   indexrI   r   r   r#   r   r   insertr   )r   rd   refNoder   Zold_nodeZnew_strr   r   r   r   w  s    zElement.insertBeforec                 C   s   |j   d S rJ   )rb   r   rc   r   r   r   removeChild  s    zElement.removeChildc                 C   s   | j }|j }|j}|dd}t|jdkr>|jd }|j}n
d}|j}|j}t|dkr|d }	|dk	rr||	_n||	_||	_|dk	r|	|_n|	|_|dk	r|	|_|d dd}
||
_|dk	r|
|_d|
_|D ]}||_|j	| qg |_||_dS )z1Move all of this tag's children into another tag.Fr   r   NT)
rb   Znext_siblingr   r   r   r   Zprevious_elementZprevious_siblingr   ra   )r   Z
new_parentrb   Znew_parent_elementZfinal_next_elementZnew_parents_last_descendantZnew_parents_last_childZ(new_parents_last_descendant_next_elementZ	to_appendZfirst_childZlast_childs_last_descendantry   r   r   r   reparentChildren  s>    
zElement.reparentChildrenc                 C   sB   | j | jj| j}t|| j | j}| jD ]\}}||j|< q*|S rJ   )r#   rY   rb   r1   rZ   rL   rw   )r   r[   rd   keyrx   r   r   r   	cloneNode  s
    zElement.cloneNodec                 C   s   | j jS rJ   )rb   r   rM   r   r   r   
hasContent  s    zElement.hasContentc                 C   s(   | j d krtd | jfS | j | jfS d S )Nhtml)rZ   r   r1   rM   r   r   r   getNameTuple  s    
zElement.getNameTuple)N)r:   r;   r<   rF   re   r   r   propertyrw   r   r   r   r   r   r   r   	nameTupler   r   r   r   rL     s   6

BrL   c                   @   s   e Zd Zdd Zdd ZdS )r]   c                 C   s   t j| d  || _|| _d S rJ   )rg   r   rF   rb   r#   )r   rb   r#   r   r   r   rF     s    zTextNode.__init__c                 C   s   t d S rJ   )NotImplementedErrorrM   r   r   r   r     s    zTextNode.cloneNodeN)r:   r;   r<   rF   r   r   r   r   r   r]     s   r]   )$__license____all__r   r~   Zbs4.builderr   r   r   r   r   Zbs4.elementr	   r
   r   Zhtml5lib.constantsr   r   r   r   r   r   Zhtml5lib.treebuildersr   rg   r+   ImportErrorer   r   rh   r6   objectr   r   rL   r]   r   r   r   r   <module>   s*   Xx C