U
    >vf                     @  sj  d dl mZ d dlmZmZ d dlmZ d dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlZd dlZd d	lmZ d d
lmZ d dlmZmZ d dlm  mZ d dlZd dlm Z m!Z!m"Z" d dl#m$Z% d dl&m'Z'm(Z( d dl)m*Z* d dl+m,Z, e!dddgdddgdej-dgdZ.dZ/dZ0e!ddddddd d!d!d!d!dd"d"d"d"d"dd d d d d dd#d#d#d#d#dd$d%d&d'd(dd)Z1ej2d*d+gd,d-d. Z3ej2ej4d/e5d/d0d1gd,d2d3 Z6d4d5 Z7d6d7 Z8e5d/d8d9 Z9ej:j;ej:j<e%j;d:d;d<d=d> Z=d?d@ Z>dAdB Z?dCdD Z@dEdF ZAdGdH ZBdIdJ ZCdKdL ZDe5d/ej:EdMdNdOgdPdQ ZFej:EdMdNdOgdRdS ZGe5d/dTdU ZHdVdW ZIej:j;e%j;dXd;d<e5d/dYdZ ZJej:j;e%j;d[d;d<d\d] ZKe5d/d^d_ ZLd`da ZMe5d/dbdc ZNddde ZOdfdg ZPe5d/dhdi ZQe5d/djdk ZRdldm ZSdndo ZTe5d/dpdq ZUe5d/ej:EdrdNdgdsdt ZVdudv ZWdwdx ZXdydz ZYd{d| ZZd}d~ Z[dd Z\e5d/dd Z]dd Z^dd Z_dd Z`dd Zadd Zbdd Zcdd Zddd Zedd Zfdd Zge5d/dd Zhe5d/dd Zidd Zjekd/dd Zldd Zme5d/dd Zne5d/dd Zoe5d/dd Zpe5d/dd Zqe5d/dd Zre5d/dd Zse5d/dd Zte5d/dd Zue5d/dd Zve5d/dd Zwe5d/dd Zxe5d/dd Zye5d/ej:EdMdNdOgdd Zzdd Z{dd Z|dd Z}ej:j;e%j;dXd;d<dd Z~ddĄ ZddƄ ZddȄ Zddʄ Zdd̄ Zdd΄ ZddЄ Zdd҄ ZddԄ Zej:j;e5d/e%j;dd;d<ddׄ Zddل Zddۄ Zdd݄ Zej:j;e5dޡe5d/ej:je dde%j;dd Zdd Zdd ZdS )    )annotations)BytesIOStringIO)	LZMAErrorN)	ReadError)	HTTPError)
ParseError)
BadZipFile)is_ci_environment)import_optional_dependency)EmptyDataErrorParserError)NA	DataFrameSeries)ArrowStringArrayStringArray)
get_handle)read_xmlsquarecircletriangleh           shapedegreessidesa[  <?xml version='1.0' encoding='utf-8'?>
<data xmlns="http://example.com">
  <row>
    <shape>square</shape>
    <degrees>360</degrees>
    <sides>4</sides>
  </row>
  <row>
    <shape>circle</shape>
    <degrees>360</degrees>
    <sides/>
  </row>
  <row>
    <shape>triangle</shape>
    <degrees>180</degrees>
    <sides>3</sides>
  </row>
</data>a  <?xml version='1.0' encoding='utf-8'?>
<doc:data xmlns:doc="http://example.com">
  <doc:row>
    <doc:shape>square</doc:shape>
    <doc:degrees>360</doc:degrees>
    <doc:sides>4.0</doc:sides>
  </doc:row>
  <doc:row>
    <doc:shape>circle</doc:shape>
    <doc:degrees>360</doc:degrees>
    <doc:sides/>
  </doc:row>
  <doc:row>
    <doc:shape>triangle</doc:shape>
    <doc:degrees>180</doc:degrees>
    <doc:sides>3.0</doc:sides>
  </doc:row>
</doc:data>ZID_00001ZID_00002ZID_00003ZID_00004ZID_00005r         r   r   zBlue Line (Forest Park)zRed, Purple Linez#LineStyle01ZclampedToGroundz-87.77678526964958,41.8708863930319,0 -87.77826234150609,41.87097820122218,0 -87.78251583439344,41.87130129991005,0 -87.78418294588424,41.87145055520308,0 -87.7872369165933,41.8717239119163,0 -87.79160214925886,41.87210797280065,0a\  -87.65758750947528,41.96427269188822,0 -87.65802133507393,41.96581929055245,0 -87.65819033925305,41.96621846093642,0 -87.6583189819129,41.96650362897086,0 -87.65835858701473,41.96669002089185,0 -87.65838428411853,41.96688150295095,0 -87.65842208882658,41.96745896091846,0 -87.65846556843937,41.9683761425439,0 -87.65849296214573,41.96913893870342,0a  -87.65492939166126,41.95377494531437,0 -87.65557043199591,41.95376544118533,0 -87.65606302030132,41.95376391658746,0 -87.65623502146268,41.95377379126367,0 -87.65634748981634,41.95380103566435,0 -87.65646537904269,41.95387703994676,0 -87.65656532461145,41.95396622645799,0 -87.65664760856414,41.95404201996044,0 -87.65671750555913,41.95416647054043,0 -87.65673983607117,41.95429949810849,0 -87.65673866475777,41.95441024240925,0 -87.6567690255541,41.95490657227902,0 -87.65683672482363,41.95692259283837,0 -87.6568900886376,41.95861070983142,0 -87.65699865558875,41.96181418669004,0 -87.65756347177603,41.96397045777844,0 -87.65758750947528,41.96427269188822,0ah  -87.65362593118043,41.94742799535678,0 -87.65363554415794,41.94819886386848,0 -87.6536456393239,41.95059994675451,0 -87.65365831235026,41.95108288489359,0 -87.6536604873874,41.9519954657554,0 -87.65362592053201,41.95245597302328,0 -87.65367158496069,41.95311153649393,0 -87.65368468595476,41.9533202828916,0 -87.65369271253692,41.95343095587119,0 -87.65373335834569,41.95351536301472,0 -87.65378605844126,41.95358212680591,0 -87.65385067928185,41.95364452823767,0 -87.6539390793817,41.95370263886964,0 -87.6540786298351,41.95373403675265,0 -87.65430648647626,41.9537535411832,0 -87.65492939166126,41.95377494531437,0a   -87.65345391792157,41.94217681262115,0 -87.65342448305786,41.94237224420864,0 -87.65339745703922,41.94268217746244,0 -87.65337753982941,41.94288140770284,0 -87.65336256753105,41.94317369618263,0 -87.65338799707138,41.94357253961736,0 -87.65340240886648,41.94389158188269,0 -87.65341837392448,41.94406444407721,0 -87.65342275247338,41.94421065714904,0 -87.65347469646018,41.94434829382345,0 -87.65351486483024,41.94447699917548,0 -87.65353483605053,41.9453896864472,0 -87.65361975532807,41.94689193720703,0 -87.65362593118043,41.94742799535678,0)idnamestyleUrlextrudealtitudeModecoordinatesrbr)paramsc                 C  s   | j S Nparamrequest r1   @/tmp/pip-unpacked-wheel-vdrwu74i/pandas/tests/io/xml/test_xml.pymode   s    r3   lxml)Zmarksetreec                 C  s   | j S r,   r-   r/   r1   r1   r2   parser   s    r6   c              
   K  sL   t  :}t|d}||  W 5 Q R X t|f|W  5 Q R  S Q R X d S )Nw)tmensure_cleanopenwriter   )datakwargspathfr1   r1   r2   read_xml_iterparse  s    
r@   c                 K  sx   t | d|d`}t L}t|d}||j  W 5 Q R X t|f|W  5 Q R  W  5 Q R  S Q R X W 5 Q R X d S )Nr*   )compressionr7   )r   r8   r9   r:   r;   handlereadr   )	comp_pathcompression_onlyr=   Zhandlesr>   r?   r1   r1   r2   read_xml_iterparse_comp  s
    
rF   c              	   C  s   | dddd}t |dd}t |dd}t |ddd	d
dddgid}t |ddd	d
dddgid}t|| t|| t|| d S )Nior<   xml	books.xmlr4   r6   r5   bookcategorytitleyearauthorpricer6   	iterparser   r8   assert_frame_equal)datapathfilenameZdf_file_lxmlZdf_file_etreedf_iter_lxmldf_iter_etreer1   r1   r2   test_parser_consistency_file  s     rY   Ohttps://data.cityofchicago.org/api/views/8pix-ypme/rows.xml?accessType=DOWNLOADT)urlZcheck_before_testc                 C  s   d}t jddf}t|d| dj|dd t|| d}t|| d	d
ddddddddddddddddddddgid}W 5 Q R X t || d S ) NrZ   zcta.xmlrV   z
.//row/rowxpathr6   FindexrJ   rowZ_idZ_uuid	_positionZ_addressZstop_idZdirection_idZ	stop_nameZstation_nameZstation_descriptive_nameZmap_idZadaZredZbluegZbrnpZpexpyZpnkolocationrQ   )r8   r9   r   to_xmlrT   )r6   r[   r>   df_xpathdf_iterr1   r1   r2   test_parser_consistency_url,  sD    rk   c              	   C  sr   | dddd}t ||}t||d}W 5 Q R X tdddgd	d
dgdddgdddgdddgd}t|| d S NrG   r<   rH   rI   rJ   cookingchildrenwebEveryday ItalianHarry PotterLearning XMLGiada De LaurentiisJ K. RowlingErik T. Ray          >@=
ףp=@皙C@rL   rM   rO   rN   rP   )r:   r   r   r8   rT   )rU   r6   r3   rV   r?   df_filedf_expectedr1   r1   r2   test_file_like`  s    
r~   c              	   C  s   | dddd}t ||}| }W 5 Q R X tt|tr@t|nt||d}tdddgd	d
dgdddgdddgdddgd}t	|| d S rl   )
r:   rC   r   
isinstancebytesr   r   r   r8   rT   )rU   r6   r3   rV   r?   xml_objZdf_ior}   r1   r1   r2   test_file_ior  s     
r   c              	   C  sz   | dddd}t ||}| }W 5 Q R X t||d}tdddgd	d
dgdddgdddgdddgd}t|| d S rl   )r:   rC   r   r   r8   rT   rU   r6   r3   rV   r?   r   df_strr}   r1   r1   r2    test_file_buffered_reader_string  s    
r   c              	   C  s   | dddd}t ||}t| | }W 5 Q R X t||d}tdddgd	d
dgdddgdddgdddgd}t|| d S rl   )r:   nextrC   r   r   r8   rT   r   r1   r1   r2   ,test_file_buffered_reader_no_xml_declaration  s    
r   c                 C  s4   d}t || d}tddddgd}t|| d S )N<   <中文標籤><row><c1>1</c1><c2>2</c2></row></中文標籤>rJ   r!   r"   )Zc1c2r   r_   r   r   r8   rT   )r6   txtr   r}   r1   r1   r2   test_string_charset  s    r   c                 C  sP   | dddd}t | ||d}tdddgd	d d
gd dd gd}t|| d S )NrG   r<   rH   zdoc_ch_utf.xmlrJ   uP   問  若箇是邪而言破邪 何者是正而道(Sorry, this is Big5 only)申正u;   問 既破有得申無得 亦應但破性執申假名以不uO   問 既破性申假 亦應但破有申無 若有無兩洗 亦應性假雙破耶uw   答  邪既無量 正亦多途  大略為言不出二種 謂有得與無得 有得是邪須破 無得是正須申
		故uL   答  不例  有無皆是性 所以須雙破 既分性假異 故有破不破uV   答 性執是有得 假名是無得  今破有得申無得 即是破性執申假名也)u   問u   答ar   )rU   r6   xml_filer|   r}   r1   r1   r2   test_file_charset  s    r   c              	   C  sF   | dddd}t |d$}tt| |d |jr8tW 5 Q R X d S )NrG   r<   rH   rI   r)   rJ   )r:   r   r   rC   closedAssertionError)rU   r6   r   r?   r1   r1   r2   test_file_handle_close  s    r   val     c              	   C  sD   ddl m} dddg}tj||d t| dd W 5 Q R X d S )	Nr   XMLSyntaxError|zDocument is emptyzNone \(line 0\)matchr4   rJ   )
lxml.etreer   joinpytestraisesr   )r   r   msgr1   r1   r2   test_empty_string_lxml  s    r   c              	   C  s*   t jtdd t| dd W 5 Q R X d S )Nzno element foundr   r5   rJ   )r   r   r   r   )r   r1   r1   r2   test_empty_string_etree  s    r   c               	   C  sF   ddl m}  tjddd}tj| dd t|dd	 W 5 Q R X d S )
Nr   r   r<   htmlrI   !Start tag expected, '<' not foundr   r4   rJ   r   r   osr>   r   r   r   r   )r   rV   r1   r1   r2   test_wrong_file_path_lxml  s    r   c               	   C  s:   t jddd} tjtdd t| dd W 5 Q R X d S )Nr<   r   rI   znot well-formedr   r5   rJ   )r   r>   r   r   r   r   r   r\   r1   r1   r2   test_wrong_file_path_etree   s    r   'https://www.w3schools.com/xml/books.xmlc               	   C  sZ   d} t | dd}tdddgddd	gd
ddgdddgdddgd d dgd}t|| d S )Nr   .//book[count(*)=4]r^   rm   rn   ro   rp   rq   rr   rs   rt   ru   rv   rw   rx   ry   rz   Z	paperback)rL   rM   rO   rN   rP   Zcoverr   )r[   Zdf_urlr}   r1   r1   r2   test_url
  s    r   (https://www.w3schools.com/xml/python.xmlc              	   C  s0   t jtdd d}t|d| d W 5 Q R X d S )NzHTTP Error 404: Not Foundr   r   r   r]   )r   r   r   r   r6   r[   r1   r1   r2   test_wrong_url"  s    r   c              	   C  s:   | dddd}t jtdd t|ddd	 W 5 Q R X d S )
NrG   r<   rH   rI   xpath does not return any nodesr   z	.//pythonr4   r]   r   r   
ValueErrorr   rU   rV   r1   r1   r2   test_empty_xpath_lxml-  s    r   c              	   C  s:   | dddd}t jtdd t|ddd	 W 5 Q R X d S )
NrG   r<   rH   rI   z/You have used an incorrect or unsupported XPathr   	.//[book]r5   r]   r   r   SyntaxErrorr   r   r1   r1   r2   test_bad_xpath_etree4  s     r   c              	   C  sF   ddl m} | dddd}tj|dd t|d	d
d W 5 Q R X d S )Nr   XPathEvalErrorrG   r<   rH   rI   zInvalid expressionr   r   r4   r]   r   r   r   r   r   rU   r   rV   r1   r1   r2   test_bad_xpath_lxml<  s    r   c                 C  sn   t tdddi| d}tt| ddddgid	}td
ddgdddgdtddgd}t|| t|| d S )N	.//ns:rownshttp://example.comr^   
namespacesr6   ra   r   r   r   rQ   r   r   r   r   r         @nan      @r   )r   xml_default_nmspr@   r   floatr8   rT   r6   Zdf_nmsprj   r}   r1   r1   r2   test_default_namespaceH  s&    r   c                 C  sn   t tdddi| d}tt| ddddgid	}td
ddgdddgdtddgd}t|| t|| d S )N
.//doc:rowdocr   r   ra   r   r   r   rQ   r   r   r   r   r   r   r   r   r   )r   xml_prefix_nmspr@   r   r   r8   rT   r   r1   r1   r2   test_prefix_namespaceb  s&      r   c                  C  s8   t tdddidd} t tdddidd}t| | d S )	Nr   r   r   r4   r   r   r   r5   )r   r   r8   rT   df_lxmldf_etreer1   r1   r2   "test_consistency_default_namespacey  s    r   c                  C  s8   t tdddidd} t tdddidd}t| | d S )Nr   r   r   r4   r   r5   )r   r   r8   rT   r   r1   r1   r2   !test_consistency_prefix_namespace  s    r   c              	   C  s:   | dddd}t jtdd t|d|d W 5 Q R X d S )	NrG   r<   rH   rI   r   r   z.//Placemarkr]   r   rU   r6   rV   r1   r1   r2   *test_missing_prefix_with_default_namespace  s    r   c              	   C  s:   | dddd}t jtdd t|ddd	 W 5 Q R X d S )
NrG   r<   rH   cta_rail_lines.kmlz'you used an undeclared namespace prefixr   .//kml:Placemarkr5   r]   r   r   r1   r1   r2   $test_missing_prefix_definition_etree  s    r   c              	   C  sF   ddl m} | dddd}tj|dd t|d	d
d W 5 Q R X d S )Nr   r   rG   r<   rH   r   zUndefined namespace prefixr   r   r4   r]   r   r   r1   r1   r2   #test_missing_prefix_definition_lxml  s    r   keyc              	   C  s2   t jtdd ttd| didd W 5 Q R X d S )Nz0empty namespace prefix is not supported in XPathr   r   http://www.opengis.net/kml/2.2r4   r   )r   r   	TypeErrorr   r   )r   r1   r1   r2   test_none_namespace_prefix  s     r   c              	   C  s   | dddd}t ||d}t ||dddd	d
dgid}tdddgdddgdddgdddgdddgd}t|| t|| d S )NrG   r<   rH   rI   rJ   rK   rL   rM   rO   rN   rP   rQ   rm   rn   ro   rp   rq   rr   rs   rt   ru   rv   rw   rx   ry   rz   r{   r   rU   r6   rV   r|   rj   r}   r1   r1   r2   test_file_elems_and_attrs  s"    
r   c                 C  s^   | dddd}t |d|d}t ||ddgid	}tdd
ddgi}t|| t|| d S )NrG   r<   rH   rI   T)
attrs_onlyr6   rK   rL   rQ   rm   rn   ro   r   r   r1   r1   r2   test_file_only_attrs  s    r   c                 C  s|   | dddd}t |d|d}t ||ddd	d
dgid}tdddgdddgdddgdddgd}t|| t|| d S )NrG   r<   rH   rI   T)
elems_onlyr6   rK   rM   rO   rN   rP   rQ   rp   rq   rr   rs   rt   ru   rv   rw   rx   ry   rz   )rM   rO   rN   rP   r   r   r1   r1   r2   test_file_only_elems  s     	r   c              	   C  s<   | dddd}t jtdd t|dd|d W 5 Q R X d S )	NrG   r<   rH   r   z3Either element or attributes can be parsed not bothr   T)r   r   r6   r   r   r1   r1   r2   test_elem_and_attrs_only  s    r   c              	   C  s2   d}t jtdd t|dd| d W 5 Q R X d S )NaU  
      <data>
        <row>
          <shape sides="4">square</shape>
          <degrees>360</degrees>
        </row>
        <row>
          <shape sides="0">circle</shape>
          <degrees>360</degrees>
        </row>
        <row>
          <shape sides="3">triangle</shape>
          <degrees>180</degrees>
        </row>
      </data>-xpath does not return any nodes or attributesr   ./rowT)r^   r   r6   r   r6   rH   r1   r1   r2   test_empty_attrs_only  s    r   c              	   C  s2   d}t jtdd t|dd| d W 5 Q R X d S )Nz
      <data>
        <row sides="4" shape="square" degrees="360"/>
        <row sides="0" shape="circle" degrees="360"/>
        <row sides="3" shape="triangle" degrees="180"/>
      </data>r   r   r   T)r^   r   r6   r   r   r1   r1   r2   test_empty_elems_only  s    r   c                  C  sd   d} t | dd}t | ddd}t| dddgid	}t| ddddgid
}t|| t|| d S )Na  <?xml version="1.0" encoding="UTF-8"?>
<TrainSchedule>
      <Stations>
         <station Name="Manhattan" coords="31,460,195,498"/>
         <station Name="Laraway Road" coords="63,409,194,455"/>
         <station Name="179th St (Orland Park)" coords="0,364,110,395"/>
         <station Name="153rd St (Orland Park)" coords="7,333,113,362"/>
         <station Name="143rd St (Orland Park)" coords="17,297,115,330"/>
         <station Name="Palos Park" coords="128,281,239,303"/>
         <station Name="Palos Heights" coords="148,257,283,279"/>
         <station Name="Worth" coords="170,230,248,255"/>
         <station Name="Chicago Ridge" coords="70,187,208,214"/>
         <station Name="Oak Lawn" coords="166,159,266,185"/>
         <station Name="Ashburn" coords="197,133,336,157"/>
         <station Name="Wrightwood" coords="219,106,340,133"/>
         <station Name="Chicago Union Sta" coords="220,0,360,43"/>
      </Stations>
</TrainSchedule>z
.//stationr   r5   r]   ZstationNameZcoordsrR   rQ   )r   r@   r8   rT   )rH   r   r   Z
df_iter_lxZ
df_iter_etr1   r1   r2   test_attribute_centric_xml-  s      
r   c              
   C  s   | dddd}t |ddddd	g|d
}t ||ddddd	gddddddgid}tdddgdddgdddgdddgdddgd }t|| t|| d S )!NrG   r<   rH   rI   Col1Col2Col3Col4Col5namesr6   rK   rL   rM   rO   rN   rP   )r6   r   rR   rm   rn   ro   rp   rq   rr   rs   rt   ru   rv   rw   rx   ry   rz   )r   r   r   r   r   r   r   r1   r1   r2   test_names_option_outputR  s,      
r   c                 C  sr   d}t |d| dddgd}t|| ddddgidddgd	}td
dgddgddgd}t|| t|| d S )Nz<shapes>
  <shape type="2D">
    <name>circle</name>
    <type>curved</type>
  </shape>
  <shape type="3D">
    <name>sphere</name>
    <type>curved</type>
  </shape>
</shapes>.//shapetype_dimr   	type_edger^   r6   r   typer$   r6   rR   r   2D3Dr   sphereZcurved)r   r   r   r   r@   r   r8   rT   r6   rH   ri   rj   r}   r1   r1   r2   test_repeat_namesl  s*       r   c                 C  sn   d}t |d| ddgd}t|| dddgiddgd}td	d
ddgd	d	ddgd}t|| t|| d S )NaB  <shapes>
  <shape>
    <name>rectangle</name>
    <family>rectangle</family>
  </shape>
  <shape>
    <name>square</name>
    <family>rectangle</family>
  </shape>
  <shape>
    <name>ellipse</name>
    <family>ellipse</family>
  </shape>
  <shape>
    <name>circle</name>
    <family>ellipse</family>
  </shape>
</shapes>r   r$   groupr   r   familyr   Z	rectangler   ellipser   )r$   r   r   r   r1   r1   r2   test_repeat_values_new_names  s    


r  c                 C  s   d}t |d| ddddgd}t|| dd	d	d	d	giddddgd
}tdddgdddgdddgdddgd}t|| t|| d S )Na  <shapes>
  <shape>
    <value item="name">circle</value>
    <value item="family">ellipse</value>
    <value item="degrees">360</value>
    <value item="sides">0</value>
  </shape>
  <shape>
    <value item="name">triangle</value>
    <value item="family">polygon</value>
    <value item="degrees">180</value>
    <value item="sides">3</value>
  </shape>
  <shape>
    <value item="name">square</value>
    <value item="family">polygon</value>
    <value item="degrees">360</value>
    <value item="sides">4</value>
  </shape>
</shapes>r   r$   r   r   r   r   r   valuer   r   r   r   r  Zpolygonr   r   r   r   r   )r$   r   r   r   r   r   r1   r1   r2   test_repeat_elements  s,    

	r  c              	   C  s@   | dddd}t jtdd t|ddd	g|d
 W 5 Q R X d S )NrG   r<   rH   rI   znames does not match lengthr   r   r   r   r   r   r   r1   r1   r2   test_names_option_wrong_length  s    r  c              	   C  s:   | dddd}t jtdd t|d|d W 5 Q R X d S )	NrG   r<   rH   rI   zis not a valid type for namesr   zCol1, Col2, Col3r   r   r   r   r   r   r1   r1   r2   test_names_option_wrong_type  s    r  c              	   C  s8   | dddd}t jtdd t||d W 5 Q R X d S )NrG   r<   rH   baby_names.xmlz'utf-8' codec can't decoder   rJ   r   r   UnicodeDecodeErrorr   r   r1   r1   r2   test_wrong_encoding  s    r  c              	   C  s:   | dddd}t jtdd t|d|d W 5 Q R X d S )	NrG   r<   rH   r  zIUTF-16 stream does not start with BOM|'utf-16-le' codec can't decode byter   zUTF-16encodingr6   )r   r   UnicodeErrorr   r   r1   r1   r2   test_utf16_encoding  s    r  c              	   C  s:   | dddd}t jtdd t|d|d W 5 Q R X d S )	NrG   r<   rH   r  zunknown encoding: UFT-8r   zUFT-8r  )r   r   LookupErrorr   r   r1   r1   r2   test_unknown_encoding  s    r  c              	   C  s:   | dddd}t jtdd t|d|d W 5 Q R X d S )	NrG   r<   rH   r  z'ascii' codec can't decode byter   asciir  r	  r   r1   r1   r2   test_ascii_encoding  s    r  c                 C  s   | dddd}t |ddd}t |dd	d}t |ddd
dddgid}t |ddd
dddgid}t|| t|| t|| d S )NrG   r<   rH   r  r4   z
ISO-8859-1r6   r  r5   z
iso-8859-1ra   ZrankZmalenameZ
femalename)r6   r  rR   rS   )rU   rV   Zdf_xpath_lxmlZdf_xpath_etreerW   rX   r1   r1   r2   %test_parser_consistency_with_encoding  s$    r  c               	   C  s4   d} t jtdd tt| dd d W 5 Q R X d S )N-<data>
  <row>
    <a>c</a>
  </row>
</data>
zencoding Noner   r4   r  )r   r   r   r   r   )r<   r1   r1   r2   test_wrong_encoding_for_lxml/  s    r  c                  C  s4   d} t t| dd d}tddgi}t|| d S )Nr  r5   r  r   c)r   r   r   r8   rT   )r<   resultexpectedr1   r1   r2   test_none_encoding_etree<  s    r  c              	   C  s4   | dddd}t jtdd t| W 5 Q R X d S )NrG   r<   rH   rI   z7lxml not found, please install or use the etree parser.r   )r   r   ImportErrorr   r   r1   r1   r2   test_default_parser_no_lxmlL  s     r  c              	   C  s8   | dddd}t jtdd t|dd W 5 Q R X d S )	NrG   r<   rH   rI   z,Values for parser can only be lxml or etree.r   Zbs4rJ   r   r   r1   r1   r2   test_wrong_parserV  s     r  c              	   C  sh   | dddd}| dddd}t |dddi|d	}t |d
ddddddgid}tt| tt| d S )NrG   r<   rH   r   flatten_doc.xsl.//k:Placemarkkr   r^   r   
stylesheetZ	Placemarkr#   r$   r%   r&   r'   r(   r   )r   r8   rT   df_kml)rU   kmlxsldf_stylerj   r1   r1   r2   test_stylesheet_fileb  s,    r(  c              	   C  sV   | dddd}| dddd}t ||}t|dddi|d	}W 5 Q R X tt| d S 
NrG   r<   rH   r   r  r   r!  r   r"  )r:   r   r8   rT   r$  )rU   r3   r%  r&  r?   r'  r1   r1   r2   test_stylesheet_file_like  s    r*  c              	   C  sx   | dddd}| dddd}t ||(}|dkr>t| }nt| }W 5 Q R X t|ddd	i|d
}tt| d S )NrG   r<   rH   r   r  r)   r   r!  r   r"  )r:   r   rC   r   r   r8   rT   r$  rU   r3   r%  r&  r?   xsl_objr'  r1   r1   r2   test_stylesheet_io  s    r-  c              	   C  s^   | dddd}| dddd}t ||}| }W 5 Q R X t|dddi|d	}tt| d S r)  )r:   rC   r   r8   rT   r$  r+  r1   r1   r2   test_stylesheet_buffered_reader  s    r.  c                  C  s,   d} d}t | }t | |d}t|| d S )Nr   u  <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
 <xsl:output omit-xml-declaration="yes" indent="yes"/>
 <xsl:strip-space elements="*"/>

 <xsl:template match="node()|@*">
     <xsl:copy>
       <xsl:apply-templates select="node()|@*"/>
     </xsl:copy>
 </xsl:template>

 <xsl:template match="中文標籤">
     <根>
       <xsl:apply-templates />
     </根>
 </xsl:template>

</xsl:stylesheet>r#  rS   )rH   r&  Zdf_origr'  r1   r1   r2   test_style_charset  s
    r0  c              	   C  sR   ddl m} | dddd}| dddd}tj|dd	 t||d
 W 5 Q R X d S )Nr   XSLTParseErrorrG   r<   rH   r   rI   zdocument is not a stylesheetr   r/  r   r2  r   r   r   )rU   r2  r%  r&  r1   r1   r2   test_not_stylesheet  s
    r4  c              	   C  sH   ddl m} d}| dddd}tj|dd	 t||d
 W 5 Q R X d S )Nr   r   a  <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                              xmlns:k="http://www.opengis.net/kml/2.2"/>
    <xsl:output method="xml" omit-xml-declaration="yes"
                cdata-section-elements="k:description" indent="yes"/>
    <xsl:strip-space elements="*"/>

    <xsl:template match="node()|@*">
     <xsl:copy>
       <xsl:apply-templates select="node()|@*"/>
     </xsl:copy>
    </xsl:template>

    <xsl:template match="k:MultiGeometry|k:LineString">
        <xsl:apply-templates select='*'/>
    </xsl:template>

    <xsl:template match="k:description|k:Snippet|k:Style"/>
</xsl:stylesheet>rG   r<   rH   r   z(Extra content at the end of the documentr   r/  )r   r   r   r   r   )rU   r   r&  r%  r1   r1   r2   test_incorrect_xsl_syntax  s     r5  c              	   C  sH   ddl m} d}| dddd}tj|dd	 t||d
 W 5 Q R X d S )Nr   r1  a  <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                              xmlns:k="http://www.opengis.net/kml/2.2">
    <xsl:output method="xml" omit-xml-declaration="yes"
                cdata-section-elements="k:description" indent="yes"/>
    <xsl:strip-space elements="*"/>

    <xsl:template match="node(*)|@*">
     <xsl:copy>
       <xsl:apply-templates select="node()|@*"/>
     </xsl:copy>
    </xsl:template>

    <xsl:template match="k:MultiGeometry|k:LineString">
        <xsl:apply-templates select='*'/>
    </xsl:template>

    <xsl:template match="k:description|k:Snippet|k:Style"/>
</xsl:stylesheet>rG   r<   rH   r   zfailed to compiler   r/  r3  )rU   r2  r&  r%  r1   r1   r2   test_incorrect_xsl_eval  s
    r6  c              	   C  sH   ddl m} d}| dddd}tj|dd	 t||d
 W 5 Q R X d S )Nr   )XSLTApplyErrorag  <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
    <xsl:output method="xml" encoding="utf-8" indent="yes" />
    <xsl:strip-space elements="*"/>

    <xsl:template match="@*|node()">
        <xsl:copy>
            <xsl:copy-of select="document('non_existent.xml')/*"/>
        </xsl:copy>
    </xsl:template>
</xsl:stylesheet>rG   r<   rH   r   zCannot resolve URIr   r/  )r   r7  r   r   r   )rU   r7  r&  r%  r1   r1   r2   test_incorrect_xsl_apply  s
    r8  c               	   C  sV   ddl m}  tjddd}tjddd}tj| dd t||d	 W 5 Q R X d S )
Nr   r   r<   rH   r   zflatten.xslr   r   r/  r   )r   r%  r&  r1   r1   r2   test_wrong_stylesheet5  s    r9  c              	   C  sn   | dddd}| dddd}t ||>}|dkr>t| }nt| }t||d |jr`tW 5 Q R X d S )NrG   r<   rH   r   r  r)   r/  )r:   r   rC   r   r   r   r   )rU   r3   r%  r&  r?   r,  r1   r1   r2   test_stylesheet_file_closeC  s    r:  c               	   C  sL   t jddd} t jddd}tjtdd t| d|d W 5 Q R X d S )	Nr<   rH   r   r  z*To use stylesheet, you need lxml installedr   r5   )r6   r#  )r   r>   r   r   r   r   r   )r%  r&  r1   r1   r2   test_stylesheet_with_etreeU  s     r;  c              	   C  sF   ddl m} tjddd}tj|dd t|| d W 5 Q R X d S )	Nr   r   r<   rH   r   z3Document is empty|Start tag expected, '<' not foundr   r/  r   )r   r   r%  r1   r1   r2   test_empty_stylesheet`  s     r<  c              	   C  s8   t jtdd  tt| dddddgid W 5 Q R X d S )	N)iterparse is designed for large XML filesr   ra   r   r   r   daterQ   )r   r   r   r   r   rJ   r1   r1   r2   test_string_errorp  s     r?  c                 C  s   | dddd}t ||v}|dkrn|dkrntjtdd" t||d	d
ddddgid W 5 Q R X W 5 Q R  d S t||d	d
ddddgid}W 5 Q R X tdddgdddgdddgdddgdddgd}t|| d S NrG   r<   rH   rI   r*   r4   z.reading file objects must return bytes objectsr   rK   rL   rM   rN   rO   rP   rQ   rm   rn   ro   rp   rq   rr   rs   rt   ru   rv   rw   rx   ry   rz   r{   )r:   r   r   r   r   r   r8   rT   )rU   r6   r3   rV   r?   Zdf_fileliker}   r1   r1   r2   test_file_like_iterparse{  s:      
rA  c           	      C  s  | dddd}|dkrt nt}t||}|| }|dkr|dkrtjtdd" t||d	d
ddddgid W 5 Q R X W 5 Q R  W 5 Q R  d S t||d	d
ddddgid}W 5 Q R X W 5 Q R X tdddgdddgdddgdddgdddgd}t	
|| d S r@  )r   r   r:   rC   r   r   r   r   r   r8   rT   )	rU   r6   r3   rV   ZfuncIOr?   bZ	df_fileior}   r1   r1   r2   test_file_io_iterparse  sB       
rC  c              	   C  s<   d}t jtdd  t|| dddddgid	 W 5 Q R X d S )
Nr   r=  r   ra   r   r   r   r>  rQ   r   r   r   r   r   r1   r1   r2   test_url_path_error  s     rE  c              
   C  sb   t jddL}tj|| |d tjtdd" t|| dddd	d
gi|d W 5 Q R X W 5 Q R X d S )Nzgeom_xml.zipr\   r6   rA   r=  r   ra   r   r   r   r>  r6   rR   rA   )r8   r9   geom_dfrh   r   r   r   r   )r6   rE   r>   r1   r1   r2   test_compression_error  s     rI  c              	   C  sD   | dddd}t jtdd t||ddd	d
dgd W 5 Q R X d S )NrG   r<   rH   rI   z&list is not a valid type for iterparser   rL   rM   rN   rO   rP   rQ   r  r   r1   r1   r2   test_wrong_dict_type  s    rJ  c              	   C  s>   | dddd}t jtdd t||ddid	 W 5 Q R X d S )
NrG   r<   rH   rI   z8<class 'str'> is not a valid type for value in iterparser   rK   rL   rQ   r  r   r1   r1   r2   test_wrong_dict_value  s     rK  c                 C  sx   d}t jdd^}t|d}|| W 5 Q R X tjtdd$ t|| dgdd	d
ddgid W 5 Q R X W 5 Q R X d S )Na  <?xml version='1.0' encoding='utf-8'?>
  <row>
    <shape>square</shape>
    <degrees>00360</degrees>
    <sides>4.0</sides>
    <date>2020-01-01</date>
   </row>
  <row>
    <shape>circle</shape>
    <degrees>00360</degrees>
    <sides/>
    <date>2021-01-01</date>
  </row>
  <row>
    <shape>triangle</shape>
    <degrees>00180</degrees>
    <sides>3.0</sides>
    <date>2022-01-01</date>
  </row>
zbad.xmlr\   r7   zDExtra content at the end of the document|junk after document elementr   r>  ra   r   r   r   )r6   Zparse_datesrR   )r8   r9   r:   r;   r   r   r   r   )r6   Zbad_xmlr>   r?   r1   r1   r2   test_bad_xml  s    rL  c                 C  sZ   d}t |d| d}t|| dddgid}tdd	gd
dgd}t|| t|| d S )Na-  <!-- comment before root -->
<shapes>
  <!-- comment within root -->
  <shape>
    <name>circle</name>
    <type>2D</type>
  </shape>
  <shape>
    <name>sphere</name>
    <type>3D</type>
    <!-- comment within child -->
  </shape>
  <!-- comment within root -->
</shapes>
<!-- comment after root -->r   r]   r   r$   r   rQ   r   r   r   r   r$   r   r   r   r1   r1   r2   test_comment  s      
rN  c                 C  sZ   d}t |d| d}t|| dddgid}tdd	gd
dgd}t|| t|| d S )Na8  <?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE non-profits [
    <!ELEMENT shapes (shape*) >
    <!ELEMENT shape ( name, type )>
    <!ELEMENT name (#PCDATA)>
]>
<shapes>
  <shape>
    <name>circle</name>
    <type>2D</type>
  </shape>
  <shape>
    <name>sphere</name>
    <type>3D</type>
  </shape>
</shapes>r   r]   r   r$   r   rQ   r   r   r   r   rM  r   r   r1   r1   r2   test_dtdB  s      
rO  c                 C  sZ   d}t |d| d}t|| dddgid}tdd	gd
dgd}t|| t|| d S )Nam  <?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="style.xsl"?>
<?display table-view?>
<?sort alpha-ascending?>
<?textinfo whitespace is allowed ?>
<?elementnames <shape>, <name>, <type> ?>
<shapes>
  <shape>
    <name>circle</name>
    <type>2D</type>
  </shape>
  <shape>
    <name>sphere</name>
    <type>3D</type>
  </shape>
</shapes>r   r]   r   r$   r   rQ   r   r   r   r   rM  r   r   r1   r1   r2   test_processing_instructionf  s      
rP  c              	   C  sF   | dddd}t jtdd  t||ddd	d
dgid W 5 Q R X d S )NrG   r<   rH   rI   z+No result from selected items in iterparse.r   nodeattr1elem1elem2elem3rQ   rD  r   r1   r1   r2   test_no_result  s     rV  c              	   C  sF   | dddd}t jtdd  t||ddd	d
dgid W 5 Q R X d S )NrG   r<   rH   rI   zNo columns to parse from filer   rK   rR  rS  rT  rU  rQ   )r   r   r   r   r   r1   r1   r2   test_empty_data  s    rW  4https://www.w3schools.com/xml/cdcatalog_with_xsl.xmlc                  C  sR   d} d}t | dddg|d}tddd	d
ddddddddd}t|| d S )NrX  z+https://www.w3schools.com/xml/cdcatalog.xslz.//tr[td and position() <= 6]rM   artist)r^   r   r#  zEmpire BurlesquezHide your heartzGreatest HitszStill got the bluesZErosr    z	Bob DylanzBonnie TylerzDolly Partonz
Gary MoorezEros Ramazzotti)rM   rY  r   )rH   r&  Zdf_xslr}   r1   r1   r2   test_online_stylesheet  s0    rZ  c              	   C  sl   t  B}tj|d| |d t|| |d}t||| ddddgi|d}W 5 Q R X t |t t |t d S )	NF)r`   r6   rA   rF  ra   r   r   r   rG  )r8   r9   rH  rh   r   rF   rT   )r6   rE   rD   ri   rj   r1   r1   r2   test_compression_read  s"    
   r[  c              
   C  s   |}|}||krd S t dft dftdftdfd}tddd}|d k	rT|jd	f|d
< tddd}|d k	rttdf|d< || \}}	t >}
tj	|
| |d t
j||	d t|
| |d W 5 Q R X W 5 Q R X d S )NzInvalid data streamzNot a gzipped filezFile is not a zip filez%file could not be opened successfully)bz2gzipziptarZ	zstandardignore)errorszUnknown frame descriptorzstdlzmaz%Input format not supported by decoderxzrF  r   )OSErrorr	   r   r   Z	ZstdErrorr   r8   r9   rH  rh   r   r   r   )r6   rA   rE   Zactual_compressionZattempted_compressionra  rb  rc  Z	error_clsZ	error_strr>   r1   r1   r2   test_wrong_compression  s(    
rf  c              
   C  s@   t jtdd( t }t|| dd W 5 Q R X W 5 Q R X d S )NzUnrecognized compression typer   Z7zrF  )r   r   r   r8   r9   r   )r6   r>   r1   r1   r2   test_unsuported_compression  s    
rg  Zs3fsz02022.1.17: Hanging on the CI min versions build.)reasonc                  C  sH   d} t | dddidddid}t | dddid	ddid}t|| d S )
Nz/s3://irs-form-990/201923199349319487_public.xmlz .//irs:Form990PartVIISectionAGrpZirszhttp://www.irs.gov/efiler4   ZanonT)r^   r   r6   Zstorage_optionsr5   rS   )Zs3r   r   r1   r1   r2   test_s3_parser_consistency  s     
ri  c                   sd  d}|dkr>t tjddgtjd}t tjdtgtjd}n.tdtddg}tdd g}t	d| t
|| |d}W 5 Q R X t|td	d
gddtddgdd|td
tgddtdtgddtttgddtddgddtdtgddd	|dkrTtdddlm  t fddjD  d d gd< t| d S )Na;  <?xml version='1.0' encoding='utf-8'?>
<data xmlns="http://example.com">
<row>
  <a>x</a>
  <b>1</b>
  <c>4.0</c>
  <d>x</d>
  <e>2</e>
  <f>4.0</f>
  <g></g>
  <h>True</h>
  <i>False</i>
</row>
<row>
  <a>y</a>
  <b>2</b>
  <c>5.0</c>
  <d></d>
  <e></e>
  <f></f>
  <g></g>
  <h>False</h>
  <i></i>
</row>
</data>pythonxre   )ZdtypeZpyarrowzmode.string_storage)r6   dtype_backendr!   r"   ZInt64r   g      @ZFloat64TFboolean)	r   rB  r  der?   rc   hir   )ArrowExtensionArrayc                   s$   i | ]}| j | d dqS )T)Zfrom_pandas)array).0colrr  r  par1   r2   
<dictcomp>\  s    z1test_read_xml_nullable_dtypes.<locals>.<dictcomp>rc   )r   nprs  Zobject_r   r   Zimportorskipr   pdZoption_contextr   r   r   Zpandas.arraysrr  columnsr8   rT   )r6   Zstring_storagerl  r<   Zstring_arrayZstring_array_nar  r1   rv  r2   test_read_xml_nullable_dtypes!  s>    


r|  c               	   C  s.   d} t jt| d tddd W 5 Q R X d S )NzPdtype_backend numpy is invalid, only 'numpy_nullable' and 'pyarrow' are allowed.r   testnumpy)rl  r   )r   r1   r1   r2   test_invalid_dtype_backendf  s    r  )
__future__r   rG   r   r   rc  r   r   tarfiler   urllib.errorr   xml.etree.ElementTreer   zipfiler	   r~  ry  r   Zpandas.compatr
   Zpandas.compat._optionalr   Zpandas.errorsr   r   Zpandas.util._test_decoratorsutilZ_test_decoratorstdZpandasrz  r   r   r   Zpandas._testingZ_testingr8   Zpandas.core.arraysr   r   Zpandas.io.commonr   Zpandas.io.xmlr   r   rH  r   r   r$  Zfixturer3   r.   Z
skip_if_nor6   r@   rF   rY   marknetworkZslowrk   r~   r   r   r   r   r   r   Zparametrizer   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r  r  r  r  r  Zskip_if_installedr  r  r(  r*  r-  r.  r0  r4  r5  r6  r8  r9  r:  r;  r<  r?  rA  rC  rE  rI  rJ  rK  rL  rN  rO  rP  rV  rW  rZ  r[  rf  rg  Zskipifri  r|  r  r1   r1   r1   r2   <module>   s  6
	m


+
	


	





	
$#(1



	













$(
)#$$
 $	E