U
    >vfX                     @   s  d Z ddlZddlmZ ddlZddlZddlmZ ddlZ	ddl
Z
ddlmZ ddlmZ ddlmZmZ ddlm  mZ ddlZddlmZ ddlmZ dd	lmZmZm Z m!Z!m"Z" zddl#Z#d
Z$W n e%k
r   dZ$Y nX zddl&Z&d
Z'W n e%k
r   dZ'Y nX e
j(e
j)de
j*j+e' p2eddkddde
j)de
j*j+e$ dddgddd Z,e
j(dd Z-e
j(dd Z.e
j(dd Z/e
j(dd Z0e
j(dd  Z1e
j(ej2ej3j4ej2ej3j5ej2ej3j6ej7d!d"ej7d#d"ej7d$d"ej7d%d"gdd&d' Z8dEd)d*Z9d+d, Z:d-d. Z;d/d0 Z<d1d2 Z=d3d4 Z>d5d6 Z?d7d8 Z@d9d: ZAd;d< ZBG d=d> d>ZCG d?d@ d@eCZDG dAdB dBeCZEG dCdD dDeCZFdS )Fz test parquet compat     N)BytesIO)catch_warnings)
get_option)is_platform_windows)pa_version_under7p0pa_version_under8p0)Version)FastParquetImplPyArrowImpl
get_engineread_parquet
to_parquetTFfastparquetmode.data_managerarrayz4fastparquet is not installed or ArrayManager is usedreason)Zmarkspyarrowpyarrow is not installed)paramsc                 C   s   | j S Nparamrequest r   @/tmp/pip-unpacked-wheel-vdrwu74i/pandas/tests/io/test_parquet.pyengine3   s    r   c                   C   s   t std dS )Nr   r   )_HAVE_PYARROWpytestskipr   r   r   r   paH   s    
r!   c                   C   s*   t std ntddkr&td dS )Nzfastparquet is not installedr   r   z.ArrayManager is not supported with fastparquetr   )_HAVE_FASTPARQUETr   r    r   r   r   r   r   fpO   s
    
r#   c                   C   s   t dddgddS )N         fooAB)pd	DataFramer   r   r   r   	df_compatX   s    r-   c               
   C   sD   t tdttddtjdddddd	dgt jd
ddd} | S )Nabcr$            @      @float64dtypeTF20130101r&   periods)abdef)r+   r,   listrangenparange
date_range)dfr   r   r   df_cross_compat]   s    rC   c                   C   s   t tddtjdgdd dgdddgddd	gttd
dtdddtjdddddtjdgdddgt jdddt 	dt j
t 	dgdS )Nr.   r8   c   foo   bars   bazr'   barbazr$   r/   r&      u1r0   r1   r2   r3          @      @TFr5   r6   Z20130103)stringZstring_with_nanZstring_with_nonebytesunicodeintZuintfloatZfloat_with_nanbooldatetimedatetime_with_nat)r+   r,   r=   r?   nanr>   r@   astyperA   	TimestampZNaTr   r   r   r   df_fullo   s$    

rX   z2019-01-04T16:41:24+0200z%Y-%m-%dT%H:%M:%S%zz2019-01-04T16:41:24+0215z2019-01-04T16:41:24-0200z2019-01-04T16:41:24-0215c                 C   s   | j S r   r   r   r   r   r   timezone_aware_date_list   s    rY   r%   c
              	      s   p
ddipi dkr |r4|d< |d<  fdd}
dkrtt  |
|	 W 5 Q R X n|
|	 dS )a  Verify parquet serializer and deserializer produce the same results.

    Performs a pandas to disk and disk to pandas round trip,
    then compares the 2 resulting DataFrames to verify equality.

    Parameters
    ----------
    df: Dataframe
    engine: str, optional
        'pyarrow' or 'fastparquet'
    path: str, optional
    write_kwargs: dict of str:str, optional
    read_kwargs: dict of str:str, optional
    expected: DataFrame, optional
        Expected deserialization result, otherwise will be equal to `df`
    check_names: list of str, optional
        Closed set of column names to be compared
    check_like: bool, optional
        If True, ignore the order of index & columns.
    repeat: int, optional
        How many times to repeat the test
    compressionNr   c              
      sV   t | D ]H}jf tdd tf}W 5 Q R X tj| d qd S )NTrecord)check_names
check_likecheck_dtype)r>   r   r   r   tmassert_frame_equal)repeat_actualr_   r^   r]   rB   expectedpathread_kwargswrite_kwargsr   r   compare   s    z!check_round_trip.<locals>.compare)r`   ensure_clean)rB   r   rg   ri   rh   rf   r]   r^   r_   rb   rj   r   re   r   check_round_trip   s    "
rl   c                 C   s|   t rLddlm} |j| dd}t|jjt|ks6t|jjt|ksxtn,ddl	m
} |j
| dd}|jjj|ksxtdS )zCheck partitions of a parquet file are as expected.

    Parameters
    ----------
    path: str
        Path of the dataset.
    expected: iterable of str
        Expected partition names.
    r   NF)Zvalidate_schemaZhive)partitioning)r   pyarrow.parquetparquetZParquetDatasetlenZ
partitionsZpartition_namesAssertionErrorsetZpyarrow.datasetdatasetrm   schemanames)rg   rf   pqrs   Zdsr   r   r   check_partition_names   s    
rw   c              	   C   s.   d}t jt|d t| dd W 5 Q R X d S )Nz.engine must be one of 'pyarrow', 'fastparquet'matchr'   rG   )r   raises
ValueErrorrl   )r-   msgr   r   r   test_invalid_engine   s    r}   c              	   C   s$   t dd t|  W 5 Q R X d S )Nio.parquet.enginer   r+   option_contextrl   )r-   r!   r   r   r   test_options_py   s    r   c              	   C   s$   t dd t|  W 5 Q R X d S )Nr~   r   r   )r-   r#   r   r   r   test_options_fp   s    r   c              	   C   s$   t dd t|  W 5 Q R X d S )Nr~   autor   )r-   r#   r!   r   r   r   test_options_auto  s    r   c              	   C   s  t tdtstt tdts$ttdd< t tdtsDtt tdtsVtt tdtshtW 5 Q R X tdd< t tdtstt tdtstt tdtstW 5 Q R X tdd> t tdtstt tdtstt tdtstW 5 Q R X d S )Nr   r   r~   r   )
isinstancer   r
   rq   r	   r+   r   )r#   r!   r   r   r   test_options_get_engine	  s    r   c               	   C   s.  ddl m}  | d}| d}ts(dnttjt|k }tsBdnttjt|k }to\| }tof| }|s*|s*|rd| d}t	j
t|d td	 W 5 Q R X n&d
}t	j
t|d td	 W 5 Q R X |rd| d}t	j
t|d td	 W 5 Q R X n&d}t	j
t|d td	 W 5 Q R X d S )Nr   )VERSIONSr   r   FzPandas requires version .z. or newer of .pyarrow.rx   r   z%Missing optional dependency .pyarrow.z. or newer of .fastparquet.z)Missing optional dependency .fastparquet.)Zpandas.compat._optionalr   getr   r   r   __version__r"   r   r   rz   ImportErrorr   )r   Z
pa_min_verZ
fp_min_verZhave_pa_bad_versionZhave_fp_bad_versionZhave_usable_paZhave_usable_fpry   r   r   r   "test_get_engine_auto_error_message  s8    



r   c              	   C   sj   | }t  T}|j||d d t||d}t || t||ddgd}t ||ddg  W 5 Q R X d S )Nr   rZ   r   r8   r:   r   columns)r`   rk   r   r   ra   rC   r!   r#   rB   rg   resultr   r   r   test_cross_engine_pa_fpH  s    
r   c              
   C   s   | }t  j}|j||d d tddD t||d}t || t||ddgd}t ||ddg  W 5 Q R X W 5 Q R X d S )Nr   Tr[   r   r8   r:   r   )r`   rk   r   r   r   ra   r   r   r   r   test_cross_engine_fp_paV  s    
r   c                   @   s:   e Zd Zdd Zdd Zejjejddddd	 Z	d
S )Basec              
   C   sB   t  0}tj||d t|||d d W 5 Q R X W 5 Q R X d S )Nrx   rZ   )r`   rk   r   rz   r   )selfrB   r   excerr_msgrg   r   r   r   check_error_on_writee  s    
zBase.check_error_on_writec              
   C   s>   t  ,}t | t|||d d W 5 Q R X W 5 Q R X d S )Nr   )r`   rk   external_error_raisedr   )r   rB   r   r   rg   r   r   r   check_external_error_on_writek  s    
z"Base.check_external_error_on_writedhttps://raw.githubusercontent.com/pandas-dev/pandas/main/pandas/tests/io/data/parquet/simple.parquetT)urlZcheck_before_testc                 C   s.   |dkrt | d}t|}t|| d S )Nr   r   )r   importorskipr   r`   ra   )r   r-   r   r   rB   r   r   r   test_parquet_read_from_urlq  s    	
zBase.test_parquet_read_from_urlN)
__name__
__module____qualname__r   r   r   marknetworkr`   r   r   r   r   r   r   d  s   r   c                   @   s   e Zd Zdd Zdd Zejddddd	gd
d Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zejjed d!d"d# Zejd$d%d&d'd(d)d*d+d,d-g	d.d/ ZdS )0	TestBasicc              	   C   sJ   t dddgddt dtdddgfD ]}d}| ||t| q,d S )Nr$   r%   r&   r'   r5   z+to_parquet only supports IO with DataFrames)r+   SeriesrW   r?   r   r   r{   )r   r   objr|   r   r   r   
test_error  s    zTestBasic.test_errorc                 C   s6   t tdttddd}ddg|_t|| d S )Nr.   r$   r/   rM   rP   r'   rG   )r+   r,   r=   r>   r   rl   )r   r   rB   r   r   r   test_columns_dtypes  s    
zTestBasic.test_columns_dtypesrZ   Ngzipsnappybrotlic                 C   sP   |dkrt d n|dkr&t d tddddgi}t||d|id d S )	Nr   r   r)   r$   r%   r&   rZ   ri   )r   r   r+   r,   rl   )r   r   rZ   rB   r   r   r   test_compression  s    
zTestBasic.test_compressionc                 C   sJ   t tdttddd}t dtdi}t|||ddgid d S )Nr.   r$   r/   r   rM   r   rf   rh   )r+   r,   r=   r>   rl   )r   r   rB   rf   r   r   r   test_read_columns  s       zTestBasic.test_read_columnsc                 C   s   |dk}t ddddgi}t|| dddgt jdddtd	dddgg}|D ]2}||_t|t jrt|jd |_t|||d
 qPdddg|_d|j_	t|| d S )Nr   r)   r$   r%   r&   r/   r5   r6   r.   )r]   r   r'   )
r+   r,   rl   rA   r=   indexr   ZDatetimeIndex
_with_freqname)r   r   r]   rB   Zindexesr   r   r   r   test_write_index  s     
zTestBasic.test_write_indexc                 C   s>   |}t ddddgi}t jdddg}||_t|| d S )Nr)   r$   r%   r&   r8   r$   r8   r%   r9   r$   )r+   r,   
MultiIndexfrom_tuplesr   rl   )r   r!   r   rB   r   r   r   r   test_write_multiindex  s
    zTestBasic.test_write_multiindexc                 C   s   |}t jdddd}t jtjdt| dtdd}t jj	d	d
g|gddgd}|j
d d}||fD ]4}||_t|| t||dddgi|ddg d qhd S )Nz01-Jan-2018z01-Dec-2018ZMS)freqr%   r&   ABCr   Level1Level2leveldate)ru   r   r)   r*   rh   rf   )r+   rA   r,   r?   randomrandnrp   r=   r   Zfrom_productcopyr   rl   )r   r!   r   datesrB   Zindex1index2r   r   r   r   test_multiindex_with_columns  s"    $
 
  
 
z&TestBasic.test_multiindex_with_columnsc              	   C   s   t dddgdddgd}d dd	}|jd
d}t||||d t jdddgdddgddddgd}t||||d ddddddddgddddddddgg}t jttddd tdD d|d}|jd
d}t||||d d S )Nr$   r%   r&   qrs)r8   r9   F)rZ   r   T)dropri   rf   ZzyxZwvuZtsrr   rG   rH   r'   quxonetwo   c                 S   s   g | ]
}| qS r   r   ).0ir   r   r   
<listcomp>  s     z7TestBasic.test_write_ignoring_index.<locals>.<listcomp>)r   r   )r+   r,   Zreset_indexrl   r=   r>   )r   r   rB   ri   rf   arraysr   r   r   test_write_ignoring_index  s$    
  z#TestBasic.test_write_ignoring_indexc                 C   sZ   t jdddg}t jtjdd|d}|dkrD| ||td n|d	krVt	|| d S )
Nr   r   r   r/   r&   r   r   Column name must be a stringr   )
r+   r   r   r,   r?   r   r   r   	TypeErrorrl   )r   r   Z
mi_columnsrB   r   r   r   test_write_column_multiindex  s       z&TestBasic.test_write_column_multiindexc              	   C   s   ddddddddgddddddddgg}t jtjdd|d}d	d
g|j_|dkrttj	tdk rlt
}nt}| |||d n|dkrt|| d S )NrG   rH   r'   r   r$   r%   r   r   r   r   r   z0.7.0zColumn namer   )r+   r,   r?   r   r   r   ru   r   r   r   r   r{   r   rl   )r   r   r   rB   errr   r   r   &test_write_column_multiindex_nonstring  s    z0TestBasic.test_write_column_multiindex_nonstringc              	   C   s^   |}ddddddddgddddddddgg}t jtjdd|d}d	d
g|j_t|| d S )NrG   rH   r'   r   r   r   r   r   Z	ColLevel1Z	ColLevel2)r+   r,   r?   r   r   r   ru   rl   r   r!   r   r   rB   r   r   r   #test_write_column_multiindex_string  s    z-TestBasic.test_write_column_multiindex_stringc                 C   s>   |}ddddg}t jtjdd|d}d|j_t|| d S )	NrG   rH   r'   r   r   r/   r   Z	StringCol)r+   r,   r?   r   r   r   r   rl   r   r   r   r   test_write_column_index_string*  s
    z(TestBasic.test_write_column_index_stringc                 C   sT   ddddg}t jtjdd|d}d|j_|dkrF| ||td	 n
t	|| d S )
Nr$   r%   r&   r/   r   r   ZNonStringColr   r   )
r+   r,   r?   r   r   r   r   r   r   rl   )r   r   r   rB   r   r   r   !test_write_column_index_nonstring6  s       z+TestBasic.test_write_column_index_nonstringzminimum pyarrow not installedr   c           
      C   s  dd l m} |dkr.tjjdd}|j| tt	dddd gdt	dddd gd	t	d
ddd gt	dddd gt	ddddgdt	dddd gdt	dddd gdd}t
 ,}||| t||d}t||dd}W 5 Q R X |d
 jtdkstttj	dddd gddtj	dddd gddtj	d
ddd gddtj	dddd gddtj	ddddgddtj	dddd gddtj	dddd gddd}	|dkr|jddd }|	jddd }	t
||	 d S )!Nr   r   z.Fastparquet nullable dtype support is disabledr   r$   r%   r&   Zint64Zuint8r8   r9   rD   TFr/         ?rK   rL   Zfloat32r2   )r8   r9   rD   r:   r;   r<   gr   numpy_nullabler   dtype_backendInt64r3   UInt8rM   booleanZFloat32Float64)Zaxis)rn   ro   r   r   xfailnode
add_markerr   tabler   r`   rk   Zwrite_tabler   r4   r?   rq   r+   r,   r   ra   )
r   r   r   rv   r   r   rg   Zresult1Zresult2rf   r   r   r   test_dtype_backendD  sF    

zTestBasic.test_dtype_backendr4   r   r   r   objectzdatetime64[ns, UTC]rQ   z	period[D]r   rM   c                 C   sT   t dt jg |di}d }|dkr<t dt jg ddi}t||ddi|d d S )Nvaluer3   rQ   r   r   r   r   )r+   r,   r   rl   )r   r!   r4   rB   rf   r   r   r   test_read_empty_arrayv  s$         zTestBasic.test_read_empty_array)r   r   r   r   r   r   r   parametrizer   r   r   r   r   r   r   r   r   r   r   skipifr   r   r   r   r   r   r   r     s<   

		 
1r   c                   @   s  e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	e
jje dde
jdeejgdd Zdd Ze
jjdd Ze
jjdd Ze
jjede
jddgg gdd Zedd d! Zedd"d# Zd$d% Zd&d' Ze
jjded(d) gd*d+gd,d-d. Zd/d0 Zd1d2 Z edd3d4 Z!edd5d6 Z"edd7d8 Z#d9d: Z$d;d< Z%edd=d> Z&d?d@ Z'dAdB Z(dCdD Z)dEdF Z*dGdH Z+dIS )JTestParquetPyArrowc                 C   sB   |}t jdddd}|d }||d< dd dg|d< t|| d S )Nr5   r&   Europe/Brusselsr7   tzdatetime_tzTbool_with_none)r+   rA   r   rl   )r   r!   rX   rB   dtir   r   r   
test_basic  s    
zTestParquetPyArrow.test_basicc                 C   s<   |}t jdddd|d< t|||ddg dddgid	 d S )
Nr5   r&   r   r   r   rM   rP   r   r   )r+   rA   rl   )r   r!   rX   rB   r   r   r   test_basic_subset_columns  s    

z,TestParquetPyArrow.test_basic_subset_columnsc                 C   s:   |j |d}t|tstt|}t|}t|| d S )Nr   )r   r   rN   rq   r   r   r`   ra   )r   r!   rX   Z	buf_bytesZ
buf_streamresr   r   r   *test_to_bytes_without_path_or_buf_provided  s
    z=TestParquetPyArrow.test_to_bytes_without_path_or_buf_providedc                 C   s8   t jtdddtdd }| ||td d S )N   r/   r&   aaar   zDuplicate column names found	r+   r,   r?   r@   Zreshaper=   r   r   r{   r   r!   rB   r   r   r   test_duplicate_columns  s    $z)TestParquetPyArrow.test_duplicate_columnsc                 C   s:   t dt jdddi}tr,| ||t n
t|| d S )Nr8   1 dayr&   r6   )r+   r,   timedelta_ranger   r   NotImplementedErrorrl   r  r   r   r   test_timedelta  s    z!TestParquetPyArrow.test_timedeltac                 C   s(   t ddddgi}| ||tj d S )Nr8   r$   rK   )r+   r,   r   r   ArrowExceptionr  r   r   r   test_unsupported  s    z#TestParquetPyArrow.test_unsupportedc                 C   s6   t jddt jd}tj|dgd}| ||tj d S )Nr%   
   r3   fp16datar   )r?   r@   float16r+   r,   r   r   r	  )r   r!   r  rB   r   r   r   test_unsupported_float16  s    z+TestParquetPyArrow.test_unsupported_float16zqPyArrow does not cleanup of partial files dumps when unsupported dtypes are passed to_parquet function in windowsr   	path_typec              
   C   sx   t jddt jd}tj|dgd}t D}||}ttj	 |j
||d W 5 Q R X tj|rjtW 5 Q R X d S )Nr%   r  r3   r  r  )rg   r   )r?   r@   r  r+   r,   r`   rk   r   r   r	  r   osrg   isfilerq   )r   r!   r  r  rB   Zpath_strrg   r   r   r    test_unsupported_float16_cleanup  s    
z3TestParquetPyArrow.test_unsupported_float16_cleanupc                 C   sx   t  }t td|d< t jddddd dgt dddgd|d< t jddddddgddd	gd
d|d< t|| d S )NZabcdefr8   rG   r'   rH   r3   r9   rD   r:   T)
categoriesZordered)r+   r,   Categoricalr=   ZCategoricalDtyperl   r  r   r   r   test_categorical  s    
  
z#TestParquetPyArrow.test_categoricalc                 C   s4   t d}|jf |}d|i}t||d||d d S )Ns3fs
filesystemzpandas-test/pyarrow.parquetrg   rh   ri   )r   r   ZS3FileSystemrl   )r   r-   s3_resourcer!   s3sor  Zs3kwr   r   r   test_s3_roundtrip_explicit_fs  s    
z0TestParquetPyArrow.test_s3_roundtrip_explicit_fsc                 C   s   d|i}t ||d||d d S )Nstorage_optionsz s3://pandas-test/pyarrow.parquetr  rl   )r   r-   r  r!   r  r   r   r   test_s3_roundtrip  s    z$TestParquetPyArrow.test_s3_roundtripr  partition_colr)   c              
   C   s^   |  }|r6|t|tj}d}|| |||< t|||dd|i|d |dddd d S )Ncategoryzs3://pandas-test/parquet_dirr  )partition_colsrZ   r  Tr$   )rf   rg   rh   ri   r^   rb   )r   rV   dictfromkeysr?   Zint32rl   )r   r-   r  r!   r"  r  Zexpected_dfZpartition_col_typer   r   r   test_s3_roundtrip_for_dir  s(    z,TestParquetPyArrow.test_s3_roundtrip_for_dirr   c                 C   s(   t  }|| t|}t|| d S r   )r   r   r   r`   ra   )r   r-   bufferZdf_from_bufr   r   r   test_read_file_like_obj_support>  s    
z2TestParquetPyArrow.test_read_file_like_obj_supportc              	   C   sb   | dd | dd tjtdd td W 5 Q R X tjtdd |d W 5 Q R X d S )NHOMEZTestingUserUSERPROFILEz.*TestingUser.*rx   z~/file.parquet)Zsetenvr   rz   OSErrorr   r   )r   r-   Zmonkeypatchr   r   r   test_expand_userE  s    z#TestParquetPyArrow.test_expand_userc                 C   s>   ddg}|}|j ||d d t|| t|j|jks:td S )NrR   rP   r$  rZ   r   rw   r   shaperq   )r   tmp_pathr!   rX   r$  rB   r   r   r   test_partition_cols_supportedN  s
    
z0TestParquetPyArrow.test_partition_cols_supportedc                 C   s@   d}|g}|}|j ||d d t|| t|j|jks<td S )NrR   r.  r/  )r   r1  r!   rX   r$  partition_cols_listrB   r   r   r   test_partition_cols_stringV  s    
z-TestParquetPyArrow.test_partition_cols_stringc                 C   s   | S r   r   )xr   r   r   <lambda>`      zTestParquetPyArrow.<lambda>rM   zpathlib.Path)idsc           	      C   s<   d}|g}|}||}|j ||d t|j|jks8td S )Nr*   )r$  )r   r   r0  rq   )	r   r1  r!   r-   r  r$  r3  rB   rg   r   r   r   test_partition_cols_pathlib_  s    z.TestParquetPyArrow.test_partition_cols_pathlibc                 C   s   t jg g d}t|| d S )N)r   r   )r+   r,   rl   r  r   r   r   test_empty_dataframem  s    z'TestParquetPyArrow.test_empty_dataframec                 C   sV   dd l }tdddgi}||jd| dg}|t}t||d|i|d d S )Nr   r5  r$   )typert   r   )	r   r+   r,   rt   fieldZbool_rV   rR   rl   )r   r!   r   rB   rt   Zout_dfr   r   r   test_write_with_schemar  s
    
z)TestParquetPyArrow.test_write_with_schemac                 C   sz   t t jdddgddt jdddgddt jdd dgd	dd
}t|| t dt jdddd gddi}t|| d S )Nr$   r%   r&   r   r3   ZUInt32r8   rD   rM   )r8   r9   rD   )r+   r,   r   rl   r  r   r   r    test_additional_extension_arraysz  s    
 z3TestParquetPyArrow.test_additional_extension_arraysc              	   C   sV   t dt jdd dgddi}t d|" t|||d| dd W 5 Q R X d S )	Nr8   rD   zstring[pyarrow]r3   string_storagezstring[]rf   )r+   r,   r   r   rl   rV   )r   r!   r?  rB   r   r   r    test_pyarrow_backed_string_array  s    z3TestParquetPyArrow.test_pyarrow_backed_string_arrayc                 C   sN   t t jdddgt jddddt jt jddddd	}t|| d S )
N)r   r$   )r$   r%   )r&   r/   z
2012-01-01r&   D)r7   r   r/   )rD   r:   r;   )r+   r,   ZIntervalIndexr   period_rangeZfrom_breaksrA   rl   r  r   r   r   test_additional_extension_types  s    
z2TestParquetPyArrow.test_additional_extension_typesc                 C   s>   t s
d}nd}tdtjddddi}t||d|id	 d S )
Nz2.6z2.0r8   z
2017-01-01Z1nr  r   r7   versionr   )r   r+   r,   rA   rl   )r   r!   verrB   r   r   r   test_timestamp_nanoseconds  s
    z-TestParquetPyArrow.test_timestamp_nanosecondsc                 C   sV   t s(|jtjjkr(|jtjj	dd d|g }t
j|d|id}t||dd d S )Nzitemporary skip this test until it is properly resolved: https://github.com/pandas-dev/pandas/issues/37286r      index_as_colr   r  F)r_   )r   tzinforS   timezoneutcr   r   r   r   r   r+   r,   rl   )r   r   r!   rY   idxrB   r   r   r   test_timezone_aware_index  s    
z,TestParquetPyArrow.test_timezone_aware_indexc              	   C   s^   t dttddi}t $}||| t||dgdd}W 5 Q R X t|dksZt	d S )Nr8   r   r&   r8   z==r   F)filtersZuse_legacy_datasetr$   )
r+   r,   r=   r>   r`   rk   r   r   rp   rq   )r   r!   rB   rg   r   r   r   r   test_filter_row_groups  s    
   z)TestParquetPyArrow.test_filter_row_groupsc              	   C   s~   t jtjdddddgd}t }||| t||}W 5 Q R X |rdt	|j
t jjjsztnt	|j
t jjjsztd S )Nr  r&   r)   r*   Cr   )r+   r,   r?   r   r   r`   rk   r   r   r   Z_mgrcoreZ	internalsZArrayManagerrq   ZBlockManager)r   r!   Zusing_array_managerrB   rg   r   r   r   r   test_read_parquet_manager  s    
z,TestParquetPyArrow.test_read_parquet_managerc                 C   s   dd l }|}tjdddd}|d }||d< dd dg|d< |j|}|jtjd	}|d
 d|d
< |d d|d< |d t|j	ddd|d< t
||ddi|d d S )Nr   r5   r&   r   r   r   Tr   )Ztypes_mapperrS   ztimestamp[us][pyarrow]rT   us)unitr   r   r   r   rh   rf   )r   r+   rA   r   ZTableZfrom_pandasZ	to_pandasZ
ArrowDtyperV   	timestamprl   )r   r!   rX   r   rB   r   Zpa_tablerf   r   r   r   &test_read_dtype_backend_pyarrow_config  s*    
z9TestParquetPyArrow.test_read_dtype_backend_pyarrow_configc                 C   sn   t jdddgit jddgdddd	}| }d
d l}t|jtdkrV|jd|_t	||ddi|d d S )Nr8   r$   r%   r&   r/   testr   zint64[pyarrow])r   r4   r   z11.0.0r   r   rZ  )
r+   r,   Indexr   r   r   r   r   rV   rl   )r   r!   rB   rf   r   r   r   r   ,test_read_dtype_backend_pyarrow_config_index  s    
  z?TestParquetPyArrow.test_read_dtype_backend_pyarrow_config_indexc              	   C   s   t tdttddd}ddg|_t|| ddg|_tjtdd	 t|| W 5 Q R X t		d
ddddt		d
ddddg|_t|| d S )Nr.   r$   r/   r   r   rE   rF   z|S3rx     )
r+   r,   r=   r>   r   rl   r   rz   r  rS   r  r   r   r   test_columns_dtypes_not_invalid	  s    


z2TestParquetPyArrow.test_columns_dtypes_not_invalidc                 C   s*   t jt jdddgddd}t|| d S )Nr8   r9   rD   custom namer^  r   r+   r,   r_  rl   r  r   r   r   test_empty_columns  s    z%TestParquetPyArrow.test_empty_columnsN),r   r   r   r   r   r   r  r  r
  r  r   r   r   r   r   strpathlibPathr  r  
single_cpur  r!  tdZ
skip_if_nor'  r)  r-  r2  r4  r9  r:  r=  r>  rB  rE  rI  rQ  rT  rW  r\  r`  rb  re  r   r   r   r   r     sp   



	 
 





r   c                   @   s   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	e
jjdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'S )(TestParquetFastParquetc                 C   sF   |}t jdddd}|d }||d< t jddd|d< t|| d S )	Nr5   r&   z
US/Easternr   r   r  r6   	timedelta)r+   rA   r   r  rl   )r   r#   rX   rB   r   r   r   r   r   $  s    
z!TestParquetFastParquet.test_basicc                 C   s   t tdttddd}t}d}ddg|_| |||| ddg|_| |||| td	ddddtd	ddddg|_| |||| d S )
Nr.   r$   r/   r   r   r   rE   rF   ra  )r+   r,   r=   r>   r   r   r   rS   )r   r#   rB   r   r|   r   r   r   test_columns_dtypes_invalid-  s    

z2TestParquetFastParquet.test_columns_dtypes_invalidc                 C   s<   t jtdddtdd }d}| ||t| d S )Nr   r/   r&   r  r   z9Cannot create parquet dataset with duplicate column namesr  r   r#   rB   r|   r   r   r   r  B  s    $z-TestParquetFastParquet.test_duplicate_columnsc                 C   sB   t ddd dgi}t jddtjdgidd}t|||dd d S )	Nr8   TFr   g        r  r3   )rf   r_   )r+   r,   r?   rU   rl   r   r#   rB   rf   r   r   r   test_bool_with_noneH  s    z*TestParquetFastParquet.test_bool_with_nonec                 C   sV   t dt jddddi}| ||td  t ddddgi}d}| ||t| d S )	Nr8   Z2013Mr&   rF  r$   rK   z"Can't infer object conversion type)r+   r,   rD  r   r{   rn  r   r   r   r
  O  s
    z'TestParquetFastParquet.test_unsupportedc                 C   s&   t dt tdi}t|| d S )Nr8   r.   )r+   r,   r  r=   rl   )r   r#   rB   r   r   r   r  Z  s    z'TestParquetFastParquet.test_categoricalc              	   C   sf   dt tddi}t|}t (}|j||d dd t||dgd}W 5 Q R X t|dksbt	d S )Nr8   r   r&   r$   )rZ   Zrow_group_offsetsrR  )rS  )
r=   r>   r+   r,   r`   rk   r   r   rp   rq   )r   r#   r:   rB   rg   r   r   r   r   rT  ^  s    

z-TestParquetFastParquet.test_filter_row_groupsc                 C   s    t ||dd|id |dd d S )Nz$s3://pandas-test/fastparquet.parquetr  )rZ   r  r  r   )r   r-   r  r#   r  r   r   r   r!  f  s    z(TestParquetFastParquet.test_s3_roundtripc                 C   s\   ddg}|}|j |d|d d tj|s.tdd l}|t|dj}t	|dksXtd S )NrR   rP   r   r   r$  rZ   r   Fr%   
r   r  rg   existsrq   r   ZParquetFilerf  Zcatsrp   r   r1  r#   rX   r$  rB   r   Zactual_partition_colsr   r   r   r2  q  s    z4TestParquetFastParquet.test_partition_cols_supportedc                 C   sX   d}|}|j |d|d d tj|s*tdd l}|t|dj}t	|dksTtd S )NrR   r   rr  r   Fr$   rs  ru  r   r   r   r4    s    z1TestParquetFastParquet.test_partition_cols_stringc                 C   s\   ddg}|}|j |dd |d tj|s.tdd l}|t|dj}t	|dksXtd S )NrR   rP   r   )r   rZ   partition_onr   Fr%   rs  ru  r   r   r   test_partition_on_supported  s    z2TestParquetFastParquet.test_partition_on_supportedc              	   C   sB   ddg}|}d}t jt|d |j|dd ||d W 5 Q R X d S )NrR   rP   zYCannot use both partition_on and partition_cols. Use partition_cols for partitioning datarx   r   )r   rZ   rv  r$  )r   rz   r{   r   )r   r1  r#   rX   r$  rB   r|   r   r   r   3test_error_on_using_partition_cols_and_partition_on  s    zJTestParquetFastParquet.test_error_on_using_partition_cols_and_partition_onc                 C   s"   t  }| }t|||d d S )NrA  )r+   r,   r   rl   ro  r   r   r   r:    s    z+TestParquetFastParquet.test_empty_dataframec                 C   s>   d|g }t j|d|id}| }d|j_t|||d d S )NrJ  rK  rL  r   rA  )r+   r,   r   r   r   rl   )r   r#   rY   rP  rB   rf   r   r   r   rQ    s
    
z0TestParquetFastParquet.test_timezone_aware_indexc                 C   s   t dddgi}t v}|| tjtdd* tt	 t
|ddd W 5 Q R X W 5 Q R X tjtdd t
|dd	d
 W 5 Q R X W 5 Q R X d S )Nr8   r$   r%   z!not supported for the fastparquetrx   r   T)r   Zuse_nullable_dtypesr   r   )r+   r,   r`   rk   r   r   rz   r{   Zassert_produces_warningFutureWarningr   )r   r#   rB   rg   r   r   r   &test_use_nullable_dtypes_not_supported  s    

"z=TestParquetFastParquet.test_use_nullable_dtypes_not_supportedc              
   C   sb   t dN}t|d tjtdd t|dd W 5 Q R X t|j	dd W 5 Q R X d S )	Ntest.parquets   breakit rx   r   r   F)
missing_ok)
r`   rk   rg  rh  write_bytesr   rz   	Exceptionr   unlink)r   rg   r   r   r   $test_close_file_handle_on_read_error  s
    z;TestParquetFastParquet.test_close_file_handle_on_read_errorc              
   C   sp   t jddgddgdd}td6}t| d}|| W 5 Q R X t||d}W 5 Q R X t|| d S )Nr   r$   r(   )r  r{  wbr   )	r+   r,   r`   rk   openencoder   r   ra   )r   r   rB   rg   r<   r   r   r   r   test_bytes_file_name  s    z+TestParquetFastParquet.test_bytes_file_namec              
   C   sf   d}t dttddi}td6}|| tjt	|d t
|dd W 5 Q R X W 5 Q R X d S )	NzPdtype_backend numpy is invalid, only 'numpy_nullable' and 'pyarrow' are allowed.rP   r$   r/   ztmp.parquetrx   numpy)r   )r+   r,   r=   r>   r`   rk   r   r   rz   r{   r   )r   r   r|   rB   rg   r   r   r   test_invalid_dtype_backend  s    
z1TestParquetFastParquet.test_invalid_dtype_backendc                 C   sJ   t jt jdddgddd}t jt jdddgddd}t|||d d S )Nr8   r9   rD   rc  r^  r   rA  rd  ro  r   r   r   re    s    z)TestParquetFastParquet.test_empty_columnsN)r   r   r   r   rm  r  rp  r
  r  rT  r   r   ri  r!  r2  r4  rw  rx  r:  rQ  rz  r  r  r  re  r   r   r   r   rk  #  s(   	

	
rk  )	NNNNNTFTr%   )G__doc__rS   ior   r  rg  warningsr   r  r?   r   Zpandas._configr   Zpandas.compatr   Zpandas.compat.pyarrowr   r   Zpandas.util._test_decoratorsutilZ_test_decoratorsrj  Zpandasr+   Zpandas._testingZ_testingr`   Zpandas.util.versionr   Zpandas.io.parquetr	   r
   r   r   r   r   r   r   r   r"   Zfixturer   r   r   r   r!   r#   r-   rC   rX   nowrN  rO  minmaxstrptimerY   rl   rw   r}   r   r   r   r   r   r   r   r   r   r   rk  r   r   r   r   <module>   s   

 






         
A+      