U
    >vfX                     @   s  d dl Zd dlZd dlmZ d dlmZ d dlZd dl	m
Z d dlmZ ejddddgddggd	d
gd	dgd	dgdfdddgdejgddggd	dd
gd	ddgd	ddgdfgdd Zejddddgddggdd
gddgddgdfdddgdejgddgejdggddd
dgddddgdd	ddgdfgdd Zejddddgd
d	gdd	gdd	gdfdddejgd
d	dgdd	dgdd	dgdfgdd Zejdddddejgejddgddgd fddddejgejdddgddejgd fgd!d" Zejd#dejd$d%gddgd&d'fdejd$d%d(gddejgd&d'fgd)d* Zejd+d,d-d. Zejd/ed d0ed1ejjd2d3gd4d5gd6gd7d8 Zejddddgddggd	d
gddgddgdfdddgdejgddggd	dd
gdddgdddgdfgd9d: Zejjejd;ed<ed=fe d>e d?fe!d<e!d=fgejd@ddAdgfddAddBgfgdCdD Z"ejdEej#ddddejgdFdFdGdHgdIdJd dKd d giddejgdLdMej#ddddejgdFdFdGdHgdIdJd dKd giddNdMej#dddddOgdFdFdGdHgdIdJd dKd d giddPdMej#dddddOgdFdFdGdHgdIdJd dKd d giddQdMgdRdS Z$ejdTddgddggejdUdgddggejdVddgdWdX Z%dYdZ Z&d[d\ Z'ejd]e(d^ejd_dd`dadbdcdddedfdgdhdgdidjdkej#dlejj)edmdndodpdqdrgejdsddgdtdu Z*ejdsddgejd_e+dgdvdw Z,ejdxdydzd{gd|d} Z-d~d Z.ejdddgdd Z/dd Z0dd Z1dS )    N)pa_version_under7p0)na_value_for_dtype)get_groupby_method_argszdropna, tuples, outputsTABg      *@Q^@g     ^@      ?cdeF皙(@      m@g      (@c           	      C   s   dddddgd|dddgdddddgddddd	gg}t j|d
ddddgd}|jd
dg| d }t jj|tdd}| s|jddtj	gdd}t j||d}t
|| d S )Nr   r      r   r   r   {      r   abr
   r   r   columnsdropnaabnameslevelindexpd	DataFramegroupbysum
MultiIndexfrom_tupleslist
set_levelsnpnantmassert_frame_equal)	r   tuplesoutputsnulls_fixturedf_listdfgroupedmiexpected r4   L/tmp/pip-unpacked-wheel-vdrwu74i/pandas/tests/groupby/test_groupby_dropna.py:test_groupby_dropna_multi_index_dataframe_nan_in_one_group   s    r6   g*@g     @m@c           
   	   C   s   dddddgd|dddgdddddg|dddd	gd|ddd	gg}t j|d
ddddgd}|jd
dg| d }t jj|tdd}| s|ddtj	gddtj	gg}t j||d}	t
||	 d S )Nr   r   r   r   r   r   r   r   r   r   r   r
   r   r   r   r   r   r   r   r   )
r   r,   r-   r.   nulls_fixture2r/   r0   r1   r2   r3   r4   r4   r5   ;test_groupby_dropna_multi_index_dataframe_nan_in_two_groups8   s    r8   zdropna, idx, outputs)r   r
   r   c                 C   s~   ddddgd dddgddddgdddd	gg}t j|d
dddgd}|jd
| d }t j|t j|dd
dd}t|| d S )Nr   r   r   r   r   r   r   r   r   r   r   r
   r   r   r   objectdtypenamer   )r    r!   r"   r#   Indexr*   r+   )r   idxr-   r/   r0   r1   r3   r4   r4   r5   *test_groupby_dropna_normal_index_dataframed   s    



r?   zdropna, idx, expectedr   r      r   c                 C   s8   t jddddg|d}|jd| d }t|| d S )Nr      r@   r   r   )r   r   )r    Seriesr"   r#   r*   assert_series_equal)r   r>   r3   serresultr4   r4   r5    test_groupby_dropna_series_level   s    rF   zdropna, expectedg     @j@     u@	Max Speedr   r<         4@c                 C   sL   t jddddgddddgdd}|jd	d
d	tjg| d }t|| d S )Ng     `x@rG   g      >@rJ   ZFalconZParrotrH   rI   r   r   r   )r    rB   r"   r(   r)   Zmeanr*   rC   )r   r3   rD   rE   r4   r4   r5   test_groupby_dropna_series_by   s    

rK   r   )FTc                 C   sB   t dddd gdddd gd}|jd| d}|jj| ks>td S )Nr   r   rA   r@   r   r   r   r   )r    r!   r"   Zgrouperr   AssertionError)r   r0   gbr4   r4   r5   test_grouper_dropna_propagation   s     rO   r      Zabcd)r   rA   )Rr   numcolr   c                 C   s   dddd| rt jndgi}tjdddd gdddd gd|d}|jd| d	}|t}tj||d}t|| |dg t}tj||d}t|| |d t}tj	|d |dd
}t
|| d S )Nr   rA   r   r   r@   rL   r   r   r   rI   )r(   r)   r    r!   r"   	transformlenr*   r+   rB   rC   )r   r   Zexpected_datar0   rN   rE   r3   r4   r4   r5   +test_groupby_dataframe_slice_then_transform   s    
$
rV   c           	      C   s   dddddgdd dddgdddddgddddd	gg}t j|d
ddddgd}ttdd}|jd
dg| d|}t jj|tdd}| s|j	ddt
jgdd}t j||d}t|| d S )Nr   r   r   r   r   r   r   r   r   r   r   r
   r   r   r   minr	   r   r   r   r   r   )r    r!   r#   maxr"   aggr$   r%   r&   r'   r(   r)   r*   r+   )	r   r,   r-   r/   r0   Zagg_dictr1   r2   r3   r4   r4   r5   -test_groupby_dropna_multi_index_dataframe_agg   s    rZ   zdatetime1, datetime2z
2020-01-01z
2020-02-01z-2 daysz-1 dayszdropna, valuesr      c           
   	   C   s   t ddddddg||||||gd}| r6||g}n||tjg}|jd| d	d
ti}t jd
|it j|ddd}	t	||	 d S )Nr   rA   r@   rP      r[   )valuesdtr^   r   r]   r<   r   )
r    r!   r(   r)   r"   rY   r#   r=   r*   r+   )
r   r]   Z	datetime1Z	datetime2Zunique_nulls_fixtureZunique_nulls_fixture2r0   Zindexesr1   r3   r4   r4   r5   &test_groupby_dropna_datetime_like_data   s"    
r`   z#dropna, data, selected_data, levels
         )groupsr]   r]   r   Zdropna_false_has_nan)idZdropna_true_has_nanr
   Zdropna_false_no_nanZdropna_true_no_nanc           
      C   s   t |}|jd| d}|dd }tt|d |d }t jj|dd gd}| sf|rf|j|dd}t j||d}	t	
||	 d S )	Nrd   r   c                 S   s   t dtt| iS )Nr]   )r    r!   rangerU   )grpr4   r4   r5   <lambda>G      z@test_groupby_apply_with_dropna_for_multi_index.<locals>.<lambda>r]   r   r   r   )r    r!   r"   applytuplezipr$   r%   r'   r*   r+   )
r   dataZselected_datalevelsr0   rN   rE   Z	mi_tuplesr2   r3   r4   r4   r5   .test_groupby_apply_with_dropna_for_multi_index  s    &
ro   input_indexkeysseriesc                 C   s   t dtjgddgddgd}||}|r8|d }n | ddgkrX|dgkrX|dg }| d k	rj|| }|j|dd	}|r|d }| }t|| d S )
Nr   rA   r@   )r   r   r
   r
   r   r   Fr   )	r    r!   r(   r)   	set_indexr"   r#   r*   assert_equal)rp   rq   rr   objr3   rN   rE   r4   r4   r5   )test_groupby_dropna_with_multiindex_inputT  s$    



rv   c               	   C   s   dt jddt jgdddddgd} t| }|jd	d
d}|j}t j}dt jddg|ddt jdg|dt jt jddg|di}t|	 |	 D ]\}}t
|| qt t| d stt| dd ddgkstd S )NZg1Zg2r   r   rA   r@   rP   )groupr   rw   Fr   r;   )r(   r)   r    r!   r"   indicesZintparrayrl   r]   r*   Zassert_numpy_array_equalisnanr&   rq   rM   )rm   r0   r1   rE   r;   r3   Zresult_valuesZexpected_valuesr4   r4   r5   test_groupby_nan_includedr  s     "
   r|   c                  C   sV   t jtjddggdddgd} | ddg} | jddgdd }| }t|| d S )	Nr   r   r   r   r
   r   Fr   )	r    r!   r(   r)   rs   r"   firstr*   r+   )r0   rE   r3   r4   r4   r5   &test_groupby_drop_nan_with_multi_index  s
    r~   sequence_indexQ   r;   ZUInt8ZInt8ZUInt16ZInt16ZUInt32ZInt32ZUInt64ZInt64ZFloat32ZFloat64categorystringstring[pyarrow]zpyarrow is not installedreason)Zmarksdatetime64[ns]	period[d]zSparse[float]test_seriesc                    s  d  fddtdD }|dkr4ddtjdn&|d	krLd
dtjdnddtjdttjfdd|D |dddddgd}|jddd|d}|r|d }|	 }i }t
|D ]\}	}
||
d|	 ||
< q|dkrtjfdd|D |d jjdd}n^t|trL|drLtjtjfdd|D |ddd}ntjfdd|D |dd}tj| |dd d}|s| }|s| }|d k	r|dr|d ||d< t|| d S )N c                    s(   g | ] }d ddd d|  d  qS )xyz)r   r   rA   r@   r4   ).0k)r   r4   r5   
<listcomp>  s     z(test_no_sort_keep_na.<locals>.<listcomp>rP   )r   r   r   r   )r   r   r   )r   r   z
2016-01-01z
2017-01-01r   rA   c                    s   g | ]} | qS r4   r4   r   labeluniquesr4   r5   r     s     rx   r   r@   )keyr   r   F)r   sortas_indexr   r   c                    s   g | ]} | qS r4   r4   r   r   r   r4   r5   r     s     r_   ZSparsec                    s   g | ]} | qS r4   r4   r   r   r4   r5   r     s     c                    s   g | ]} | qS r4   r4   r   r   r4   r5   r     s     r:   )r   r<   r;   )joinrf   r    ZNAr(   r)   r!   rB   r"   r#   	enumerategetZCategoricalIndexcat
categories
isinstancestr
startswithr=   rz   r]   Zto_framereset_indexastyper*   rt   )r   r;   r   r   sequencer0   rN   rE   Zsummedr>   r   r   r3   r4   )r   r   r5   test_no_sort_keep_na  sP    #


 r   c                 C   s   t dddgi}t j||g|d}|r0|d n|}|j|d| d}| }	t t|jg}
t jddgi|
d}|rt	|	|d  nt
|	| d S )	Nr   r   rA   rx   F)r   r   r@   r   )r    r!   rB   r"   r#   r=   r   r;   r*   rC   r+   )r   r;   r.   r7   r   r0   rd   ru   rN   rE   r   r3   r4   r4   r5   test_null_is_null_for_dtype  s    r   
index_kindrf   singlemultic                 C   s  |dkr.|s.|dkr.d}| j tjj|d ttjjddd gddd }t	
t	j|ddd	gd
tdd}| }	t	j|ddd	dgd
d|	d< |dkrdg}
n`|dkrdg}
|d}|	d}	n<ddg}
|d |d< |ddg}|	d |	d< |	ddg}	t||}t||	}|dkrT|dkrT|d j|
df}|d j|
df}|	j|
||dd}t|||  }|d dd |d< |dkr|d dd |d< |r|dkr|ddg}n
|d}n6|dkr|dkr|jdd}|dkr|jdd}|dkr^|dkr^|d j }|dkrHdd |D }ndd |D }||d< |dkr|jddid}|r|d d }|j|
d|||d}t||| }t|| d S )N)idxminidxmaxr   z@GH#10694 - idxmin/max broken for categorical with observed=Falser   r   rA      sizer@   r   rb   r   r   rP   r   rf   r   Zx2Zcorrwithr   r   T)observedr   r   r   )r   r   r   c                 S   s   g | ]}|d krt jn|qS )rP   r(   r)   r   r4   r4   r5   r   5  s     z-test_categorical_reducers.<locals>.<listcomp>c                 S   s$   g | ]}|d krt jt jfn|qS ))rP   rP   r   r   r4   r4   r5   r   7  s     Fr   r   r   r   )node
add_markerpytestmarkxfailr(   appendrandomchoicer    r!   Categoricalrf   copyfillnars   r   Zdropr"   getattrr   replacer]   tolistrenamer*   rt   )requestZreduction_funcr   r   r   r   msgr]   r0   Z	df_filledrq   argsZargs_filledZ	gb_filledr3   	gb_keepnarE   r4   r4   r5   test_categorical_reducers  s|     







    r   c                 C   s
  |dkr$d}| j tjj|dd ttjjddd gddd }t	
t	j|ddd	gd
tdd}t||}||d   d }	|dkrttt|	}
nx|dkr|r|r|d jddd }q|d jddd }n|jd |	jd  d  }t|	|g }
nt|	|| }
t	
d|
i}|jdd|||d}|jdd||d}t||| }t||| }t||d   j |j D ]0\}}|jdkr||j|< n||j|df< q|dkr||d  ||@   d7  < |dkr|d}t|| d S )Nr   zBGH#49651 fillna may incorrectly reorders results when dropna=FalseF)r   strictr   rA   r   r   r@   r   rb   r   r   r   ZcumcountZngroupr   r   r   T)r   r   r   )ZrankZdiffZ
pct_changeshiftZint64) r   r   r   r   r   r(   r   r   r   r    r!   r   rf   r   Zisnullr&   rU   Znuniqueilocr   r   r"   rl   r   r]   ZravelndimZnotnullger   r*   rt   )r   Ztransformation_funcr   r   r   r   r]   r0   r   Znull_group_valuesZnull_group_dataZna_groupZnull_group_resultr   Z	gb_dropnarE   r3   r   valuer4   r4   r5   test_categorical_transformersH  sT    

     
"

r   methodheadtailc           
      C   s   t jddd gd}ttj|dddgdtt|d}|jdd|||d	}t	||  }| d
krr|d d d }|dk|dk
 dk@ |dk|dk
 dk@ B |d k|d k
 dk@ B }| d
kr|d d d }|| }	t||	 d S )Nr   rA   rc   r@   r   r   r   Fr   r   r\   )r(   r   r   r    r!   r   rf   rU   r"   r   Zcumsumr*   r+   )
r   r   r   r   r]   r0   rN   rE   maskr3   r4   r4   r5   test_categorical_head_tail}  s$     r   c                  C   sp   t jddd gd} ttj| dddgdtt| d}|jddd	}|	d
d }|
 }t|| d S )Nr   rA   rc   r@   r   r   r   Fr   c                 S   s   |   S Nr#   r   r4   r4   r5   rh     ri   z&test_categorical_agg.<locals>.<lambda>)r(   r   r   r    r!   r   rf   rU   r"   rY   r#   r*   r+   r]   r0   rN   rE   r3   r4   r4   r5   test_categorical_agg  s     r   c                  C   sr   t jddd gd} ttj| dddgdtt| d}|jddd	}|	d
d }|	d}t
|| d S )Nr   rA   rc   r@   r   r   r   Fr   c                 S   s   |   S r   r   r   r4   r4   r5   rh     ri   z,test_categorical_transform.<locals>.<lambda>r#   )r(   r   r   r    r!   r   rf   rU   r"   rT   r*   r+   r   r4   r4   r5   test_categorical_transform  s     
r   )2Znumpyr(   r   Zpandas.compat.pyarrowr   Zpandas.core.dtypes.missingr   Zpandasr    Zpandas._testingZ_testingr*   Zpandas.tests.groupbyr   r   Zparametrizer)   r6   r8   r?   rB   rF   rK   rO   Z
RangeIndexr&   r$   Zfrom_productrV   rZ   Zarm_slowZ	TimestampZ	TimedeltaZPeriodr`   paramro   rv   r|   r~   rf   Zskipifr   r9   r   r   r   r   r   r   r4   r4   r4   r5   <module>   st  




 

$






"
#

 8
P5
