U
    >vf!                    @   s`  d dl m Z  d dlZd dlZd dlZd dlmZmZmZm	Z	m
Z
mZmZ d dlmZ d dlmZ d dlmZ ejfddZejejd ejejejejejejejejejejd ejejejd ejejd ejdZd	d
 Zdd Zdd Zdd Zejdddgdd Zdd Zdd Zdd Z dd Z!ejddd d!d ged"d#d$gdd%fdd&gd'd(d d d d!d d d g	ed"d#d$gdd%e	d)d*d+ggfdd,gd d d d d!d d d d g	ed"d#d$gdd%ed"d#d$gdd%gfgejd-ddgd.d/ Z"d0d1 Z#d2d3 Z$d4d5 Z%ejdddgejd6ddgejd7ddgd8d9 Z&d:d; Z'd<d= Z(d>d? Z)d@dA Z*dBdC Z+ejdDdEgee,d)d d$gd"d#gdFfee,d)-dGdH d#gd d"gdFfee,d'd d$gd"d#gdFfgdIdJ Z.dKdL Z/dMdN Z0dOdP Z1ejdQdRdSdRgfdTdUdVgfdWdUdRgfdXdSdVgfgdYdZ Z2d[d\ Z3d]d^ Z4d_d` Z5ejdddgdadb Z6ejdddgdcdd Z7dedf Z8dgdh Z9didj Z:ejdkdee
j;ed"d"d#gdldmd"d#d#ggdd&gdnd"d#d$gdodpfdeed"d"d#gdldmd"d#d#gd"d#d$gdqfgdrds Z<ejdtdej=ej>gdudv Z?ej@dwdx ZAejdydzd{gd|d} ZBejdydzd{gejd6ddgd~d ZCejdde
;e	dgd) dgd)  dldde	ddddddddgdldde	dWdXgd) gd#d#d)d)d"d"d$d$gfde
jDeddgddedddgdde	dWdXggdddgdnd#d#d)d)ej=ej=d"d"ej=ej=d$d$gfde
jDeddgddedddgdde	dWdXggdddgdnd#d#d)d)ej=ej=d"d"ej=ej=d$d$gfgdd ZEdd ZFejdd"d d gd d d ggdd ZGdd ZHdd ZIdd ZJdd ZKdd ZLejd6ddgdd ZMdd ZNejdejOd"d"d#gfejPd"d#d#gfgdd ZQdd ZRejddRdTgdd ZSdd ZTdd ZUdd ZVdd ZWdd ZXejddRdTgeYeZdddZ[ejddRdTgeYeZdddZ\dd Z]ejddRdTgdd Z^dd Z_dd Z`dd Zaejddddgejdddgddń ZbejdddgejdddgddǄ Zcejddddgejdddgejdddgdd̄ Zdejddddgejdd{dzdgejdddgddτ Zeejddddgddф Zfejd-ddgejddgdd,ggddՄ ZgdS )    )datetimeN)CategoricalCategoricalIndex	DataFrameIndex
MultiIndexSeriesqcut)SeriesGroupBy)get_groupby_method_argsc                 C   s.   dd }t jt|||d}| j||d S )zpReindex to a cartesian production for the groupers,
    preserving the nature (Categorical) of each grouper
    c                 S   s4   t | ttfr0| j}tjtt||| jd} | S )N
categoriesordered)	
isinstancer   r   r   
from_codesnparangelenr   )ar    r   I/tmp/pip-unpacked-wheel-vdrwu74i/pandas/tests/groupby/test_categorical.pyf   s      z)cartesian_product_for_groupers.<locals>.fnames
fill_value)r   from_productmapreindex
sort_index)resultargsr   r   r   indexr   r   r   cartesian_product_for_groupers   s    r#   )allanycountcorrwithfirstidxmaxidxminlastmaxmeanZmedianminnthnuniqueprodZquantileZsemsizeZskewstdsumvarc                 C   sB   t | jd}dd }| j|ddj|}|jjd dks>td S )N   c                 S   s   |   |  |  |  dS )Nr.   r,   r&   r-   r7   )groupr   r   r   	get_statsI   s
    z2test_apply_use_categorical_name.<locals>.get_statsFobservedr   C)r	   r<   groupbyDapplyr"   r   AssertionError)dfcatsr9   r    r   r   r   test_apply_use_categorical_nameF   s    rC   c               
   C   sr  t dddddddddg	ddddgdd} tddddddd	d
dg	| d}ttdddd}tdddd
tjgi|d}|jddd }t	|| t ddddgdddgdd}t ddddgdddgdd}t||ddd	d
gd}|jddd}tdddgddd}	tdt
d	ddg|	di}|jdd}t	|| tddgddgddggddgd}
t |
j|
d< |
jdgdd}|dd  }t	||
dg  |
d}|
jddg }t	|| d!d" }||}|
jddg  }tddgdd#|_|d d$|d< t	|| tddd%d&gi}tj|jdd'd(d)d*gd+}|jj|ddt}t||d  t|jj|ddd,d  |d  t	|j|ddt|dg  |j|dd}|d-d  }t	||dg  td : |d.d  }|t}|tjj}|d/d  }W 5 Q R X tj	||dg dd0 tj	||dg dd0 t	||dg  t	||dg  t|jj|ddtj|d  t	|j|ddtj| tddd%d&d1gi}tj|jd2dd'd(d)d*gd+}|jj|ddt}t||d  t|jj|ddd3d  |d  t	|j|ddt|dg  t	|j|ddd4d  |dg  tdddddgi}tj|jdddd	d
gt tdd5}|j|ddt}t|j j!|j j"d6}t
ddddg|d}d|j_#t|| d7d8d9d:g}tj$j%dd
d;d<}t j&||dd6} ttj$'d;d
}|j| dd }|jt(| dd }t|| j!dd}	|)|	}t	|| |j| dd}|* }| j+, }t(| -|}|-|}t |dd7d8d9d:gd=}|j|ddd>* }t	|| t j&t.d
/d?|dd6}t|}t0|1 j2d| td@dAdBdCdDdEdFdGgd
 }t0|1 j2d| d S )HNr   bcdTr            r6      r   rD   abcdnamer   r"   Fr:   zyABvaluesrS   rU      r   Znumeric_onlyzJohn P. Doez	Jane DoveZ	person_idperson_namecolumnsc                 S   s   | S Nr   xr   r   r   <lambda>u       ztest_basic.<locals>.<lambda>c                 S   s   |  djd S )NrX   r   )drop_duplicatesilocr\   r   r   r   r   |   s    ztest_basic.<locals>.frN   object      
         (   )binsc                 S   s
   t | S r[   r   r4   xsr   r   r   r^      r_   c                 S   s   t j| ddS Nr   Zaxisr   r,   rl   r   r   r   r^      r_   c                 S   s   t j| ddS rn   rp   rl   r   r   r   r^      r_   c                 S   s   t j| S r[   )r   maximumreducerl   r   r   r   r^      r_   )Zcheck_dtypeic                 S   s
   t | S r[   rk   rl   r   r   r   r^      r_   c                 S   s
   t | S r[   rk   rl   r   r   r   r^      r_   )labelsr   foobarbazquxd   r2   )r   r   sortr;      r&   r-   r3   r.   25%50%75%r,   )3r   r   r   listr   nanr=   r-   tmassert_frame_equalr   r4   rX   	transformr`   ra   r?   copyr   r"   astypepdcutr   assert_series_equalZassert_produces_warningr,   rq   rr   filterr$   r   rU   r   r   rN   randomrandintr   randnasarrayr   describecodesargsorttaker   repeatassert_index_equalstackget_level_values)rB   data	exp_indexexpectedr    cat1cat2rA   gbZexp_idxr]   gr   rE   Zgbcresult2Zresult3Zresult4Zresult5levelsr   groupeddesc_resultidx
ord_labelsord_dataZexp_catsexpcexpr   r   r   
test_basicU   s    
 

 "
$ " $


  
r   c                 C   s   t tdddttddgtdgdgd dgd  tdgd	d
gdd}|jd	g| d}t tdddttddgtdgdgd tdgd	d
gdd}|d}t	|| d S )NrH      r   rD   rf   r   rJ   rG   ZIndex1ZIndex2)r   r   r   r   r"   levelr;      )
r   r   r   r   r   ranger=   	get_groupr   r   )r;   rA   r   r   r    r   r   r   test_level_get_group   s&    
r   c                  C   s   t dgd dgd  dddgd tdd	} t| jdddgd
d| _| dd  }|jdd
d}ddddddg}t|dddgd
d}ddddddgt|g}t	j
|ddgd}tdgd |dd}t|| d S )NrS      rT   highZmedlowr6   g      (@)r8   doseZoutcomesTr   r8   r   r   )r   Zsort_remainingr   rH   r&   r"   rN   )r   r   r   r   r   r=   Zvalue_countsr   r   r   from_arraysr   r   r   rA   r    r"   r   r   r   r   (test_sorting_with_different_categoricals   s    r   r   TFc           	      C   s  t td| d}t tdddg| d}tt|}t|||d}|jdd	gd
d}tj||gdd	gd}tdddg|dgd}|	dd }t
|| | }t
|| |tj}t
|| tj||gdd	gd}td|d}|	dd }t
|| d S )Nabcru   Zaaar   rD   r   )missingdenserU   r   r   Tr:   r   r   rG          @rU   )r"   rZ   c                 S   s   t j| ddS rn   )r   r-   r\   r   r   r   r^   #  r_   ztest_apply.<locals>.<lambda>rO   c                 S   s   dS NrG   r   r\   r   r   r   r^   /  r_   )r   r   r   r   r   r   r=   r   r   r?   r   r   r-   aggr   r   )	r   r   r   rU   rA   r   r   r   r    r   r   r   
test_apply  s"    r   c              	   C   s  t ddddgdddgdd}t ddddgdddgdd}t||d	d
ddgd}ddgd
 |d< |jdddg| d}tj||ddgd
 gdddgd}tdtd	d
ddg|di }| }| st|||ddggt	ddd}t
|| |jddg| d}tj||gddgd}td	d
ddgddddgd|d}| }| s\t|||gt	ddd}t
|| t ddddgdddgddd	d	d
d
gddddgd }t|}|jd!| d}	|	 }tt	d"d!t	d#dd$}td%d%gd&dgd'|d}| stt	d#d!t	d#dd$}
||
}t
|| |jd!d(g| d}|d)}td*d+d&d,gt ddddgdddgddd	d
d	d
gd-d!d(g}| st||jjd	d
ggd!d(g}t
|| d.D ]<}|\}}||}||j|k|j|k@  }t
|| qdd/dd/dd	d	gddddd0d1d2gddd3ddddgd4}t|}t|d tddd}||d5< |jd5d6gd7| d8}|d)}|jd5d6gd| d8}|d) }t
|| d S )9Nr   rD   rP   Tr   rE   rF   rQ   rG   rH   rI   r6   rR   rv   rw   r<   rS   rT   r:   r   rU   rO   ABCr   r   )rU   r<   ABrf   rg   rh   ri   )catintsvalr   abr   )rN   r   r   g      ?      4@)r   r   r   r-         $@g      >@g      D@)r   r   r   ))r   rG   )rD   rH   )rD   rG   )r   rH   r~   2   <   F   erv   rw   rx   r   rx   Fas_indexr;   )r   r   r=   r   r   r   r   r4   r#   r   r   r   r-   r   r   r   	set_indexr   rU   r   r   r   r   r   linspacereset_index)r;   r   r   rA   r   r   r   r    rF   Zgroups_single_keyr"   groups_double_keykeyrE   ir   groupsZgroups2r   r   r   test_observed3  s             
  

      



  
	  

r   c                 C   s   ddddgddddgdddd	gd
}t |}t|d ddddg}d|_|j|dg| d}tj|ddddggddgd}t ddddgddddgd|d}| st||jddddggddg}|	d}t
|| d S )NrI   r6   rJ   rG   rH   rf   rz      "   )C1C2C3r   r   r   r   r:   r   g      @g      @g      @r   g      Y@g      i@g      A@)r   r   rO   r-   )r   r   r   rN   r=   r   r   r#   rU   r   r   r   )r;   rF   rA   rU   r   r   r   r    r   r   r   test_observed_codes_remap  s$    $   
r   c                  C   s   t tjjddddtjjddddtjjdddddd} | jtd| d< | jdd	d
gdd}| }|j	j
d  | j kst|j	j
d  | j kst|j	j
d  | j kstd S )Nr      i0u  r{   '  )r   int_idother_idrv   categoryr   r   r   Tr:   rG   rH   )r   r   r   r   r   r   strr=   r&   r"   r   r0   r@   r   r   )rA   r   r    r   r   r   test_observed_perf  s    r   c                 C   s   t dddgdddgd}t|dddgd}|jd	| d
}|j}| rftddgddtdgddd}n*tddgddtg ddtdgddd}t|| d S )Nr   rE   rD   r   rG   rH   rI   r   valsr   r:   r   int64dtype)r   rE   r   rD   rE   )r   r   r=   r   r   r   assert_dict_equal)r;   r   rA   r   r    r   r   r   r   test_observed_groups  s    "
r   z,keys, expected_values, expected_index_levelsr   rd   	   rG   rH   rI   rb   rD   rV   r~   r6   rJ   r   a2test_seriesc           
   	   C   s   t tdddgdddgdtdddgdddgddddgdd	d
gdddg}d| krd|jdd}|j| dd}|r~|d }| }t| dkr|}n2dddddddddg	ddddg g}t||| d}t d|i|d}	|r|	d }	t	||	 d S )NrG   rH   rI   r   r6   rJ   r   rV   r~   r   )r   r   rD   rE   r   r   rD   rY   Fr:   rE   r   )r   r   rO   )
r   r   r   dropr=   r4   r   r   r   assert_equal)
keysexpected_valuesZexpected_index_levelsr   rA   r   r    r"   r   r   r   r   r   test_unobserved_in_index  s6    $r   c                 C   s   t tdtjdgdddgddddgd}|jd	| d
}|j}| rXdtddgddi}n(tddgddtg ddtg ddd}t|| d S )Nr   rD   rF   r   rG   rH   rI   r   r   r:   r   r   r   )r   rD   rF   )	r   r   r   r   r=   r   r   r   r   )r;   rA   r   r    r   r   r   r   test_observed_groups_with_nan  s    

r   c                  C   sp   t dtjtjgdddgd} tdddg}t| |d}|jd	d
dd d}|d jdg }t	|| d S )Nr   rD   rE   r   rG   rH   rI   )r   serr   Fr:   r   r   )
r   r   r   r   r   r=   r/   ra   r   r   )r   r   rA   r    r   r   r   r   test_observed_nth*  s    r   c                 C   s   t tjdtjdgdddgd}tddddg}t||d	}|jd
| d  }| rxtt dgdddgddgd	}n,tt dddgdddgddtjtjgd	}t	|| d S )Nr   rD   rE   r   rG   rH   rI   r6   )s1s2r   r:   )
r   r   r   r   r   r=   r(   r   r   r   )r;   r   r   rA   r    r   r   r   r   #test_dataframe_categorical_with_nan5  s    r   r;   r}   c           	      C   s   t ddddddgddddg| d}tddddddg}t||d}|jd||dd	 d
}t|jjdd}t|j}|sd|| < t||ksd|  d| d| d| }dst	|d S )NrF   r   rD   r   r   )labelr   r   )r;   r}   r   r(   rc   r   zDLabels and aggregation results not consistently sorted
for (ordered=z, observed=z, sort=z
)
Result:
F)
r   r   r   r=   	aggregater"   arrayisnar$   r@   )	r   r;   r}   r   r   rA   r    Zaggrmsgr   r   r   0test_dataframe_categorical_ordered_observed_sortI  s     	

r  c               	   C   s  t jddd} tjjdddd}tj|| dd}ttjdd}|j	|d	d

 }|j	t|d	d

 }|| }t|j|jdd|_t|| |j	|d	d
}| }|j }||}	||}
|
j	|	d	d
 }t|| t|j|j t|jd|jd tjtdd| dd}t|}t| jd| tddddddddgd }t| jd| d S )Nz
2014-01-01r6   )periodsr   rz   r{   Tru   Fr:   r   r~   r&   r-   r3   r.   r   r   r   r,   rG   )r   
date_ranger   r   r   r   r   r   r   r=   r-   r   r   r   r"   r   r   r   r   r   r   r   r   r   r   r   r   )r   r   rB   r   r    r   r   r   r   r   r   r   r   r   r   r   test_datetimel  s<    
  



 
r  c                  C   s(  t jd} ddddg}| jdddd	}tj||d
d}tt t dd	ddt
dd}||d< |djddd }|t
d j|jdd }ttjddddg|d
ddd|_t|| |jddd }|t
d j|jdd }ttjddddg|d
ddd|_t|| d S )Ni90  rv   rw   rx   ry   r   r6   rg   r{   Tru   rL   rY   rB   Fr   r:   rG   rH   rI   rb   )r   r   ZRandomStater   r   r   r   r   r   Zreshaper   r   r=   r4   r   r   r"   r   r   )sr   r   rB   rA   r    r   r   r   r   test_categorical_index  s(    &  r  c                  C   sz   t ddddgddddgdd} ttjdd| d	}|d
dddgd  }t|	 j
|  t|	 j
j| j d S )Nry   rv   rx   rw   Tr   rg   r6   rY   rG   rH   rI   rJ   )r   r   r   r   r   r=   r   r   r   r   rZ   assert_categorical_equalrU   )rB   rA   r    r   r   r   !test_describe_categorical_columns  s    

r	  c                  C   s   t tdddgd tdd d} | d d	| d< | jd
dgddd   }| }tddgddd}t	
|j| t	|jj|j |d |d  }tddgtddgd
dd}t	|| d S )Nrf   rS   rT   rJ   ZXYXXYrH   )r   mediumartistr
  r   r  Fr:   r   r   rN   r   r6   XYrb   rO   )r   r   r   r   r=   r&   unstackr   r   r   r   rZ   r  rU   r   r   r   )rA   Zgcatr    Zexp_columnsr   r   r   r   test_unstack_categorical  s    r  c                  C   s^   t tjtjddddddddg
} t|  jd}tjt	dd | 
|  W 5 Q R X d S )NrG   rH   rI   r6   z$Grouper and axis must be same lengthmatch)r   r   r   r   r   dropnarU   pytestraises
ValueErrorr=   r-   )seriesrj   r   r   r   test_bins_unequal_len  s     r  r  r   rS   rT   c                 C   s   | d S r   r   )r   r   r   r   r^     r_   r^   c                 C   sD   |  ttddd}|t}t|t| d}t|| d S )NZABBAr   r   rO   )r=   r   r   r   r   r   r   r   )r  r   r=   r    r   r   r   r   test_categorical_series  s    
r  c                     s  t tdddgdddgdddgdddgd	  jd
dgddd } t tddg jjjdddgddgd	d
ddgd}t| |  fdd} jd
|gddd } t tddg jjjdddgddgd	d
ddgd}t| | tdddgd
d} jd
|gddd } t| | d
dg}t tddg jjjdddgddgd	d
ddgd}dD ]8}t	t
d|d _ j|ddd } t| | q`d S )NrG   rH   rI   rf      e   f   g   )r   rS   rT   r   rS   FTr   r      rT   rY   c                    s    j | df S )NrS   )loc)rrA   r   r   r^     r_   ztest_as_index.<locals>.<lambda>r   r   rD   rb   )Nr  rT   r   )r   r   r=   r4   r   r   r   r   r   r   r   r"   )r    r   r   r  Zgroup_columnsrN   r   r"  r   test_as_index  sP    	r#  c                  C   s  t d} tdtt d| ddi}t| | ddd}tt d| ddd}t|jdddd	 j| t|jdddd	 j| tdtt d| ddi}t| | ddd}tt dt dddd}t|jdddd	 j| t|jdddd	 j| d S )
Nr   rS   baTr   r  bacFr|   )	r   r   r   r   r   r   r=   r(   r"   )r   rA   r   Znosort_indexr   r   r   test_preserve_categories%  s.        r&  c               	   C   s   t dddddgdddddgttdtd	d
dttdtd	ddd} t ddtjgddtjgttd	td	d
dttd	td	ddd}dD ]^}| j|d
d
djdd}| j|dd
djdd }|j|j	d}t
|| t
|| qd S )NrG   rH   rf      r      r   Zabaabr%  Fr   T)rS   rT   r   r   r   g      ?g      9@r   )r   r   byr   r;   rW   rY   )r   r   r   r   r   r=   r-   r   r   rZ   r   r   )rA   Zexp_fullcolZresult1r   r   r   r   r   test_preserve_categorical_dtypeC  s2    	

r,  zfunc, valuesr(   secondr+   fourththirdr.   r,   c                 C   s   t ddddgdd}tddddg|d	}|d
}t||  }tddgt||jdd	d
}t|| |d
d }t||  }|d }t	|| d S )Nr(   r-  r/  r.  Tru   r  )payloadr+  r1  r   r+  )
r   r   r=   getattrr   r   r   r   r   r   )funcrU   rE   rA   r   r    r   Zsgbr   r   r   test_preserve_on_ordered_opsd  s    
r4  c                  C   s  t tjd} tdddddddddg	}tj|dddgdd}| j|dd }| j|dd }t	|j
|j|jd	|_
t|| tddddddd
d
d
g	}tj|dddd
gdd}| j|dd }| j|dd |j}t	|j
|j|jd	|_
t|| tdddddddddg	ddddgdd	}tddddddd
ddg	|d} | jddd }|d j}tdddtjg}t|| d S )Nr   r   rG   rH   Tru   Fr:   r   rI   r   rD   rE   rF   r6   rJ   rK   )r   r   r   r   r   r   r   r=   r-   r   r"   r   r   r   r   r   r   rU   r   assert_numpy_array_equal)r   r   rB   r    r   r   r   r   test_categorical_no_compress  s>        
 
r6  c                  C   sd   t d gd tdddgd} | d d }ttg ddgdtg ddd	dd
}t|| d S )NrI   Ztraintestr  rS   rT   r   rc   r   rN   r   )r   r   r=   r(   r   r   r   rA   r    r   r   r   r    test_groupby_empty_with_category  s    r:  c                  C   s   t dtjdddi} dd tdddD }t||}| jdgdd	} tj| j	tdd
dd|d| d< | j
dgddd  }|t|jdd d }t|j|jjd|_t|| d S )Nvaluer   r   rz   c                 S   s   g | ]}| d |d  qS )z - i  r   ).0r   r   r   r   
<listcomp>  s     ztest_sort.<locals>.<listcomp>i  T)r*  Z	ascendingi)  F)rightrt   Zvalue_groupr:   c                 S   s   t |  d S )Nr   )floatsplitr\   r   r   r   r^     r_   ztest_sort.<locals>.<lambda>)r   rb   )r   r   r   r   r   r   Zsort_valuesr   r   r;  r=   r&   sortedr"   r   rN   r   r   )rA   rt   Z
cat_labelsresr   r   r   r   	test_sort  s    
 
  
rC  c              
   C   s   t dddgdddgdddgdd	d
gdddgdddgdddggdddgd}t|d |d|d< |jd| dd }| rddgddgd	d
gddgg}ddddg}n(ddgddgd	d
gddgg}ddddg}t |ddgt|d|dd}t|| d S )Nz	(7.5, 10]rf   r~   rg   z(2.5, 5]rJ   rh   z(5, 7.5]r   ri   r6   r   z(0, 2.5]rG   r   rV   r   r   rv   rw   rY   ru   Fr|   rM   rZ   r"   )r   r   r=   r(   r   r   r   )r}   r   rA   r    data_valuesindex_valuesr   r   r   r   
test_sort2  s0    	rG  c                 C   sf  t tdddtdddtdddtdddtdddtdddtdddgddddd	ddgdd
dddddgddddgd}t|d |d|d< | rddgddgddgddgg}tdddtdddtdddtdddg}nHddgddgddgddgg}tdddtdddtdddtdddg}t |ddgt|d|dd}|jd| dd }t|| d S )Ni  rV   rG   rH   rJ   rf   r~   r   r6   rg   rh   ri   r   r   r   )dtrv   rw   rH  rv   rw   rY   ru   rM   rD  Fr|   )r   r   r   r   r=   r(   r   r   )r}   r   rA   rE  rF  r   r    r   r   r   test_sort_datetimelike  sF    






	







rI  c                  C   s   t tdddgdddgddddgd} tdddgdd	}| jdd
dj }tdddg|dd	}t|| | jdd
djjdd}tdddg|dd	}t|| | jdd
djjdd}tddt	j
g|dd	}t|| | jdd
djjdd}tdt	j
t	j
g|dd	}t|| d S )Nr   rD   rE   r   rG   rH   r  rS   rb   Fr:   rI   r   rT   Z	min_count)r   r   r   r=   rT   r4   r   r   r   r   r   rA   Zexpected_idxr    r   r   r   r   test_empty_sum  s     "rL  c                  C   s   t tdddgdddgddddgd} tdddgdd	}| jdd
dj }tdddg|dd	}t|| | jdd
djjdd}tdddg|dd	}t|| | jdd
djjdd}tddt	j
g|dd	}t|| d S )Nr   rD   rE   r   rG   rH   r  rS   rb   Fr:   rT   r   rJ  )r   r   r   r=   rT   r1   r   r   r   r   r   rK  r   r   r   test_empty_prod;  s    "rM  c                  C   s   t ttdtttjddddd tdd} | dd	g }t	j
td
ddgttjddddgdd	gd}t ddddddddtjdg	i|d}t|| d S )NZ	abcbabcbaz2018-06-01 00Z1TrI   )freqr  r   )key1key2rU   rO  rP  r   rD   rE   r   rU   r   r6   r~   rJ   r   rH   rO   )r   r   r   r   r  r   r   r=   r-   r   r   r   r   r   )rA   r    r   r   r   r   r   ,test_groupby_multiindex_categorical_datetimeS  s"    
	$rQ  zas_index, expectedr   r   r   r]   )r"   r   rN   r   rD   r]   c                 C   sV   t tdddgdddddgdddgd}|jddg| d	d
d  }t|| d S )NrG   rH   r   r   rI   rR  r   rD   Tr   r]   )r   r   r=   r4   r   r   )r   r   rA   r    r   r   r   ,test_groupby_agg_observed_true_single_columnl  s
    $rS  r   c                 C   sZ   t ddddgddddgdd}t d dddgddddgdd}|jd| d}t|| d S )	Nr   rD   rE   rF   Fr   rG   r   )r   shiftr   r   )r   ctr   rB  r   r   r   
test_shift  s    
 
 
 
 rV  c                 C   s\   |   dd }|d d|d< |d d|d< tddddg|d	< |jd
gdd}|S )a  
    DataFrame with multiple categorical columns and a column of integers.
    Shortened so as not to contain all possible combinations of categories.
    Useful for testing `observed` kwarg functionality on GroupBy objects.

    Parameters
    ----------
    df: DataFrame
        Non-categorical, longer DataFrame from another fixture, used to derive
        this one

    Returns
    -------
    df_cat: DataFrame
    Nr6   rS   r   rT   rG   rH   rI   r<   r>   ro   )r   r   r   r   )rA   df_catr   r   r   rW    s    rW  	operationr   r?   c                 C   s   t ddddg| d jdd}t ddddg| d jdd}t||g}td	d
ddg|dd }| jddgddd }t||t}t	
|| d S )Nrw   rv   rS   r8  onethreetworT   rH   r6   rG   rI   r<   r   r"   rN   Tr:   )r   r   r   r   r   r   r=   r2  r4   r   r   )rW  rX  Zlev_aZlev_br"   r   r   r    r   r   r    test_seriesgroupby_observed_true  s    r]  c                 C   s   t jtddgddtdddgddgdd	gd
 \}}tddtjdtjdg|dd}|dkrl|jddd}| jdd	g|dd }t	||t
}t|| d S )Nrw   rv   Fru   rY  rZ  r[  rS   rT   r   rH   r6   rG   rI   r<   r\  r   r   Zinfer)Zdowncastr:   )r   r   r   Z	sortlevelr   r   r   Zfillnar=   r2  r4   r   r   )rW  r;   rX  r"   _r   r   r    r   r   r   )test_seriesgroupby_observed_false_or_none  s    r_  zobserved, index, datarw   rv   rS   r8  rY  rZ  r[  rT   ru   c                 C   s>   t ||dd}| jddg|dd dd }t|| d S )Nr<   r\  rS   rT   r:   c                 S   s   |   |  dS )Nr.   r,   r`  r\   r   r   r   r^     r_   z8test_seriesgroupby_observed_apply_dict.<locals>.<lambda>)r   r=   r?   r   r   )rW  r;   r"   r   r   r    r   r   r   &test_seriesgroupby_observed_apply_dict  s
    .ra  c                 C   s<   |  ddgd  }|  ddg d }t|| d S )NrS   rT   r<   )r=   r-   r   r   )rW  r   r    r   r   r   4test_groupby_categorical_series_dataframe_consistent  s    rb  codec                 C   sr   t ddddgddddgd	d
ddgd}tj| tdd}|j|dd }|jj|dd j}t|| d S )NrG   rH   rI   r6   r  r0  rJ   r   rV   r~   r   r   r   ro   r   )	r   r   r   r   r=   r-   Tr   r   )rc  rA   r   r    r   r   r   r   test_groupby_categorical_axis_1  s
    (rg  c                 C   s\   t tddg|dddgdddgd	}| }|jd| d
jt jdd }t|| d S )NZBobZGregru   rG   rH   )NameItemrh  ri  rY   r:   T)Zskipna)	r   r   r   r=   r   r4   r   r   r   )r;   r   rA   r   r    r   r   r   $test_groupby_cat_preserves_structure  s     rj  c               	   C   sL   t ddddgtdd} tjtdd | ddd	  W 5 Q R X d S )
Nr   rD   r6   r5   r   z'vau'r  r5   c                 S   s&   t | jd d g| jd d gdS )Nr  r5   Zvaurk  )r   ra   )Zrowsr   r   r   r^   4  s    z/test_get_nonexistent_category.<locals>.<lambda>)r   r   r  r  KeyErrorr=   r?   r"  r   r   r   test_get_nonexistent_category/  s
    
rm  c           
      C   s   | dkrt d | dkr4t jjdd}|j| tttdtddttd	d
 tdddgd d}t	| |}|rdnd}|j
ddg|dd }t|| }|| }	t|	|kstd S )Nngroupngroup is not truly a reductionr'   6TODO: implemented SeriesGroupBy.corrwith. See GH 32293reasonAABBABCDr   r   rH   皙?r6   cat_1cat_2r;  r'  rw  rx  r:   r;  )r  skipmarkxfailnode
add_markerr   r   r   r   r=   r2  r   r@   )
reduction_funcr;   requestrz  rA   r!   Zexpected_lengthseries_groupbyr   r    r   r   r   0test_series_groupby_on_2_categoricals_unobserved:  s&    


r  c                 C   s,  | dkrt d | dkr4t jjdd}|j| tttdtddttd	d
 tdddgd d}t	dt	dt	dt	dt	dg}t
| |}|jddgddd }t|| }|| }t|  }	|D ].}
|j|
 }t|	rt|s||	kstq|	dkr(| dkr(t|jtjs(td S )Nrn  ro  r'   rp  rq  rs  r   r   r   rH   ru  r6   rv  ZACZBCCAZCBCCrw  rx  Fr:   r;  r   r4   )r  ry  rz  r{  r|  r}  r   r   r   tupler   r=   r2  -_results_for_groupbys_with_missing_categoriesr   r   r   r@   r   Z
issubdtyper   integer)r~  r  rz  rA   Z
unobservedr!   r  r   r    Zzero_or_nanr   r   r   r   r   ?test_series_groupby_on_2_categoricals_unobserved_zeroes_or_nansW  s0    
"


"r  c                 C   s   | dkrt d tttdtddttdtddddddgd	}d
dddg}|jddgdd}t| |}t|| | }|D ]}||jkst	qd S )Nrn  2ngroup does not return the Categories on the indexrs  r   r   111112ru  rv  rS   2rT   r  r<   1r<   r  rw  rx  Tr:   )
r  ry  r   r   r   r=   r   r2  r"   r@   )r~  rA   unobserved_catsdf_grpr!   rB  r   r   r   r   >test_dataframe_groupby_on_2_categoricals_when_observed_is_true  s    


r  c                 C   s   | dkrt d tttdtddttdtddddddgd	}d
dddg}|jddg|d}t| |}t|| | }t|  }|t	j
kr|j|    stn|j| |k  std S )Nrn  r  rs  r   r   r  r  ru  rv  r  r  r  r  rw  rx  r:   )r  ry  r   r   r   r=   r   r2  r  r   r   r   Zisnullr$   r@   )r~  r;   rA   r  r  r!   rB  r   r   r   r   ?test_dataframe_groupby_on_2_categoricals_when_observed_is_false  s     	



r  c                  C   s   ddddgddddgdd	dd	gd
} t | }t|d tddd}||d< |jddgddd}|d d}|dd }t|| d S )Nrf   r~   r6   rG   rg   rh   ri   rF   rE   r   rv   r   rJ   r   rx   T)r   r}   r-   )	r   r   r   r   r   r=   r   r   r   )rF   rA   r   r   r    r   r   r   r   3test_series_groupby_categorical_aggregation_getitem  s    $r  zfunc, expected_valuesc              	   C   sv   t dddddgdddddgtdddddgdd}|d| }t d	|itdddgdd
d}t|| d S )Nr   rG   rH   rI   r6   )idr   r;  r  r   r;  rb   rO   )r   r   r   r=   r   r   r   r   )r3  r   rA   r    r   r   r   r   $test_groupby_agg_categorical_columns  s     r  c                  C   s   t dtdddgdddgdi} t dddgitddgd}| dddgtj}t	|| | dddg }t	|| d S )	NrS   r   rD   rE   r   rH   rG   rO   )
r   r   r   r   r=   r   r   r0   r   r   rA   r   r    r   r   r   test_groupby_agg_non_numeric  s     r  r3  c                 C   sl   t dgtdgddj d}|dd }t||  }tdgtdgddd|d jd	}t	|| d S )
Ni  rD   r   r   r  rS   rT   rb   r"   rN   r   )
r   r   r   Z
as_orderedr=   r2  r   r   r   r   )r3  rA   Z
df_groupedr    r   r   r   r   <test_groupby_first_returned_categorical_instead_of_dataframe  s        r  c                  C   s~   t ddg} d| j_tddddgtddddgt| dd}td	d
dgit| ddd}|jddd	 }t
|| d S )NrG   rH   FrI   rJ   rV   r   rK   r   r   g      @rD   rb   r   r}   )r   r   flagsZ	writeabler   r   r   r   r=   r-   r   r   )rB   rA   r   r    r   r   r   test_read_only_category_no_sort  s    $r  c               
   C   s   t ddddddddgddddddddgd} | d djjd	dddgd
d| d< t ddddddddddddd}|jddd}td	dddgd	dddgd
ddd|_| ddg 	 }t
|| d S )Nsmalllarger
  r<   rS   )rv   rw   rv   r   tinyTru   r   )rS   r<   rG   rI   rH   )r  r  r
  r  rw   r"   ro   )r   r   rN   r   )r   r   r   Zset_categoriesZrename_axisr   rZ   r=   r2   r  r   r   r  r   r   r   #test_sorted_missing_category_values  sJ    

 

r  c                  C   s   t dddddgi} | d d| d< | dj }tdddgtdddgddd| d jd}t	|| | d
dd	i}| }t|| d S )
NZcol_numrG   rH   rI   r   col_catrb   r  r(   )r   r   r=   r  r(   r   r   r   r   r   r   to_framer   r9  r   r   r   1test_agg_cython_category_not_implemented_fallback2  s    r  c               	   C   s   t ddddgddddgddtjdgddddgddddgd	} | d
di} | ddgdd }tjddgddggdd}t ddgddgddgd|d}t	|| d S )NrG   rH   ru  g?g333333?rv   rw   Zfee)rS   rT   numerical_col
object_colcategorical_colr  r   rS   rT   c                 S   s   |    S r[   )r   r4   r"  r   r   r   r^   V  r_   z7test_aggregate_categorical_with_isnan.<locals>.<lambda>r  r   r   )r  r  r  r   )
r   r   r   r   r=   r   r   r   r   r   r   r   r   r   %test_aggregate_categorical_with_isnanH  s&    




r  c               	   C   s   t ddddddgddddddgd} tjdddgdd	}| d
 || d
< | dd
 t| d< |  }t ddddddgddddddgddddddgd}|d
 ||d
< |d ||d< t	|| d S )NrG   rH   rI   ZWaitingZOnTheWayZ	Delivered)
package_idstatusTr   r  r  last_status)r  r  r  )
r   r   CategoricalDtyper   r=   r   r,   r   r   r   )rA   Zdelivery_status_typer    r   r   r   r   test_categorical_transformc  sN     	r  )r3  r;   c                 C   s   t ddddg}ddddg}t|||d}t ddg}tj||gddgd}tdtjtjdg|ddtdtjtjdg|ddd	}||  }|r| tj	}|j
ddg|d
d }	t|	|  }
t|
| d S Nr   rG   r   r   rD   r   rE   rb   )r(   r+   r:   )r   r   r   r   r   r   NaNr  r   r   r=   r2  r   r   )r3  r;   r   r   rA   r   r   expected_dictr   Zsrs_grpr    r   r   r   Ftest_series_groupby_first_on_categorical_col_grouped_on_2_categoricals  s    r  c                 C   s   t ddddg}ddddg}t|||d}t ddg}tj||gddgd}tdtjtjdg|ddtdtjtjdg|ddd	}||   }|r| 	tj
}|jddg|d
}	t|	|  }
t|
| d S r  )r   r   r   r   r   r   r  r  r  r   r   r=   r2  r   r   )r3  r;   r   r   rA   r   r   r  r   r  r    r   r   r   Btest_df_groupby_first_on_categorical_col_grouped_on_2_categoricals  s    r  c                  C   s   t tdddgdddgdtdd} | jddd	}|j}tjd
dgddtjdgddtjg ddd}| | ks~t| D ]}t	
|| ||  qd S )NrD   r   rE   r   rI   )r   r+  r   Fr  r   rG   Zintpr   rH   )rD   r   rE   )r   r   r   r=   indicesr   r   r   r@   r   r5  )rA   r   r    r   r   r   r   r   2test_groupby_categorical_indices_unused_categories  s    r  c                 C   sp   t ddddgi}|d d|d< t|dd |  }ttdddgdtdddgddd}t|| d S )	Nr   rG   rH   rI   r   rD   rb   )rN   r"   )	r   r   r2  r=   r   r   r   r   r   )r3  rA   r    r   r   r   r   1test_groupby_last_first_preserve_categoricaldtype  s      r  c               	   C   s   t ddgddgddgd} | jdddd} | jd	d
gdd d }tddgttddgd	dtddgd
dgdd}t	|| d S )NrG   rH   rf   r  r   r   rK   r   r   rD   Tr:   rE   rb   r   )
r   r   r=   r0   r   r   r   r   r   r   r9  r   r   r   )test_groupby_categorical_observed_nunique  s    r  c                  C   s   t jddgdd} tddgddgddggddgd	d| i}|dd  }tddgtddgdd
dt jddgddd}t	|| d S )Nr  bigTr   rG   rH   grpdescriptionrY   rb   r  )
r   r  r   r   r=   r,   r   r   r   r   )r   rA   r    r   r   r   r   ,test_groupby_categorical_aggregate_functions  s     r  c                 C   s   t ddgdddgd}tt ddgdddgdddgd}|jd| |d}| }| rltd	ddgi|d
}n,tdddgdddg}td	dddgi|d
}d|j_t|| d S )NrG   rH   rI   r   r6   )r]   rQ   r]   )r;   r  rQ   rO   r   )	r   r   r=   r4   r   r"   rN   r   r   )r;   r  r   rA   r   r    r   r"   r   r   r   test_groupby_categorical_dropna  s    $r  
index_kindr   singlemultic                 C   s|  |dkr0|s0|dkr0d}| j tjj|d n@|dkrX|sXd}| j tjj|d n|dkrp|sptjdd ttd	d
d	dgd
ddd	g|dtdd}|dkrdg}	n@|dkrdg}	|	|	}n&|dkrddg}	|d |d< |	|	}t
||}
|j|	|||d}t|||
 }|r,|jdj}n|d jj}td
ddd	g}t|| |dkrx|jdj}t|| d S )N)r)   r*   r  z1GH#10694 - idxmax/min fail with unused categoriesrq  r'   zDGH#49950 - corrwith with as_index=False may not have grouping columnr   /Result doesn't have categories, nothing to testrH   rG   rI   r6   r   rK   r   r  r   r   r}   r;   )r|  r}  r  rz  r{  ry  r   r   r   r   r   r=   r2  r"   r   r   r   r   r   r   )r  r   r}   r;   r~  r  r   r   rA   r   r!   r   	op_resultr    r   r   r   r   test_category_order_reducer  sL    


r  c                 C   s   t tddddgddddg|dtdd}|dkrFdg}||}n&|d	krldd
g}|d |d
< ||}t||}|j|| ||d}	t|	|| }
|
jdj	}t
ddddg}t|| |d	kr|
jd
j	}t|| d S )NrH   rG   rI   r6   r   rK   r  r   r  r   r  )r   r   r   r   r   r=   r2  r"   r   r   r   r   r   )r   r}   r;   Ztransformation_funcr  r   rA   r   r!   r   r  r    r   r   r   r   test_category_order_transformerL  s*    

r  methodheadtailc                 C   s   t tddddgddddg|dtdd}|dkr<dg}n@|d	krVdg}||}n&|d
kr|ddg}|d |d< ||}|j|| ||d}t|| }	|dkr|	d jj}
n|	j	dj}
t
ddddg}t|
| |d
kr|	j	dj}
t|
| d S )NrH   rG   rI   r6   r   rK   r   r   r  r  r   r  )r   r   r   r   r=   r2  r   r   r"   r   r   r   r   r   r}   r;   r  r  r   rA   r   r   r  r    r   r   r   r   test_category_order_head_tailk  s0    
r  r   c                 C   s:  |dkr|dks| s&|dkr&t d ttddddgddddg|dtdd	}|dkrbd
g}n@|dkr|d
g}||}n&|dkrd
dg}|d
 |d< ||}|j|| ||d}t||dd }	|dks| s|dkr|	d
 jj	}
n|	j
d
j	}
tddddg}t|
| |dkr6|	j
dj	}
t|
| d S )Nr   r   z(No categories in result, nothing to testrH   rG   rI   r6   r   rK   r   r  r  r   r  c                 S   s   | j ddS )NTrW   )r4   r\   r   r   r   r^     r_   z+test_category_order_apply.<locals>.<lambda>)r  ry  r   r   r   r   r=   r2  r   r   r"   r   r   r   r   r  r   r   r   test_category_order_apply  s<    


r  c                 C   sd  |dkr| st jdd tddd}tddddg||d	}t|td
d}|dkr^dg}n@|dkrxdg}||}n&|dkrddg}|d |d< ||}|j|| |dd}|	 }	|rdddgndddg}
t
|
|j|dd}| rtd|
i}|dkrtt||d|_n||_n6|dkrBtt|t||
d}ntt||
d}t|	| d S )Nr   r  rq  i'  r  rH   rG   rI   r   r6   rK   r   r  r  r   Tr  )r   r   rN   rD   )r   r   )r   r   rD   )r  ry  r   r   r   r   r   r   r=   r4   r   r   r   Z
from_framer"   r   r   r   )r   r}   r  r   r   ZgrouperrA   r   r   r    r   r"   r   r   r   r   test_many_categories  s@    
   

r  r   a1c                 C   s  |r&|dkr&t tdrttd n^|dkrJd}| jtjj|d n:|dkr|st	|dkr|s|sd}| jtjj|d t
dddgd	d
d
gdddgd}|ddd}d|kr|jdd}|j|||d}|r|d }t||}	|j|gf|	 }
t|||	 }|r0|s&|dkr0||}|sf|sF||}tdg|ggdgdggd|_n|sx||g |_t|
| d S )Nr'   z*corrwith not implemented for SeriesGroupByz1GH#32293: attempts to call SeriesGroupBy.corrwithrq  r0   rG   zGH#52848 - raises a ValueErrorr   rH   rI   r6   rJ   r   )r  r   rD   r   )r  r   r   rY   r)  rD   r2   )r   r   )hasattrr
   r@   r  ry  r|  r}  rz  r{  r   r   r   r   r=   r   r   r2  r  r   r   rZ   r   r   )r  r   r;   r~  r   r   r   rA   r   r!   r    r   r   r   r   test_agg_list  sP    
"



 

r  )hr   Znumpyr   r  Zpandasr   r   r   r   r   r   r   r	   Zpandas._testingZ_testingr   Zpandas.core.groupby.genericr
   Zpandas.tests.groupbyr   r  r#   r  rC   r   r   r   rz  Zparametrizer   r   r   r   r   r   r   r   r   r  r  r  r	  r  r  r   renamer  r#  r&  r,  r4  r6  r:  rC  rG  rI  rL  rM  rQ  r   rS  r   ZNaTrV  ZfixturerW  r]  r_  r   ra  rb  rg  rj  rm  r  r  r  r  r  r0   r&   r  r  r  r  r  r  r  r  r   boolr  r  r  r  r  r  r  r  r  r  r  r  r  r   r   r   r   <module>   s  $	  
n" '
$

8!



	
%
"
2 




"
",
	
	+
!

-5  
.!#
'