U
    >vf(                     @   s	  d Z ddlmZ ddlZddlZddlmZmZm	Z	m
Z
mZmZmZmZmZ ddlmZ ddlmZ dd Zdd	 Zg Zg Zd
D ]ZeddD ]\ZZeeeeZdededed  d dfZddddgfZ ee eD ]6\Z!Z"e#ee!e"eef e#e! de de  qqqej$j%ej$j&deedej$&dddgej$&dddgej$&dddgej$&dddgej$&dddgdd  Z'ej$&d!ddgd"d# Z(ej$&d$d%d&gd%d&d'ggd(d) Z)ej$&d$d%d&gd%d&d'ggd*d+ Z*d,d- Z+d.d/ Z,ej-d0d1 Z.d2d3 Z/d4d5 Z0d6d7 Z1d8d9 Z2ej$&d:d;d<d=gej$&dddgej$&d>d?d@d
gej$&dAddgej$&dBddgdCdD Z3ej$&dEddgej$&dFddddddGdHgdddddgddGddGdgfdddHdGdddgdddddgddGdGddgfdddHddGddgdddddgddGdGddgfgdIdJ Z4ej-dKdL Z5ej$&dMddddNdddgdOdPdQgfddddNdddgdOdRdQgfddddNdddgdOdSdTgfddddUdVdWdWgdOdSdTgfgdXdY Z6ej-dZd[ Z7ej$&d\dddddGdd]d^d_ddHg	dVdVd`dWdWdWdWd`d`g	fdddddGdddHgdVdVd`d`d`d`gfdddddd]d^d_gdVdVdWdWdWdWgfdddddgdVdVd`gfgdadb Z8ej-dcdd Z9ej$&dedddgej:dfdgdhgdidjdkgdlfdddddgeedgedmdndogedpdqej;ggddddgddddgddddggdidjdkgdrfgej$&dddgdsdt Z<ej$&dAddgej$&duddgej$&dvddNej=ddddddddddddgej>dwfddUe=dVdWdWdxdxdxdVdVdxdxdxdxgfgdydz Z?d{d| Z@ej$&dAddgej$&dvddNej=ddddddddddddgej>dwfddUe=dVdWdWdxdxdxdVdVdxdxdxdxgfgd}d~ ZAej$&dAddgej$&dvddNej=ddddddddddddddddddgej>dwfddUe=dVdWdWdxdxdxdVdVdxdxdxdxdxdxdxdxdxdxgfgdd ZBej$&dAddgej$&ddddddddddddddgfddddddgfgej$&dvddNej=ddddddddddddgej>dwfddUe=d`dxd`dxd`dxd`dxd`dxdxdxgfgdd ZCej$&dAddgej$&duddgej$&dvddNej=ddddddddddddgej>dwfddUe=dVdWdWdxdxdxdVdVdxdxdxdxgfgdd ZDej$&dddNdddgfddUdVdVd`gfgdd ZEej$&ddeFdddddddgfdeFddg ddddddgfgej$&dAddgdd ZGej$&dddgdd ZHdd ZIdd ZJdd ZKdd ZLdd ZMej$&d!ddgdd ZNdS )z
these are systematically testing all of the args to value_counts
with different size combinations. This is to ensure stability of the sorting
and proper parameter handling
    )productN)	CategoricalCategoricalIndex	DataFrameGrouperIndex
MultiIndexSeries
date_rangeto_datetime)Versionc                  C   s   t dgdgd} | d d| d< | dd  }t ddggddgd}|d d|d< t|}tdg|d	d
}t|| d S )NfemaleUS)gendercountryr   categoryr   columns   countindexname)	r   astypegroupbyvalue_countsr   
from_framer	   tmassert_series_equal)dfresultZdf_mi_expectedZmi_expectedexpected r"   J/tmp/pip-unpacked-wheel-vdrwu74i/pandas/tests/groupby/test_value_counts.py.tests_value_counts_index_names_category_column   s    
r$   c                 C   s   t jd tddd}tt jtd|t j||t jd|d |d}| r|d d	|d< t j	|j
dd d
df< t j	|j
dd ddf< t j	|j
dd ddf< t j	|j
dd ddf< t j	|j
dd ddf< |S )Ni  z
2015-08-24
   )Zperiodsabcdr   )1st2nd3rdr)   float   r'         r(            	   )nprandomseedr
   r   choicelistrandintr   nanloc)	seed_nansnmdaysframer"   r"   r#   seed_df0   s     r?   )TF)d   i  )      rA   r)   r      r'   r(   -zdf, keys, bins, n, m)idsisortTFznormalize, name)T
proportion)Fr   sort	ascendingdropnac                 C   s   dd }|||	|
|d}| j ||d}|d jf |}| j ||d}|d jtjf|}|jjd d dg |j_||}t|||f\}}t	|
 |
  d S )Nc                 S   s2   t t| jjt| jj}tj|| jjd| _| S )Nnames)	r6   mapr   Zget_level_valuesrangeZnlevelsr   from_arraysrL   )r   Zarrr"   r"   r#   rebuild_index_   s    z7test_series_groupby_value_counts.<locals>.rebuild_index)	normalizerH   rI   rJ   binsrH   r)   )r   r   applyr	   r   rL   renamerM   r   r   
sort_index)r   keysrR   r;   r<   rF   rQ   r   rH   rI   rJ   rP   kwargsZgrleftrightr"   r"   r#    test_series_groupby_value_countsU   s    

r\   utcc              	   C   s   t dddddddgddd	d	d
d
dgddg}t|d | dd|d< |tddd}|d   }|d tj }|j	j
|j	_
|d}t|| d S )NGI])J]鍙J]K])<M]U=M]驍N]applebananaorangepear	TimestampFoodr,   rj   sr]   unitDatetime1Dfreqkeyrk   r   )r   dropr   r   r   r   rW   rU   r	   r   rL   rV   r   r   )r]   r   dfgr    r!   r"   r"   r#   -test_series_groupby_value_counts_with_grouperz   s*    	
rv   r   ABCc                 C   sf   t | d}|| d d }|| d   }tg |jdd}tjg gt|  | d|_t	
|| d S )Nr   rT   r   )dtyper   rK   )r   r   r   r	   rz   r   rO   lenr   r   r   r   r   ru   r    r!   r"   r"   r#   &test_series_groupby_value_counts_empty   s    
r}   c                 C   sP   t tt| g| d}|| d d }|| d   }| }t|| d S )N)datar   rT   )r   rN   r{   r   r   r   r   r|   r"   r"   r#   (test_series_groupby_value_counts_one_row   s
    r   c                  C   sp   t tdgddgd} | dg }t ddgttddgtddgddgdddgd	d
}t	
|| d S )Nab)
categoriesr   r   Fr   )r   Zorderedrz   r   r~   r   r   )r	   r   r   r   r   rO   r2   arrayr   r   r   )rl   r    r!   r"   r"   r#   /test_series_groupby_value_counts_on_categorical   s"       r   c               	   C   s   t ddddddgddddddgddddddgd} | jd	d
gddd }|jdd}tddgddgdddggdddddgdddddgdddddggd	d
dgd}tdddddg|dd}t|| d S )Nmaler   lowmediumhighr   FRr   	educationr   r   r   FrS   r   r   r   rC   levelscodesrL   r   r   )r   r   r   r   r	   r   r   )r   gbr    r   r!   r"   r"   r#   (test_series_groupby_value_counts_no_sort   s    &r   c                	   C   s4   t ddddddgddddddgddddddgdS )	Nr   r   r   r   r   r   r   r   r   r"   r"   r"   r#   education_df   s    r   c              	   C   s4   | j ddd}tjtdd |  W 5 Q R X d S )Nr   r   axisr   match)r   pytestraisesNotImplementedErrorr   r   gpr"   r"   r#   	test_axis   s    r   c              	   C   s6   |  d}tjtdd |jdgd W 5 Q R X d S )Nr   subsetr   r   )r   r   r   
ValueErrorr   r   r"   r"   r#   test_bad_subset   s    
r   c                 C   s   t tjt dkr*|jtjjddd | dddg j	dd	}t
d
ddd
d
gtjdddddgdddgddd}t|| d S )N1.25Ypandas default unstable sorting of duplicatesissue with numpy>=1.25 with AVX instructionsFreasonstrictr   r   r   TrQ         ?      ?r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rK   rG   r   )r   r2   __version__node
add_markerr   markxfailr   r   r	   r   from_tuplesr   r   )r   requestr    r!   r"   r"   r#   
test_basic   s0    	
r   c                 C   s   | | j |||dS )NrQ   rH   rI   )r   )r   rX   rQ   rH   rI   r"   r"   r#   _frame_value_counts  s    r   r   columnr   functionzsort, ascending)FN)TTas_indexr>   c	                    sH  t tjt dkr6|r6|r6|r6|jtjjddd d d j fddd| }	 j	|	|d	}
|
d
dg j
|||d}|r>|
td
dg|||}|rt|| n|rdnd}| jd|idd}|dkr|jddidd}t|d dd|d< n2|dkr|d dk|d< nt|d dd|d< t|| n d
 d  d   d< |
d j
|||d}||_|r|jjdd}|d jdjd|d
< |d jdjd|d< |d= |jdd idd}t||_t|| nV|dd
|d jdjd |dd|d jdjd |d= t|| d S )Nr   r   Fr   r   c                    s    d |  dkS )Nr   r   r"   )xr   r"   r#   <lambda>@      z6test_against_frame_and_seriesgroupby.<locals>.<lambda>)r   r   r   )byr   r   r   r   rG   r   r   r   r   r   level_0r   r   r   rD   Zbothr   rC   )r   r2   r   r   r   r   r   r   valuesr   r   rU   r   r   r   reset_indexrV   whereassert_frame_equalr   r   Zto_framestrsplitgetr   r   insert)r   r   rQ   r   rH   rI   r   r>   r   r   r   r    r!   Zindex_framer"   r   r#   $test_against_frame_and_seriesgroupby  sr    

      
  ""r   rQ   zCsort, ascending, expected_rows, expected_count, expected_group_sizer,      c           
         s   j ddgddd}|d j|||d}t }	dD ]  fdd	|D |	 < q2|rn||	d
< |	d
  |  < n||	d< t||	 d S )Nr   r   F)r   rH   r   r   )r   r   r   c                    s   g | ]}  | qS r"   r"   .0rowr   r   r"   r#   
<listcomp>  s     z!test_compound.<locals>.<listcomp>rG   r   )r   r   r   r   r   )
r   rQ   rH   rI   expected_rowsZexpected_countZexpected_group_sizer   r    r!   r"   r   r#   test_compoundp  s      r   c                   C   s4   t ddddgddddgddddgdddd	d
gdS )Nr   rC   r      r   )rs   num_legs	num_wingsZfalconZdogcatZantr   r   r"   r"   r"   r#   
animals_df  s    "
r   z?sort, ascending, normalize, name, expected_data, expected_indexr   )r   r   r   )rC   r   r   )rC   r   r   )rC   r   r   )r   rC   r   )r   rC   r   rG   r   r   c           
      C   s`   | j |||d}t|tj|dddgd|d}t|| | dj |||d}	t|	| d S )N)rH   rI   rQ   rs   r   r   rK   r   )r   r	   r   rO   r   r   r   )
r   rH   rI   rQ   r   expected_dataexpected_indexresult_framer!   result_frame_groupbyr"   r"   r#   test_data_frame_value_counts  s(       
  r   c                  C   s`   t j} tdd| d| ddddg	ddd| | ddddg	dddddd| d| g	ddddddd| | g	d	S )
Nr   r   r   r,   rC   rA   r0   r.   )rw   rx   ry   D)r2   r8   r   )r;   r"   r"   r#   nulls_df  s    r   z:group_dropna, count_dropna, expected_rows, expected_valuesr.   r   r0   g      ?c                    s   t tjt dkr.|s.|jtjjddd jddg|d}|j	dd|d	}t
 }jD ]  fd
d|D | < q\t|}	t||	dd}
t||
 d S )Nr   r   Fr   rw   rx   )rJ   T)rQ   rH   rJ   c                    s   g | ]}  | qS r"   r"   r   r   r   r"   r#   r     s     z,test_dropna_combinations.<locals>.<listcomp>rG   r   )r   r2   r   r   r   r   r   r   r   r   r   r   r   r   r	   r   r   )r   Zgroup_dropnaZcount_dropnar   expected_valuesr   r   r    r   r   r!   r"   r   r#   test_dropna_combinations  s    	

r   c                 C   s(   t ddddgddddgd| | dgdS )Nr   JohnAnneBethSmithLouise)rs   
first_namemiddle_namer   )Znulls_fixturer"   r"   r#   names_with_nulls_df  s    


r   z%dropna, expected_data, expected_indexr   r   )r   r   )r   r   rs   r   r   rK   r   r   r   r   r   r   c           	      C   s`   | j ||d}t|||d}|r0|tt| }t|| | dj ||d}t|| d S )N)rJ   rQ   r   rs   )r   r	   r*   r{   r   r   r   )	r   rJ   rQ   r   r   r   r   r!   r   r"   r"   r#   #test_data_frame_value_counts_dropna  s    !
 r   observedznormalize, name, expected_datarz           c                 C   s   t tjt dkr*|jtjjddd | dj	d||d}|j
|d}tjd	d
ddddddddddgdddgd}	t||	|d}
tdD ]"}|
jjt|
jj| |d|
_q|rt||
 n |
j|rdndd}t|| d S )Nr   r   Fr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rK   r   r,   levelrG   r   r   )r   r2   r   r   r   r   r   r   r   r   r   r   r   r	   rN   r   
set_levelsr   r   r   r   r   r   r   r   r   rQ   r   r   r   r   r    r   expected_seriesir!   r"   r"   r#   =test_categorical_single_grouper_with_only_observed_categories2  sZ    

   

r   c                 C   s   |   d} | d jdg| d< | jd||d}|j|d}t|tj|dddgd|d	}	t	d
D ]@}
t
|	jj|
 }|
dkr|| d jj}|	jj||
d|	_qf|rt||	 n|	j|d}t|| d S )Nr   r   ASIAr   r   r   r   rK   r   r,   r   r   r   )copyr   r   Zadd_categoriesr   r   r	   r   r   rN   r   r   r   Zset_categoriesr   r   r   r   r   r   )r   r   r   r   rQ   r   r   r   r    r   r   Zindex_levelr!   r"   r"   r#   !assert_categorical_single_grouper}  s.    
r  c                 C   s`   t tjt dkr*|jtjjddd ddddd	d
ddddddg}t| |d||||d d S )Nr   r   Fr   r   r   r   r   r   r   r   r   r   r   r   r   Tr   r   r   r   rQ   r   r   	r   r2   r   r   r   r   r   r   r  r   r   rQ   r   r   r   r   r"   r"   r#   -test_categorical_single_grouper_observed_true  s:    r  c                 C   sl   t tjt dkr*|jtjjddd ddddd	d
ddddddddddddg}t| |d||||d d S )Nr   r   Fr   r   r   r   r   r   r   r   r   r   r   r   r   )r   r   r   )r   r   r   )r   r   r   )r   r   r   )r   r   r   )r   r   r   r  r  r  r"   r"   r#   .test_categorical_single_grouper_observed_false  sF    ,r  zobserved, expected_index)r   r   r   )r   r   r   )r   r   r   )r   r   r   )r   r   r   )r   r   r   )r   r   r   )r   r   r   )r   r   r   )r   r   r   )r   r   r   )r   r   r   c                 C   s   |   } | d d| d< | d d| d< | jddg||d}|j|d}t|r^||dk n|tj|dddgd|d	}	td
D ]"}
|	jj	t
|	jj|
 |
d|	_q|rt||	 n |	j|rdndd}t|| d S )Nr   r   r   r   r   r   r   rK   r   rC   r   rG   r   r   )r  r   r   r   r	   r   r   rN   r   r   r   r   r   r   r   r   )r   r   r   r   rQ   r   r   r   r    r   r   r!   r"   r"   r#   "test_categorical_multiple_groupers1  s8    7   

r  c                 C   s  t tjt dkr*|jtjjddd |  } | d 	d| d< | d 	d| d< | j
d||d	}|j|d
}ddddddddddddg}	t|tj|	dddgd|d}
tddD ]"}|
jjt|
jj| |d|
_q|rt||
 n |
j|rdndd}t|| d S )Nr   r   Fr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rK   r   r   r,   r   rG   r   r   )r   r2   r   r   r   r   r   r   r  r   r   r   r	   r   r   rN   r   r   r   r   r   r   r   r   r   r"   r"   r#   test_categorical_non_groupers  sX    
 

r	  z*normalize, expected_label, expected_valuesc                 C   s   t dddgdddgd}|jdddgddd	 gd
d}|jd| d}t dtjdddgtjdddddgddddgddddg||i}t|| d S )Nr   rC   r,   )rw   rx   r   rA   rw   c                 S   s   | dkrdS dS )Nr   r.   r0   r"   )r   r"   r"   r#   r     r   z&test_mixed_groupings.<locals>.<lambda>Fr   T)rH   rQ   r   r   Zlevel_2r0   r.   rx   )r   r   r   r2   r   int_r   r   )rQ   expected_labelr   r   r   r    r!   r"   r"   r#   test_mixed_groupings  s"    	     	r  ztest, columns, expected_namesrepeatZabbder   dr   er   r&   level_1cc           
      C   s   t dddddgdddd	d
gg|d}ddg}dtjddgtjddg}|j||d }|rtdtj||ddd}t	
|| n@dd |D }t|}	d|	d< |	d t ||	d}t	|| d S )Nr   r,   rA   r.   r1   rC   r   r   r0   r%   r   )r   r   r.   r,   rA   r1   )rC   r   r0   r   r   r%   r   r   r   r  r
  r   rK   r   r   c                 S   s   g | ]}t |d g qS )r   )r6   r   r"   r"   r#   r     s     z0test_column_label_duplicates.<locals>.<listcomp>r  )r   r2   r   int64r   r   r	   r   r   r   r   r6   appendr   )
testr   Zexpected_namesr   r   r   rX   r    r!   Zexpected_columnsr"   r"   r#   test_column_label_duplicates  s(    $
r  znormalize, expected_labelc              	   C   sZ   t dddggdd|gdjddd}d	| d
}tjt|d |j| d W 5 Q R X d S )Nr   rC   r,   r   r   r   Fr
  zColumn label 'z' is duplicate of result columnr   r   )r   r   r   r   r   r   )rQ   r  r   msgr"   r"   r#   test_result_label_duplicates  s    	 r  c                  C   sf   t dddgi} | tjddgtjd}| }tdgtjddggd dgddd}t	
|| d S )Nr   r   r   rC   rK   r   r   )r   r   r2   r   r  r   r	   r   r   r   r   )r   r   r    r!   r"   r"   r#   test_ambiguous_grouping   s      r  c               	   C   sZ   t dddgdddgddddgd	} d
}tjt|d | djdgd W 5 Q R X d S )Nr   r   r  r   yc1c2r   r   r   z;Keys {'c1'} in subset cannot be in the groupby column keys.r   r  r   r   r   r   r   r   r   r   r  r"   r"   r#   "test_subset_overlaps_gb_key_raises+  s    $r   c               	   C   sZ   t dddgdddgddddgd	} d
}tjt|d | djdgd W 5 Q R X d S )Nr   r   r  r   r  r  r   r   r   z4Keys {'c3'} in subset do not exist in the DataFrame.r   r  c3r   r  r  r"   r"   r#   !test_subset_doesnt_exist_in_frame3  s    $r"  c                  C   sv   t dddgdddgddddgd	} | jdd
jdgd}tddgtjddgddggd dgddd}t|| d S )Nr   r   r  r   r  r  r   r   r   r   r  r   rC   rK   r   r   r   r   r   r	   r   rO   r   r   r   r    r!   r"   r"   r#   test_subset;  s    $r%  c                  C   s   t dddgdddgdddggdddgdddgd	} | jdd
jdgd}tddgtjddgddgddggd ddgddd}t|| d S )Nr   r   r   r  r   r   r  r  )r   r   r   r   rC   rK   r   r   r#  r$  r"   r"   r#   test_subset_duplicate_columnsG  s     r&  c              
   C   s   t dddddddgddd	d	d
d
dgddg}t|d | dd|d< |tddd}| }tddddg| d}|d  }t||dd	d
dggddddddgtdddddddggdddgd}t	d|dd }t
|| d S )!Nr^   r_   r`   ra   rb   rc   rd   re   rf   rg   rh   ri   r,   rj   rl   rm   ro   rp   rq   z
2019-08-06z
2019-08-07z
2019-08-09z
2019-08-10)r]   r   r   rC   r   rk   r   r   r   )r   rt   r   r   r   r   uniquer   rN   r	   r   r   )r]   r   r   r    datesZ
timestampsr   r!   r"   r"   r#   test_value_counts_time_grouperY  s:    	
 $r)  )O__doc__	itertoolsr   Znumpyr2   r   Zpandasr   r   r   r   r   r   r	   r
   r   Zpandas._testingZ_testingr   Zpandas.util.versionr   r$   r?   ZbinnedrE   r:   r;   r<   r   ZarangemaxrR   rX   kr   r  r   ZslowZparametrizer\   rv   r}   r   r   r   Zfixturer   r   r   r   r   r   r   r   r   r   r   r   rO   r8   r   r   r  r   r  r  r  r  r	  r  r6   r  r  r  r   r"  r%  r&  r)  r"   r"   r"   r#   <module>   s  ,$ 




 H***


""


	
 & :#& *& &0 & %& <


