U
    OvfȾ                     @   s	  d Z ddlZddlZddlZddlmZ ddlZddlZddl	m
Z ddlmZ ddlmZmZmZmZ ddlmZmZmZmZmZmZmZ ddlmZmZ dd	lmZ dd
l m!Z! ddl"m#Z#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z*m+Z+m,Z, ddl-m.Z. ddl/m0Z0 dZ1ej23de1 Z4e5dddddgdddddgdddddggZ6dZ7e6j8\Z9Z:ee7e6ddd\Z;Z<e=e;Z>ej2j?dej5ej=gddgdej2?ddd gej2?d!ej@ejAgd"d# ZBej2j?dej5ej=gddgdej2?ddd gd$d% ZCej2j?dej5ej=gddgdd&d' ZDej2?d(d)d*gej2j?dej5ej=gddgdej2?d+d,d-d.dgd/d0 ZEej2?d1dd gd2d3 ZFej2?d1d4d5gd6d7 ZGej2?d8eegd9d: ZHd;d< ZId=d> ZJej2j?d?e;e>gddgdej2j?d@dAdBe6dCdD gdAdBdEdFgdej2?d8eegdGdH ZKej2j?d@dAdBe6dIdD gdAdBdEdFgddJdK ZLej2?dLdMdNdOdD dPfdQgej2?d8eegdRdS ZMej2?d8eegdTdU ZNdVdW ZOej2?d1dd gej2?d+d,dgdXdY ZPdZd[ ZQej2?d8eegd\d] ZRd^d_ ZSej2j?d?e;e>gddgdd`da ZTdbdc ZUddde ZVej2?dfdgdhgdidj ZWdkdl ZXdmdn ZYej2?d8eegdodp ZZej2j?dej5ej=gddgdej2?dqedfed fedfgej2?drdsdgdtdu Z[ej2?d8eegdvdw Z\ej2j?d@dAdBe6gdAdBdEgdej2?d8eegdxdy Z]ej2j?dej5ej=gddgdej2?d!ej^ej_gej2?d@dBdEgej2?d8eegdzd{ Z`ej2?d8eegd|d} Zaej2?d8eegd~d Zbdd Zcdd Zdej2j?d?e;e>gddgdej2?d8eegdd Zeej2?d!ej^ej_ej@ejAgej2?d8eegdd Zfej2j?d?e;e>gddgddd Zgdd Zhdd Zidd Zjej2j?d?e;e>gddgdej2?d8eegdd Zkej2j?d?e;e>gddgdej2?d8eegdd Zldd Zmej2j?dej5ej=gddgddd Znej2?d8eegdd Zodd Zpej2j?dej5ej=gddgdej2?ddd gdd Zqej2?d!ej@ejAgej2?dddgdd Zrej2?d!ej@ejAgdd Zsej2?dedfedfgdd Ztej2?dedfedfgdd Zuej2?d8eegdd Zvej2?d8eegej2?dde7d idfd@e;dds idfd@ddD idfd@e;ddddsf idfd@ddD idfgdd Zwej2?dde;dds idfgdd Zxej2?d?e;e>gej2?d!ejAej@gdd Zyej2?de.e;dddgdd ZzddÄ Z{ddń Z|ej2?dd@e}dBiddgddggddǜfddɄ Z~ej2?dedfedfedfgdd΄ Zej2?dddgddф Zej2?d@dBdAgddӄ Zej2?d@dBdAgddՄ ZdS )zTesting for K-means    N)StringIO)sparse)clone)KMeansMiniBatchKMeansk_meanskmeans_plusplus)_euclidean_dense_dense_wrapper_euclidean_sparse_dense_wrapper_inertia_dense_inertia_sparse_is_same_clustering_relocate_empty_clusters_dense_relocate_empty_clusters_sparse)_labels_inertia_mini_batch_step)
make_blobs)ConvergenceWarning)pairwise_distancespairwise_distances_argmin)v_measure_score)euclidean_distances)assert_allcloseassert_array_equalcreate_memmap_backed_data)	row_norms)threadpool_limitszThe default value of `n_init` will change from \d* to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning:FutureWarningzignore:        g      @      ?g      @d   *   )	n_samplescentersZcluster_stdrandom_statearray_constrZdenser   )idsalgolloydelkandtypec                 C   s   | ddgddgddgddgg|d}ddddg}t jddgddgg|d}ddddg}d}t jddgddgg|d}d	}	td	d||d
}
|
j||d t|
j| t|
j| t|
j| |
j	|	kst
d S )Nr         ?   r)      g      ?g      ?g      ?   
n_clustersn_initinit	algorithmsample_weight)nparrayr   fitr   labels_r   inertia_cluster_centers_n_iter_AssertionError)r$   r&   r)   Xr5   init_centersexpected_labelsexpected_inertiaexpected_centersexpected_n_iterkmeans rE   F/tmp/pip-unpacked-wheel-qu3nn_q2/sklearn/cluster/tests/test_k_means.pytest_kmeans_results;   s    $rG   c           	      C   s   | ddgddgddgddgg}t ddgddgg}tdd||d}|| d}d}t|j| |j|ksrtz8ddddg}ddgddgg}t|j	| t|j
| W nH tk
r   ddddg}dd	gdd
gg}t|j	| t|j
| Y nX d S )Nr   r*   r+   r-   r.   r/   g      ?g      ?r   r   )r6   r7   r   r8   r   r:   r<   r=   r   r9   r;   )	r$   r&   r>   r?   rD   rC   rA   r@   rB   rE   rE   rF   test_kmeans_relocated_clustersU   s$     
rH   c                 C   s   t ddddddddd	d
g
dd}| |}t d
}t dddgdd}t dddgdd}t dddg}t jd
t jd}| t jkrt|||||| nt|j|j	|j
||||| t|dddg t|dgd
gd	gg d S )Ng      $g      #ig      !ir+   	   g      #@
   ig     0g      $@r   r,      i)r6   r7   reshapeoneszerosint32r   r   dataindicesZindptrr   r   )r$   r>   r5   centers_oldcenters_newZweight_in_clusterslabelsrE   rE   rF   test_relocate_empty_clustersv   s8    &

     rV   distributionnormalZblobstolg{Gz?g:0yE>g0.++c           	      C   s   t j|}| dkr"|jdd}nt|d\}}d||dk < ||}td|d|d}td	d|d|d
}|| || t|j|j t	|j
|j
 |j|jkst|jtj|jddkstd S )NrX   i  rK   sizer#   r      r+   )r0   r#   r1   rY   r(   )r3   r0   r#   r1   rY   ư>)rel)r6   randomRandomStaterX   r   r   r8   r   r;   r   r9   r<   r=   r:   pytestZapprox)	rW   r$   rY   global_random_seedrndr>   _Zkm_lloydZkm_elkanrE   rE   rF   test_kmeans_elkan_results   s(    

rg   r3   c                 C   sH   t j|}|jdd}d}t| d|dd|d|}|j|k sDtd S )NrZ   r[   i,  r^   r+   r   )r3   r0   r#   r1   rY   max_iter)r6   ra   rb   rX   r   r8   r<   r=   )r3   rd   re   r>   rh   kmrE   rE   rF   test_kmeans_convergence   s    	rj   autofullc              	   C   sV   t jdd}t| d}tjtd|  dd || |jdksHt	W 5 Q R X d S )Nr   r.   r3   zalgorithm='zB' is deprecated, it will be removed in 1.3. Using 'lloyd' instead.matchr'   )
r6   ra   randr   rc   warnsFutureWarningr8   
_algorithmr=   )r3   r>   rD   rE   rE   rF   ,test_algorithm_auto_full_deprecation_warning   s    


rt   	Estimatorc              	   C   s`   t jdd}t jjdd}|  }|j||d d}tjt|d |j||d W 5 Q R X d S )Nr   r.   r[   r4   zI'sample_weight' was deprecated in version 1.3 and will be removed in 1.5.rn   )	r6   ra   rp   uniformr8   rc   rq   rr   predict)ru   r>   r5   rD   Zwarn_msgrE   rE   rF   .test_predict_sample_weight_deprecation_warning   s    rx   c              	   C   sv  t j| }t|jtjd }| }t |}t |}t j|jd t	j
d}t j|jd t	j
d}t jt	jd t	j
d}t	d d }	td d }
|d d }t|	||||t j| dd}|dkstt|	||\}}|dkst||k stt|
||||t j| dd}|dkstt|
||\}}|dks<t||k sJtt|| t|| t|| t|| d S )Nr[   r   r,   rK   F)random_reassignr   )r6   ra   rb   r"   rX   shapecopyZ
zeros_likerO   r>   r)   rN   X_csrr   r=   r   r   r   )rd   rngrS   Zcenters_old_csrrT   Zcenters_new_csrZweight_sumsZweight_sums_csrr5   ZX_mbZX_mb_csrZsample_weight_mbZold_inertiarU   Znew_inertiaZold_inertia_csrZ
labels_csrZnew_inertia_csrrE   rE   rF   !test_minibatch_update_consistency   sZ    


	
	  


r~   c                 C   sX   | j }|jttfkst| j}t|jd tks6ttt	t
|d | jdksTtd S )Nr   r   r   )r;   rz   r0   
n_featuresr=   r9   r6   uniquer   r   true_labelsr:   )ri   r"   rU   rE   rE   rF   _check_fitted_model0  s    r   rQ   r2   ra   	k-means++c                 C   s   t S Nr"   r>   kr#   rE   rE   rF   <lambda>A      r   ndarraycallablec                 C   s4   t |trdnd}| |td|d|}t| d S )NrK   r+   r    r2   r0   r#   r1   )
isinstancestrr0   r8   r   )ru   rQ   r2   r1   ri   rE   rE   rF   test_all_init>  s    	   r   c                 C   s   t S r   r   r   rE   rE   rF   r   P  r   c                 C   sF   t | trdnd}t| td|d}tdD ]}|t q*t| d S )NrK   r+   r   r   r   )r   r   r   r0   rangepartial_fitr>   r   )r2   r1   ri   irE   rE   rF   &test_minibatch_kmeans_partial_fit_initN  s       r   zinit, expected_n_init)r   r+   )ra   defaultc                 C   s   |j || jd fdS )Nr+   r[   )rv   rz   )r>   r0   r#   rE   rE   rF   r   e  s   r   )
array-liker+   c                 C   sl   d\}}}t j||}|dkr.t j||}|dkrF| tkrBdnd}| ||dd|}|j|kshtdS )	zCheck that `n_init="auto"` chooses the right number of initializations.
    Non-regression test for #26657:
    https://github.com/scikit-learn/scikit-learn/pull/26657
    )r   rK   r^   r   r   r-   rK   rk   r0   r2   r1   N)r6   ra   randnr   r8   _n_initr=   )ru   r2   Zexpected_n_initZn_sampler   r0   r>   rD   rE   rE   rF   ,test_kmeans_init_auto_with_initial_centroids_  s    
r   c                 C   s`   t t}t t}| ttd|dt}| t|d|d|}t|j|j t|j	|j	 d S )Nr+   r0   r2   r1   r#   )
r6   asfortranarrayr>   r"   r0   r8   r   r;   r   r9   )ru   rd   	X_fortrancenters_fortranZkm_cZkm_frE   rE   rF   test_fortran_aligned_data~  s(    

   r   c                  C   s8   t tddd} tj}t t_z| t W 5 |t_X d S )Nr    r+   )r0   r#   verbose)r   r0   sysstdoutr   r8   r>   )ri   Z
old_stdoutrE   rE   rF   test_minibatch_kmeans_verbose  s    r   c              	   C   s   t jdjdd}t| tddd|dd| | }t	d|j
sJtt	d	|j
s\t|dkrxt	d
|j
stnt	d|j
std S )Nr   rZ   r[   r    ra   r+   )r3   r0   r#   r2   r1   rY   r   zInitialization completezIteration [0-9]+, inertiazstrict convergencez center shift .* within tolerance)r6   ra   rb   rX   r   r0   r8   
readouterrresearchoutr=   )r3   rY   capsysr>   capturedrE   rE   rF   test_kmeans_verbose  s$    
r   c                	   C   s0   t jtdd tdddt W 5 Q R X d S )Nz,init_size.* should be larger than n_clustersrn   rK      )	init_sizer0   )rc   rq   RuntimeWarningr   r8   r>   rE   rE   rE   rF   'test_minibatch_kmeans_warning_init_size  s
     r   c              	   C   s2   t jtdd | ttddt W 5 Q R X d S )NzAExplicit initial center position passed: performing only one initrn   rK   r2   r0   r1   )rc   rq   r   r"   r0   r8   r>   )ru   rE   rE   rF   'test_warning_n_init_precomputed_centers  s
    r   c                 C   s   t dd| d\}}d|d d dd d f< tdd| dd	|}|jjd
d dksXttdd| dd	|}|jjd
d dksttd| dd}tdD ]}|| q|jjd
d dkstd S )Nr   r^   )r!   r"   r#   r   r.   r   rK   ra   )r0   
batch_sizer#   r2   r+   Zaxis   )r0   r#   r2   )	r   r   r8   r;   anysumr=   r   r   )rd   Zzeroed_Xr   ri   r   rE   rE   rF    test_minibatch_sensible_reassign  s8      
      r   c              
   C   s   t ttf}ttD ]}tt|k jdd||< qt t	}t 
|}t| ||dd  }t| |||t tt j|ddd t| ||dd  }||kstt| |||t tt j|ddd t|| d S )Nr   r   r+   T)ry   Zreassignment_ratiogV瞯<)r6   emptyr0   r   r   r>   r   meanrN   r!   Z
empty_liker   r   rO   ra   rb   r=   r   )rQ   rd   Zperfect_centersr   r5   rT   Zscore_beforeZscore_afterrE   rE   rF   test_minibatch_reassign  s:    



r   c                   C   s   t ddtdddt d S )Nr   rK   r    T)r0   r   r   r#   r   )r   r!   r8   r>   rE   rE   rE   rF   &test_minibatch_with_many_reassignments  s    r   c                  C   sp   t ddddt} | jdks"tt ddddt} | jdksDtt dddtd dt} | jtksltd S )NrK   r^   r+   )r0   r   r1         )r0   r   r1   r   )r   r8   r>   Z
_init_sizer=   r!   ri   rE   rE   rF   test_minibatch_kmeans_init_size%  s       r   ztol, max_no_improvement)-C6?N)r   rK   c                 C   s   t dddd\}}}td|d|dddd|d	}|| d|j  k rNdk sTn t|  }|d krrd	|jksrt|dkrd
|jkstd S )Nr-   r   T)r"   r#   Zreturn_centersr   rK   r+   )	r0   r2   r   rY   r#   rh   r1   r   max_no_improvementz Converged (small centers change)z*Converged (lack of improvement in inertia))r   r   r8   r<   r=   r   r   )r   rY   r   r>   rf   r"   ri   r   rE   rE   rF   #test_minibatch_declared_convergence7  s&    
r   c                  C   s   d} t jd }td| ddt }|jt|j|  | ks@tt	|jt
sPttd| ddd ddt }|jdksxt|jd| |  kstt	|jt
std S )Nr   r   r-   )r0   r   r#   rK   )r0   r   r#   rY   r   rh   )r>   rz   r   r8   r<   r6   ceilZn_steps_r=   r   int)r   r!   ri   rE   rE   rF   test_minibatch_iter_stepsS  s$    
	r   c                  C   s6   t  } tdtdd}||  t| t| t  d S )NFr    )Zcopy_xr0   r#   )r>   r{   r   r0   r8   r   r   )Zmy_Xri   rE   rE   rF   test_kmeans_copyxl  s
    
r   c                 C   s`   t j|dd}| d|dd}|||}| d|dd}|||}||ks\td S )Nr   rK   r+   )r1   r#   rh   )r6   ra   rb   r   r8   Zscorer=   )ru   rd   r>   km1s1km2s2rE   rE   rF   test_score_max_iterw  s    r   zEstimator, algorithmrh   r.   c                 C   s   t ddd|d\}}|||d}| ddd||d}|d k	rF|j|d || |j}	||}
t|
|	 ||}
t|
|	 ||j}
t|
t	d d S )Nr   rK   r!   r   r"   r#   r,   ra   )r0   r2   r1   rh   r#   rm   )
r   
set_paramsr8   r9   rw   r   fit_predictr;   r6   Zarange)ru   r3   r$   rh   Zglobal_dtyperd   r>   rf   ri   rU   predrE   rE   rF   test_kmeans_predict  s0       





r   c                 C   sl   t j|tf}| t|dd}|jt|d | t|dd}|jt|d t	|j
|j
 t|j|j d S Nr+   r0   r#   r1   r4   )r6   ra   rb   Zrandom_sampler!   r0   r8   r>   r|   r   r9   r   r;   )ru   rd   r5   Zkm_denseZ	km_sparserE   rE   rF   test_dense_sparse  s"        r   c                 C   s^   t |trdnd}| t||dd}|t t|t|j |t t|t|j d S )NrK   r+   r   r   )	r   r   r0   r8   r|   r   rw   r>   r9   )ru   r2   r1   ri   rE   rE   rF   test_predict_dense_sparse  s    

r   c           
   	   C   s   t ddgddgddgddgddgddgg}|||d	}|d
krFdnd}|d
kr^|d d n|}| d|||d}| tkr|jdd || |jjt jkstddddddg}	t	t
|j|	d | tkrt||}|jjt jkstd S )Nr   rK      rJ   rI   r+   r.   rL   r,   r   r   )r   r   )r6   r7   r   r   r8   r;   r)   float64r=   r   r   r9   r   r   )
ru   r$   r)   r2   rd   X_denser>   r1   ri   r@   rE   rE   rF   test_integer_input  s&    .   
r   c                 C   sb   | t |dt}||j}t|t|j t| t	
t  |t}t|tt|j d S )Nr0   r#   )r0   r8   r>   	transformr;   r   r   r   Zdiagonalr6   rO   )ru   rd   ri   XtrE   rE   rF   test_transform  s    
r   c                 C   s8   | |dd tt}| |ddt}t|| d S )Nr+   )r#   r1   )r8   r>   r   Zfit_transformr   )ru   rd   ZX1ZX2rE   rE   rF   test_fit_transform  s    r   c                 C   s:   t j}dD ]*}ttd|| ddt}|j|ks
tq
d S )N)r+   r^   rK   ra   r+   )r0   r2   r1   r#   rh   )r6   infr   r0   r8   r>   r:   r=   )rd   Zprevious_inertiar1   ri   rE   rE   rF   test_n_init  s    r   c                 C   s`   t ttd | d\}}}|jttfks(tt|jd tks@ttt	t
|d |dks\td S )N)r0   r5   r#   r   r   r   )r   r>   r0   rz   r   r=   r6   r   r   r   r   )rd   Zcluster_centersrU   inertiarE   rE   rF   test_k_means_function  s       r   c           
      C   s0  | d|d}i }i }i }i }t jt jfD ]}|j|dd}	||	 |j||< ||	||< |j||< |j||< |jj	|kst
| tkr(||	dd  |jj	|ks(t
q(t|t j |t j dd t|t j |t j |t j  d d	 t|t j |t j |t j  d d	 t|t j |t j  d S )
Nr+   )r1   r#   Fr{   r   r-   r   rtol)Zatol)r6   r   float32astyper8   r:   r   r;   r9   r)   r=   r   r   r   maxr   )
ru   rQ   rd   ri   r   r   r"   rU   r)   r>   rE   rE   rF   test_float_precision-  s0    



(  r   c                 C   sJ   t j|dd}tj|dd}| |tdd}|| t|j|rFtd S )NFr   r+   r   )	r>   r   r"   r0   r8   r6   Zmay_share_memoryr;   r=   )ru   r)   Z
X_new_typeZcenters_new_typeri   rE   rE   rF   test_centers_not_mutatedS  s
    
r   c                 C   s8   t td| }t t|jdd| }t|j|j d S )N)r0   r+   r   )r   r0   r8   r;   r   )rQ   r   r   rE   rE   rF   test_kmeans_init_fitted_centersb  s    r   c              	   C   st   t ddgddgddgddgg}td| d}d}tjt|d* || t|jtt	dksft
W 5 Q R X d S )Nr   r+      r   zmNumber of distinct clusters \(3\) found smaller than n_clusters \(4\). Possibly due to duplicate points in X.rn   r-   )r6   asarrayr   rc   rq   r   r8   setr9   r   r=   )rd   r>   ri   msgrE   rE   rF   1test_kmeans_warns_less_centers_than_unique_pointsl  s    "
r   c                 C   s   t j| ddS Nr   r   )r6   sortr   rE   rE   rF   _sort_centers}  s    r   c                 C   s   t j| jddtd}t jt|dd}ttdt	| d}t
|jt|d}t |j|}t
||}t|j| t|j|j tt|jt|j d S )Nr+   r^   r[   r   r   )r2   r1   r0   r#   r4   )r6   ra   rb   randintr!   repeatr>   r   r"   r0   r   r8   r9   r   r   r:   r   r;   )rd   r5   ZX_repeatri   Zkm_weightedZrepeated_labelsZkm_repeatedrE   rE   rF   test_weighted_vs_repeated  s*         r   c                 C   s\   t t}| t|dd}t|j|d d}t|j||d}t|j|j t|j	|j	 d S r   )
r6   rN   r!   r0   r   r8   r   r9   r   r;   )ru   rQ   rd   r5   ri   Zkm_noneZkm_onesrE   rE   rF   test_unit_weights_vs_no_weights  s    
r   c                 C   sj   t j|jtd}| t|dd}t|j||d}t|j|d| d}t|j	|j	 t
|j|j d S )Nr[   r+   r   r4   r*   )r6   ra   rb   rv   r!   r0   r   r8   r   r9   r   r;   )ru   rQ   rd   r5   ri   Zkm_origZ	km_scaledrE   rE   rF   test_scaled_weights  s    r   c                  C   s$   t dddt} | jdks td S )Nr(   r+   )r3   rh   )r   r8   r>   r<   r=   r   rE   rE   rF    test_kmeans_elkan_iter_attribute  s    r   c                 C   st   | dgdgg}ddg}t dgdgg}td|dd}|j||d tt|jdks\tt|j	dgdgg d S )	NrI   r+   gffffff?g?rK   r.   r   r4   )
r6   r7   r   r8   lenr   r9   r=   r   r;   )r$   r>   r5   r2   ri   rE   rE   rF   #test_kmeans_empty_cluster_relocated  s    r   c              	   C   s~   t j|}|jdd}tddd | t|d|j}W 5 Q R X tddd | t|d|j}W 5 Q R X t|| d S )N)2   rK   r[   r+   Zopenmp)ZlimitsZuser_apir   r.   )	r6   ra   rb   rX   r   r0   r8   r9   r   )ru   rd   re   r>   Zresult_1Zresult_2rE   rE   rF   #test_result_equal_in_diff_n_threads  s    r   c                	   C   s0   t jtdd tdddt W 5 Q R X d S )Nz9algorithm='elkan' doesn't make sense for a single clusterrn   r+   r(   )r0   r3   )rc   rq   r   r   r8   r>   rE   rE   rE   rF   test_warning_elkan_1_cluster  s
    r   c                 C   sz   t j|jdd}|d d }| |}dd }|||\}}tdd||dd|}|j}	|j}
t||	 t	||
 d S )N)r   r^   r[   r^   c                 S   sP   |  }t| |}t|jd D ]}| ||k jdd||< q t| |}||fS r   )r{   r   r   rz   r   )r>   r2   Znew_centersrU   labelrE   rE   rF   	py_kmeans  s    

z+test_k_means_1_iteration.<locals>.py_kmeansr+   )r0   r1   r2   r3   rh   )
r6   ra   rb   rv   r   r8   r9   r;   r   r   )r$   r&   rd   r>   r?   r   Z	py_labelsZ
py_centersZ	cy_kmeansZ	cy_labelsZ
cy_centersrE   rE   rF   test_k_means_1_iteration  s$        
r   squaredTFc                 C   s   t j|}tjdddd|| d}| d}|dj| dd}|d	  }|| d	  }|rh|nt 	|}t
|||}	t|j|j|||}
| t jkrd
nd}t|	|
|d t|	||d t|
||d d S )Nr+   r   r*   csrZdensityformatr#   r)   rI   Fr   r.   r   gHz>r   )r6   ra   rb   sptoarrayrM   r   r   r   sqrtr	   r
   rQ   rR   r   r   )r)   r   rd   r}   Za_sparseZa_densebZb_squared_normexpectedZdistance_dense_denseZdistance_sparse_denser   rE   rE   rF   test_euclidean_distance  s4             r  c                 C   s|  t j|}tjdddd|| d}| }|dj| dd}|ddj| dd}|jddt jd	}|||  d
 j	dd}t 	|| }	t
||||dd}
t||||dd}| t jkrdnd}t|
||d t|
|	|d t||	|d d}||k}|| ||  d
 j	dd}t 	|||  }	t
||||d|d}
t||||d|d}t|
||d t|
|	|d t||	|d d S )Nr   rK   r*   r   r  Fr   r^   )r\   r)   r.   r+   r   )	n_threadsr   r_   r   )r	  Zsingle_label)r6   ra   rb   r  r  r   r   r   rP   r   r   r   r   r   )r)   rd   r}   ZX_sparser   r5   r"   rU   Z	distancesr  Zinertia_denseZinertia_sparser   r   maskrE   rE   rF   test_inertia*  sd                       r  zKlass, default_n_initrK   r-   c              	   C   s   | dd}t   t dt |t W 5 Q R X | jdkrBdnd}d| d}|  }tjt|d	 |t W 5 Q R X d S )
Nr+   r1   errorr   rK   r-   z/The default value of `n_init` will change from z to 'auto' in 1.4rn   )	warningscatch_warningssimplefilterrr   r8   r>   __name__rc   rq   )Klassdefault_n_initestr   rE   rE   rF   !test_change_n_init_future_warningV  s    


r  c                 C   s\   | ddd}| t |jdks$t| ddd}| t | jdkrP|jdksXndsXtd S )	Nrk   r   )r1   r2   r+   ra   r   rK   r-   )r8   r>   r   r=   r  )r  r  r  rE   rE   rF   test_n_init_autog  s    

r  c                 C   sV   t dgdgdgg}t dddg}| dddj||d	 t|t dddg d S )
Nr+   r.   r   r*   g?g333333?r   r   r4   )r6   r7   r8   r   )ru   r>   r5   rE   rE   rF   test_sample_weight_unchangedr  s    r  zparam, matchr0   r+   z#n_samples.* should be >= n_clusterszIThe shape of the initial centers .* does not match the number of clustersc                 C   s   | d d S )Nr.   rE   ZX_r   r#   rE   rE   rF   r     r   rL   zUThe shape of the initial centers .* does not match the number of features of the datac                 C   s   | d dd df S )NrL   r.   rE   r  rE   rE   rF   r     r   c              	   C   s:   | dd}t jt|d |jf |t W 5 Q R X d S )Nr+   r  rn   )rc   raises
ValueErrorr   r8   r>   )ru   paramro   ri   rE   rE   rF   test_wrong_params|  s    
r  x_squared_normszKThe length of x_squared_norms .* should be equal to the length of n_samplesc              	   C   s,   t jt|d tttf|  W 5 Q R X d S )Nrn   )rc   r  r  r   r>   r0   )r  ro   rE   rE   rF   !test_kmeans_plusplus_wrong_params  s    r  c                 C   s   |  |} t| t|d\}}|jd tks.t|dk s>t|| jd k sTt|jd tksft|jdd| jddk st|jdd| jddk sttt	|  || d S )Nr]   r   r   )
r   r   r0   rz   r=   allr   minr   r>   )rQ   r)   rd   r"   rR   rE   rE   rF   test_kmeans_plusplus_output  s    
  
  r!  r   c                 C   s$   t tt| d\}}tt| | d S )N)r  )r   r>   r0   r   )r  r"   rR   rE   rE   rF   test_kmeans_plusplus_norms  s    r#  c                 C   s<   t tt| d\}}tt}t |t| d\}}t|| d S )Nr]   )r   r>   r0   r6   r   r   )rd   Z	centers_crf   r   r   rE   rE   rF   test_kmeans_plusplus_dataorder  s    
  
r$  c               	   C   s   t jddddddddgt jd} t| | ds0tt jddddddddgt jd}t| |ds`tt jddddddddgt jd}t| |drtd S )Nr+   r   r.   r,   r-   )r6   r7   rP   r   r=   )Zlabels1Zlabels2Zlabels3rE   rE   rF   test_is_same_clustering  s       r%  kwargs)r2   r1   c                 C   sH   t jddgddgddgddggt jd}tf ddi| }|| dS )zZCheck that init works with numpy scalar strings.

    Non-regression test for #21964.
    r   r*   r+   r,   r0   r.   N)r6   r   r   r   r8   )r&  r>   Z
clusteringrE   rE   rF   -test_kmeans_with_array_like_or_np_scalar_init  s    (r'  zKlass, methodr8   r   c                    sR   | j   |  }t||t |jjd }| }t fddt|D | dS )z=Check `feature_names_out` for `KMeans` and `MiniBatchKMeans`.r   c                    s   g | ]}  | qS rE   rE   ).0r   
class_namerE   rF   
<listcomp>	  s     z*test_feature_names_out.<locals>.<listcomp>N)	r  lowergetattrr>   r;   rz   Zget_feature_names_outr   r   )r  methodrD   r0   Z	names_outrE   r)  rF   test_feature_names_out  s    
r/  	is_sparsec                 C   sb   t ddddd\}}| r"t|}t }||}t|j|_t|j|_||}t	|| dS )z_Check that predict does not change cluster centers.

    Non-regression test for gh-24253.
    r   rK   r   r   N)
r   r  
csr_matrixr   r   r   r;   r9   rw   r   )r0  r>   rf   rD   Zy_pred1Zy_pred2rE   rE   rF   ,test_predict_does_not_change_cluster_centers  s    


r2  c           	   	   C   s   t j|}tddd|d\}}t|dd}t }|j||| |j|jd ddt j|d	}|j||| t 	|jd dt j|d	}t
t t|| W 5 Q R X d
S )zCheck that sample weight is used during init.

    `_init_centroids` is shared across all classes inheriting from _BaseKMeans so
    it's enough to check for KMeans.
    r   rK   r   Tr"  r   r[   r^   r>   r  r2   r5   Zn_centroidsr#   N)r6   ra   rb   r   r   r   _init_centroidsrv   rz   rN   rc   r  r=   r   )	r2   rd   r}   r>   rf   r  rD   clusters_weightedZclustersrE   rE   rF   test_sample_weight_init   s6       


r6  c           
   	   C   s   t j|}tddd|d\}}|j|jd d}d|ddd< t|dd	}t }|j||| |d
t j|d}t	|ddd |}	t 
t |	drtdS )zCheck that if sample weight is 0, this sample won't be chosen.

    `_init_centroids` is shared across all classes inheriting from _BaseKMeans so
    it's enough to check for KMeans.
    r   r^   r   r   r[   Nr.   Tr"  rK   r3  )r6   ra   rb   r   rv   rz   r   r   r4  r   r   iscloser=   )
r2   rd   r}   r>   rf   r5   r  rD   r5  drE   rE   rF   test_sample_weight_zeroB  s*       


r9  )__doc__r   r   r  ior   Znumpyr6   rc   Zscipyr   r  Zsklearn.baser   Zsklearn.clusterr   r   r   r   Zsklearn.cluster._k_means_commonr	   r
   r   r   r   r   r   Zsklearn.cluster._kmeansr   r   Zsklearn.datasetsr   Zsklearn.exceptionsr   Zsklearn.metricsr   r   Zsklearn.metrics.clusterr   Zsklearn.metrics.pairwiser   Zsklearn.utils._testingr   r   r   Zsklearn.utils.extmathr   Zsklearn.utils.fixesr   r   markfilterwarningsZ
pytestmarkr7   r"   r!   rz   r0   r   r>   r   r1  r|   Zparametrizer   r   rG   rH   rV   rg   rj   rt   rx   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rP   Zint64r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r  r!  r#  r$  r%  Zstr_r'  r/  r2  r6  r9  rE   rE   rE   rF   <module>   s  $	
   

 
  
  
 
) 
 


?
	





.

 
 #
   
 

$
	 
 

	 
 
+



	

	


 "



!