U
    Ovf9/                     @   s  d Z ddlZddlmZmZ ddlZddlZddlm	Z	m
Z
 ddlmZmZmZ ddlmZ ddlmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZmZmZmZ e Ze Z dd Z!dd Z"dd Z#dd Z$dd Z%dd Z&dd Z'ej()dddgdd Z*dd  Z+d!d" Z,d#d$ Z-d%d& Z.d'd( Z/ed)ef d*d+id,ej()d-d.d/gd0d1 Z0ed)ef d*d2id,ej()d-d.d/gd3d4 Z1d5d6 Z2d7d8 Z3d9d: Z4d;d< Z5dS )=zD
Testing for Isolation Forest algorithm (sklearn.ensemble.iforest).
    N)Mockpatch)
csc_matrix
csr_matrix)load_diabetes	load_irismake_classification)IsolationForest)_average_path_length)roc_auc_score)ParameterGridtrain_test_split)check_random_state)assert_allcloseassert_array_almost_equalassert_array_equalignore_warningsc              	   C   s   t ddgddgg}t ddgddgg}tdgdddgddgd	}t . |D ]"}tf d
| i||| qTW 5 Q R X dS )z6Check Isolation Forest for various parameter settings.r                  ?      ?TF)n_estimatorsmax_samples	bootstraprandom_stateN)nparrayr   r   r	   fitpredict)global_random_seedX_trainX_testgridparams r%   G/tmp/pip-unpacked-wheel-qu3nn_q2/sklearn/ensemble/tests/test_iforest.pytest_iforest"   s    r'   c                 C   s   t | }ttjdd |d\}}tddgddgd}ttfD ]p}||}||}|D ]V}tf d	| d
||}	|		|}
tf d	| d
||}|	|}t
|
| qXq@dS )z=Check IForest for various parameter settings on sparse input.N2   r   r   r   TF)r   r   
   )r   r   )r   r   diabetesdatar   r   r   r	   r   r   r   )r    rngr!   r"   r#   Zsparse_formatZX_train_sparseZX_test_sparser$   Zsparse_classifierZsparse_resultsZdense_classifierZdense_resultsr%   r%   r&   test_iforest_sparse2   s4     
 
r.   c               	   C   s   t j} d}tjt|d tdd|  W 5 Q R X t " t	dt tdd|  W 5 Q R X t ( t	dt tt
dd|  W 5 Q R X tt( t | | ddd	df  W 5 Q R X dS )
z7Test that it gives proper exception on deficient input.3max_samples will be set to n_samples for estimationmatch  r   errorautor   Nr   )irisr,   pytestwarnsUserWarningr	   r   warningscatch_warningssimplefilterr   Zint64Zraises
ValueErrorr   )Xwarn_msgr%   r%   r&   test_iforest_errorL   s    

 r@   c               	   C   sF   t j} t | }|jD ](}|jttt	| j
d kstqdS )zDCheck max_depth recalculation when max_samples is reset to n_samplesr   N)r6   r,   r	   r   estimators_Z	max_depthintr   ceillog2shapeAssertionError)r>   clfZestr%   r%   r&   test_recalculate_max_depthb   s    
rH   c               	   C   s   t j} t | }|j| jd ks&ttdd}d}tjt	|d ||  W 5 Q R X |j| jd kslttdd| }|jd| jd  kstd S )Nr   i  r3   r/   r0   g?)
r6   r,   r	   r   max_samples_rE   rF   r7   r8   r9   )r>   rG   r?   r%   r%   r&   test_max_samples_attributej   s    
rJ   c                 C   s   t | }ttj|d\}}td| d|}|jdd ||}|jdd ||}t|| td| d|}||}t|| dS )zCheck parallel regression.r)   r   )n_jobsr   r   )rK   r   N)	r   r   r+   r,   r	   r   
set_paramsr   r   )r    r-   r!   r"   Zensembley1y2Zy3r%   r%   r&    test_iforest_parallel_regressiony   s    



rO   c           	      C   s   t | }d|dd }|t|d |d f}|dd }|jdddd	}t|dd |f}td
gd dgd  }td|d|}|	| }t
||dkstdS )z#Test Isolation Forest performs wellg333333?iX  r   Nr2   r   )   r   )lowhighsizer   rQ   d   )r   r   g\(\?)r   randnZpermutationr   Zvstackuniformr   r	   r   decision_functionr   rF   )	r    r-   r>   r!   Z
X_outliersr"   y_testrG   Zy_predr%   r%   r&   test_iforest_performance   s    rZ   contamination      ?r5   c              	   C   s   ddgddgddgddgddgddgddgddgg}t || d	}|| || }||}t|dd  t|d d kstt|d
dg ddg   d S )NrP   r   r         	   r   r[      )	r	   r   rX   r   r   minmaxrF   r   )r[   r    r>   rG   Zdecision_funcpredr%   r%   r&   test_iforest_works   s    4

(rg   c                  C   s&   t j} t | }|j|jks"td S N)r6   r,   r	   r   rI   Z_max_samplesrF   )r>   rG   r%   r%   r&   test_max_samples_consistency   s    ri   c                  C   sV   t d} ttjd d tjd d | d\}}}}tdd}||| || d S )Nr   r(   r)   g?)Zmax_features)r   r   r+   r,   targetr	   r   r   )r-   r!   r"   Zy_trainrY   rG   r%   r%   r&    test_iforest_subsampled_features   s      
rk   c                  C   s   dt dt j  d } dt dt j  d }ttdgdg ttdgdg ttd	gd
g ttdg| g ttdg|g ttt dd	ddgdd
| |g tt d}t|t | d S )N       @g      @g?g     0@g}?r   g        r   r   r      i  )	r   logZeuler_gammar   r
   r   Zaranger   sort)Z
result_oneZ
result_twoZavg_path_lengthr%   r%   r&    test_iforest_average_path_length   s    
rp   c                  C   s   ddgddgddgg} t dd| }t  | }t|ddgg|ddgg|j  t|ddgg|ddgg|j  t|ddgg|ddgg d S )Nr   r   g?)r[   rl   )r	   r   r   Zscore_samplesrX   Zoffset_)r!   Zclf1Zclf2r%   r%   r&   test_score_samples   s     rq   c                  C   sv   t d} | dd}tdd| dd}|| |jd }|jdd || t|jdks`t|jd |ksrtdS )	z/Test iterative addition of iTrees to an iForestr      r   r*   T)r   r   r   Z
warm_start)r   N)r   rV   r	   r   rA   rL   lenrF   )r-   r>   rG   Ztree_1r%   r%   r&   test_iforest_warm_start   s       


rt   z*sklearn.ensemble._iforest.get_chunk_n_rowsZreturn_valuer   )Zside_effectzcontamination, n_predict_calls)r\   r   )r5   r   c                 C   s   t || | j|kstd S rh   rg   Z
call_countrF   Zmocked_get_chunkr[   Zn_predict_callsr    r%   r%   r&   test_iforest_chunks_works1  s    
rw   r*   c                 C   s   t || | j|kstd S rh   ru   rv   r%   r%   r&   test_iforest_chunks_works2  s    
rx   c                  C   s|  t d} t }||  t jd}t|| dks<tt||	dddksZtt|| d dksttt|| d dkstt 
|	dddd} t }||  t|| dkstt||	dddkstt|t ddkst|	dd} t }||  t|| dks:tt||	dddksZtt|t ddksxtdS )z=Test whether iforest predicts inliers when using uniform data)rU   r*   r   r   rU   r*   N)r   Zonesr	   r   randomRandomStateallr   rF   rV   repeat)r>   Ziforestr-   r%   r%   r&   test_iforest_with_uniform_data  s(    



 r}   c                  C   s2   t dddd\} }t| } tdddd|  d	S )
zdCheck that Isolation Forest does not segfault with n_jobs=2

    Non-regression test for #23252
    iL rU   r   )Z	n_samplesZ
n_featuresr   r*      r   )r   r   rK   N)r   r   r	   r   )r>   _r%   r%   r&   *test_iforest_with_n_jobs_does_not_segfault=  s    r   c               	   C   s^   t ddgddgg} t ddg}t }|| | d}tjt|d |j W 5 Q R X d S )Nr   r   r   r_   r   zoAttribute `base_estimator_` was deprecated in version 1.2 and will be removed in 1.4. Use `estimator_` instead.r0   )r   r   r	   r   r7   r8   FutureWarningZbase_estimator_)r>   ymodelr?   r%   r%   r&   'test_base_estimator_property_deprecatedH  s    r   c               	   C   sf   t d} tjd}| j|ddgd}tddd}t	  t
dt || W 5 Q R X d	S )
zCheck that feature names are preserved when contamination is not "auto".

    Feature names are required for consistency checks during scoring.

    Non-regression test for Issue #25844
    Zpandasr   r_   a)r,   columnsg?rb   r4   N)r7   Zimportorskipr   ry   rz   Z	DataFramerV   r	   r:   r;   r<   r9   r   )pdr-   r>   r   r%   r%   r&   #test_iforest_preserve_feature_namesV  s    

r   )6__doc__r:   Zunittest.mockr   r   Znumpyr   r7   Zscipy.sparser   r   Zsklearn.datasetsr   r   r   Zsklearn.ensembler	   Zsklearn.ensemble._iforestr
   Zsklearn.metricsr   Zsklearn.model_selectionr   r   Zsklearn.utilsr   Zsklearn.utils._testingr   r   r   r   r6   r+   r'   r.   r@   rH   rJ   rO   rZ   markZparametrizerg   ri   rk   rp   rq   rt   rw   rx   r}   r   r   r   r%   r%   r%   r&   <module>   sX   
"