U
    Kvf3                     @   s   d dl Zd dlZd dlZd dlmZ d dlmZ	 d dl
mZmZ d dlZzd dlmZ W n ek
rl   Y nX dZerd dlmZ edZndZdd Zd	d
 Zdd ZG dd dZG dd dZdd ZdS )    N)mice)assert_equalassert_allcloseF)PdfPagesztest_mice.pdfc                 C   s   t r| | d S N)
pdf_outputZsavefig)pdffig r
   J/tmp/pip-unpacked-wheel-2v6byqio/statsmodels/imputation/tests/test_mice.pyclose_or_save   s    r   c                   C   s   t rt  d S r   )r   r   closer
   r
   r
   r   teardown_module   s    r   c                  C   sl  t jd} d}d}| j||fd}|dddf |dddf  d|dddf   |ddd	f< |ddd	f  | j|d7  < d|dddf d	k |dddf< |d| j|d }t|}d
d td|d D |_||d< t j	|j
d	ddf< t j	|j
d	ddf< t j	|j
ddddf< t j	|j
ddddf< t j	|j
dddf< t j	|j
ddddf< |S )z0
    Create a data set with missing values.
    iÅ        sizeN         r   c                 S   s   g | ]}d | qS )x%dr
   ).0kr
   r
   r   
<listcomp>4   s     zgendat.<locals>.<listcomp>y;   x1'   x2
      x3   1      x4(   ,   x5   c   )nprandomRandomStatenormalsumpd	DataFramerangecolumnsnanloc)gennpZexogZendogdfr
   r
   r   gendat"   s$    @ $
r:   c                   @   s|   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Ze	j
jdd Ze	j
jdd Ze	j
jdd Ze	j
jdd ZdS )TestMICEDatac              
      s  t     }t }t  j\}}tjd t	
d tjd t	
dd tjd t	
d tjd t	
ddd	 tjd t	t	
dt	
d
dd	t	
ddf tfddjD  fdd D  tdD ]l}  tjjd | tjjd | t|| j|  tfddjD  fdd D  qd}tjd | tjdkst jk	std\}}}	}
}tt|d t|jddg t|	jddg d S )Nr   <   r   r   r&   r!   r   r)   r      c                    s   g | ]}t  j| qS r
   setdatar   colimp_datar
   r   r   V   s     z-TestMICEData.test_default.<locals>.<listcomp>c                    s   g | ]}t  |  qS r
   r?   ZdropnarA   r9   r
   r   r   W   s     r$   r   r   c                    s   g | ]}t  j| qS r
   r>   rA   rC   r
   r   r   ^   s     c                    s   g | ]}t  |  qS r
   rE   rA   rF   r
   r   r   _   s     zx1 ~ x2 + x3 + x4 + x5 + y)r(   r!   r%   r   r   r   )r(   r%   r!   r   r   r         )r:   copyr0   notnullr   MICEDatashaper   ix_missr+   arangeZix_obsZconcatenater   r@   r2   
update_allconditional_formulatuple_cycle_orderAssertionErrorZget_split_datalen)selforigmxnrowncolr   fmlZ	endog_obsZexog_obsZ	exog_missZpredict_obs_kwdsZpredict_miss_kwdsr
   )r9   rD   r   test_defaultE   sJ    




 zTestMICEData.test_defaultc              
   C   s   t  }tt||d< |jd|d< t|}t	ddN t
jdd6}t
d |  dd	 |D }t|d
ks~tW 5 Q R X W 5 Q R X dS )zTest that MICEData does not throw a SettingWithCopyWarning when imputing (https://github.com/statsmodels/statsmodels/issues/5430)intcolZint32zmode.chained_assignmentwarnT)recordalwaysc                 S   s   g | ]}d |j kr|qS )z\pandas\)filename)r   wr
   r
   r   r      s     
 z<TestMICEData.test_settingwithcopywarning.<locals>.<listcomp>r   N)r:   r+   rO   rU   r]   Zastyper   rL   r0   Zoption_contextwarningscatch_warningssimplefilterrP   rT   )rV   r9   ZmiceDatawsr
   r
   r   test_settingwithcopywarningt   s    

z(TestMICEData.test_settingwithcopywarningc                 C   sj   t  }t|}g }tdD ]4}| }t|tjs8tt	|j
|j
 || q|d |d ksftd S )Nr   r   r   )r:   r   rL   r2   next_sample
isinstancer0   r1   rT   r   rM   append)rV   r9   rD   Zall_xjxr
   r
   r   test_next_sample   s    
zTestMICEData.test_next_samplec           	      C   s   t  }| }t|}|j\}}dD ]`}tj||d}tdD ]D}|  t	|j
jd | t	|j
jd | t|| |j
|  q@q&t|jdkstd S )NZgaussianbootperturbation_methodr   r   r   rG   )r:   rJ   r0   rK   rM   r   rL   r2   rP   r   r@   r   rR   rS   rT   )	rV   r9   rW   rX   rY   rZ   Z	pert_methrD   r   r
   r
   r   test_pertmeth   s    

zTestMICEData.test_pertmethc              	      s`  t jd}d}|j|d}|j|d}|j|dt | }|j|d}t ||k ||}t ||kdd}t||||d}	t j	|	j
ddd	f< t j	|	j
ddd
f< t j	|	j
dddf< t j	|	j
dddf< ddlm}
 g   fdd}dD ]P}tj|	||d}|jd	d|
d
td
iddi|d | }t|tjstqtdd  D s\td S )Ni&"  ,  r   r   r   )timestatusr   r   r   r&   rt   ru   r)   2   r   r<   r   )PHRegc                    s     | jj d S r   )rj   r@   rM   )imphistr
   r   cb   s    z#TestMICEData.test_phreg.<locals>.cbrn   )rq   Zhistory_callbackz0 + x1 + x2Z	pred_typehr)model_class	init_kwdsZpredict_kwdsrq   c                 S   s   g | ]}|d kqS ))i+  r   r
   )r   valr
   r
   r   r      s     z+TestMICEData.test_phreg.<locals>.<listcomp>)r+   r,   r-   r.   Zexponentialexpwherer0   r1   r4   r5   Z&statsmodels.duration.hazard_regressionrw   r   rL   set_imputerZPatsyFormularh   ri   rT   all)rV   r6   r7   r   r   Z
event_timeZobs_timert   ru   r9   rw   r{   Zpmidatarl   r
   ry   r   
test_phreg   s4    
zTestMICEData.test_phregc                 C   s  ddl m} ddlm} t }| }t|}|j\}}t	
|}|dd |dd |jdtjd	tj id
 |  t|jjd | t|jjd | t|| |j|  tddD ]}	|	dkrtt|jd tjd tt|jd jtjjd tt|jd |d qtt|jd|	  tjd tt|jd|	  |d qd}
t|jd |
 d}
t|jd |
 t|jdkstd S )Nr   RegressionResultsWrapperGLMResultsWrapperr   zx3 + x4 + x3*x4r   zx4 + I(x5**2)r!   family)r}   r~   r   rI   r$   Tr   zx1 ~ x3 + x4 + x3*x4zx4 ~ x1 + x2 + x3 + x5 + yr%   rG   )#statsmodels.regression.linear_modelr   +statsmodels.genmod.generalized_linear_modelr   r:   rJ   r0   rK   rM   r   rL   r   smGLMfamiliesBinomialrP   r   r@   r   r2   ri   modelsr   resultsOLSrQ   rR   rS   rT   )rV   r   r   r9   rW   rX   rY   rZ   rD   rk   r[   r
   r
   r   test_set_imputer   s8    




zTestMICEData.test_set_imputerc                 C   sZ   t  }t|}dD ]@}dD ]6}dD ],}t  |j|||d}tt| |  q$qqd S )N)patternrawFT)	row_orderhide_complete_rowscolor_row_patterns)r:   r   rL   pltclfZplot_missing_patternr   r   )rV   close_figuresr9   rD   r   r   r   r	   r
   r
   r   test_plot_missing_pattern  s    

z&TestMICEData.test_plot_missing_patternc                 C   s`   t  }t|}|  t  dD ]6}|jdd|d}| d d t	t
| |  q$d S )Nr   r   r%   plot_pointsr   plot_bivariate)r:   r   rL   rP   r   r   r   get_axes	set_titler   r   rV   r   r9   rD   r   r	   r
   r
   r   test_plot_bivariate  s    

z TestMICEData.test_plot_bivariatec                 C   s^   t  }t|}|  t  dD ]4}|jd|d}| d d t	t
| |  q$d S )Nr   r%   r   r   Zplot_fit_scatterplot)r:   r   rL   rP   r   r   Zplot_fit_obsr   r   r   r   r   r
   r
   r   test_fit_obs!  s    

zTestMICEData.test_fit_obsc                 C   sZ   t  }t|}|  t  dD ]0}|d}| d d t	t
| |  q$d S )Nr   r%   r   plot_imputed_hist)r:   r   rL   rP   r   r   r   r   r   r   r   r   r
   r
   r   test_plot_imputed_hist0  s    


z#TestMICEData.test_plot_imputed_histN)__name__
__module____qualname__r\   rg   rm   rr   r   r   pytestmarkZ
matplotlibr   r   r   r   r
   r
   r
   r   r;   C   s   /$-


r;   c                   @   s<   e Zd Zdd Zdd Zdd Zdd Zejj	d	d
 Z
dS )TestMICEc                 C   sJ   t  }t|}tdtj|}|dd}t|jtj	s>t
| }d S )Ny ~ x1 + x2 + x1:x2r   r$   )r:   r   rL   MICEr   r   fit
issubclass	__class__ZMICEResultsrT   summary)rV   r9   rD   miresultZsmrr
   r
   r   	test_MICEA  s    
zTestMICE.test_MICEc                 C   sV   t  }t|}tdtj|}ddlm} tdD ]}|	 }t
|j|s4tq4d S )Nr   r   r   r$   )r:   r   rL   r   r   r   r   r   r2   rh   r   r   rT   )rV   r9   rD   r   r   rk   rl   r
   r
   r   
test_MICE1N  s    
zTestMICE.test_MICE1c                 C   s6   t  }tj|dd}|jdddddd |  d S )	Nro   rp   r   zx2 + yr   r   )alphaZL1_wt)Zfit_kwds)r:   r   rL   r   rP   )rV   r9   rx   r
   r
   r   test_MICE1_regularized[  s    zTestMICE.test_MICE1_regularizedc                 C   sv   ddl m} t }t|}tjdtj|dtj	 id}t
dD ].}| }t||s\tt|jtjj	sBtqBd S )Nr   r   zx3 ~ x1 + x2r   )r~   r$   )r   r   r:   r   rL   r   r   r   r   r   r2   rh   ri   rT   r   )rV   r   r9   rD   r   rk   rl   r
   r
   r   
test_MICE2c  s    
zTestMICE.test_MICE2c                 C   s   t jd}|jdd}|jdd}|| |jdd }t j|dd< t j|dd < t|||d}t|}tj	dt
j|d	d
}|dd	}t dddg}	t|j|	dd t dddg}
t|j|
dd t dddg}t|j|dd d S )N9  rs   r   r   d      )r   r   r   zy ~ x1 + x2r"   )Zn_skipr   glu?g7|N?gZ?gh㈵>)ZatolgmCM١g$?g ,Y?gk(~g)Kӏ.@g߽'(:+@)r+   r,   r-   r.   r4   r0   r1   r   rL   r   r   r   r   Zasarrayr   Zfrac_miss_infoparamstvalues)rV   r6   r   r   r   r9   r   r   r   Zfmir   r   r
   r
   r   t_est_combineq  s     
zTestMICE.t_est_combineN)r   r   r   r   r   r   r   r   r   Zslowr   r
   r
   r
   r   r   ?  s   r   c                  C   s   t jd} t| dd}ddddg|_t j|jd< t j|jd	d
gdf< t	
|}|  t|j j d t jg t jdt jd	gt jdt jd	d
gt jdt jg t jdd}|D ]}t|j| ||  qd S )Nr   rv   r   var1var2var3var4)r   r   r   r$   r   r   )Zdtype)r   r   r   r   )r+   r,   r-   r0   r1   Zrandr3   r4   Zilocr   rL   rP   r   r@   Zisnullvaluesr/   arrayZint64rN   )r6   r@   Zdata_imprN   r   r
   r
   r   test_micedata_miss1  s    
r   )Znumpyr+   Zpandasr0   r   Zstatsmodels.imputationr   Zstatsmodels.apiapir   Znumpy.testingr   r   rc   Zmatplotlib.pyplotZpyplotr   ImportErrorr   Zmatplotlib.backends.backend_pdfr   r   r   r   r:   r;   r   r   r
   r
   r
   r   <module>   s,   
! }J