U
    Kvf-                     @   s   d Z ddlZddlmZmZ ddlZddlm	Z	m
Z
 ddlmZ ddlmZ ddlmZ ddlmZ G d	d
 d
ee	ZG dd dee
ZG dd deeZdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  ZdS )!z>
Created on Wed May 23 12:53:27 2018

Author: Josef Perktold

    N)assert_allcloseassert_equal)PoissonLogit)GLMfamily)PenalizedMixin)VariableScreeningc                   @   s   e Zd ZdS )PoissonPenalizedN__name__
__module____qualname__ r   r   I/tmp/pip-unpacked-wheel-2v6byqio/statsmodels/base/tests/test_screening.pyr      s   r   c                   @   s   e Zd ZdS )LogitPenalizedNr   r   r   r   r   r      s   r   c                   @   s   e Zd ZdS )GLMPenalizedNr   r   r   r   r   r      s   r   c            	      C   s   t jd d\} }d}t j| |dt j| dd   d d }|d9 }||d	 |d	 }d|d d d	f< t |}d	d
dddg}dt d|d  ||< t |}|	|}t 
|}t j|}||||fS )N d              ?         ?   333333?r   r   ,      )nprandomseedrandmeanstdzerosarangesqrtdotexppoisson	nobsk_vars	k_nonzeroxbetaidx_nonzero_truelinpredmuyr   r   r   _get_poisson_data    s*    



r7   c                  C   sR  t jd t \} }}}t| }dd |D }d|d< tj|| |dgd}tj|d d |f |d}t| | }|j	|d	< t
| t ||d
 d}	t|	}
|d d dd f }|
j|dd}tt |j| dd |jD }d|d< |jj|d |j  t|jjd d tj|jj	|dd}|j|dd}t|d	 |d dd d S )Nr   c                 S   s   g | ]}d | qS zvar%4dr   .0iir   r   r   
<listcomp><   s     z*test_poisson_screening.<locals>.<listcomp>constr   trueindexcolumnsrA   oracler   
pen_weightr   
   maxiterc                 S   s   g | ]}d | qS r8   r   r9   r   r   r   r<   M   s     Zxname	convergedTfinalr@   nameouterhowh㈵>Zatol)r!   r"   r#   r7   lenpd	DataFramer   fitparamsr   onesr
   screen_exogr   sortidx_nonzeroresults_finalsummaryresults_penmle_retvalsSeriesjoinr   r6   r1   r3   r2   r.   xnames_true
parametersxframe_true
res_oraclemod_initialscreenerexog_candidates
res_screenxnamespsr   r   r   test_poisson_screening5   s0    

rm   c                     s  t jd d\t jd dt jdd   d d } | d9 } | | d | d } t t | f} dt dd  }t 	|}| 
|}t |}t j|}| d d dd f dd d d f  |  fd	d
}t }d|d d < t t |f}dD ]}t||d d d |f d d}	t|	}
d|
_|
| }ddddg}t|j| t jddgddgddgddggt jd}t|j| q$d S )Nr   )r   r   r   r   r   r   r   r   c                  3   s   d} d}t |D ]}d  tj|  dtjdd   d d }|d9 }|d k r|d d |d f |d d d	f< ||d
 |d
 }|V  qd S )Nr      g?r   r   r   r   r   rF   r   )ranger!   r"   r$   r%   r&   )r/   Z	n_batchesir1   commonr0   r.   Z	x_nonzeror   r   exog_iteratorp   s     z+test_screen_iterated.<locals>.exog_iteratorr   r   r   rD      Zvar0_10Zvar1_10Zvar2_10Zvar3_10rF      )Zdtype)r!   r"   r#   r$   r%   r&   Zcolumn_stackrX   r(   r)   r*   r+   r,   sumr   r
   	k_max_addZscreen_exog_iteratorr   Zexog_final_namesarrayZint64Zidx_nonzero_batches)r1   r2   r4   r5   r6   rs   dummyZ	exog_keepkrg   rh   rK   namesZidx_fullr   rq   r   test_screen_iterated[   sL    


&
"
r}   c                  C   sN  t  \} }}}t| }dd |D }d|d< tj|| |dgd}tj|d d |f |d}t| |t d }|j|d	< t| t	
|t d}	t|	}
|d d d
d f }|
j|dd}tt	|j| dd |jD }d|d< |jj|d |j  t|jjd d tj|jj|dd}|j|dd}t|d	 |d dd d S )Nc                 S   s   g | ]}d | qS r8   r   r9   r   r   r   r<      s     z-test_glmpoisson_screening.<locals>.<listcomp>r=   r   r>   r?   rB   r   rC   r   rF   rG   c                 S   s   g | ]}d | qS r8   r   r9   r   r   r   r<      s     rI   rJ   TrK   rL   rN   rO   rQ   rR   )r7   rS   rT   rU   r   r   r   rV   rW   r!   rX   r
   rY   r   rZ   r[   r\   r]   r^   r_   r`   ra   r   rb   r   r   r   test_glmpoisson_screening   s*    

r~   c            	      C   s   t jd d\} }d}t j| |dt j| dd   d d }|d9 }||d	 |d	 }d|d d d	f< t |}d	d
dddg}dt d|d d  ||< t |}|	|}ddt 
|   }t jt||k t}||||fS )Nr   )r   r   r   {Gz?r   r   r   r   r   r   r   r   r    r   )r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   rS   Zastypeintr-   r   r   r   _get_logit_data   s*    


r   c               	   C   s  t  \} }}}t| }t|d dd}dd |D }d|d< tj|| |dgd	}tj|d d |f |d
}t| | }	|	j|d< t| t	
||d d}
t|
f|}d|_|d d dd f }|j|dd}t	|jjdk}tt	|j| | t	dddddddg}tt	|j| dd |jD }d|d< |jj|d |j  t|jjd d tj|jj|dd}||d< t|d |d dd d S ) Ngffffff?MbP?)rE   threshold_trimc                 S   s   g | ]}d | qS r8   r   r9   r   r   r   r<      s     z(test_logit_screening.<locals>.<listcomp>r=   r   r>   r?   rB   rC   r   rD   ru   r   rG   皙?J   r      r   r   r    c                 S   s   g | ]}d | qS r8   r   r9   r   r   r   r<      s     rI   rJ   TrK   rL   {Gzt?rR   )r   rS   dictrT   rU   r   rV   rW   r   r!   rX   r
   rx   rY   absr\   r   rZ   r[   ry   r]   r^   r_   r`   r   )r6   r1   r3   r2   r.   screener_kwdsrc   rd   re   rf   rg   rh   ri   rj   maskZidx_rrk   rl   r   r   r   test_logit_screening   s8    

r   c                  C   s^  t  \} }}}t| }t|d ddd}dd |D }d|d< tj|| |d	gd
}tj|d d |f |d}t| |t d }	|	j	|d< t| t
|t d}
t|
f|}d|_|d d dd f }|j|dd}|j |j dd |jD }d|d< |jj|d |j  t|jjd d tj|jj	|dd}||d< t|d |d dd d S )N      ?r   model.score_factorrE   r   Zranking_attrc                 S   s   g | ]}d | qS r8   r   r9   r   r   r   r<      s     z+test_glmlogit_screening.<locals>.<listcomp>r=   r   r>   r?   rB   r   rC   rF   r   ru   rG   c                 S   s   g | ]}d | qS r8   r   r9   r   r   r   r<     s     rI   rJ   TrK   rL   r   rR   )r   rS   r   rT   rU   r   r   ZBinomialrV   rW   r!   rX   r
   rx   rY   r[   r\   r]   r^   r   r_   r`   r   )r6   r1   r3   r2   r.   r   rc   rd   re   rf   rg   rh   ri   rj   rk   rl   r   r   r   test_glmlogit_screening   s8    


r   c                  C   s   t jd d\} }d}t j| |dt j| dd   d d }|d9 }||d	 |d	 }d|d d d	f< t |}d	dd
ddg}dt d|d  ||< t |}|	|}|dt j
t|  }||||fS )Nr   r   r   r   r   r   r   r   r   r   r   r    r   r   )r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   ZrandnrS   )r.   r/   r0   r1   r2   r3   r4   r6   r   r   r   _get_gaussian_data#  s(    


r   c                  C   s  t  \} }}}t| }| | d } t|d ddd}dd |D }d|d< tj|| |d	gd
}tj|d d |f |d}t| |t d	 }	|	j
|d< dD ]}
t| |d d d |
f t d}t|f|}|d d |
d f }|j|dd}tt|j| dd |jD }d|d< |jj|d |j  t|jjd d tj|jj
|dd}|j|dd}t|d |d dd |d= qd S )Nr   r   r   r   r   c                 S   s   g | ]}d | qS r8   r   r9   r   r   r   r<   C  s     z.test_glmgaussian_screening.<locals>.<listcomp>r=   r>   r?   rB   r   rC   rt   ru   rG   c                 S   s   g | ]}d | qS r8   r   r9   r   r   r   r<   T  s     rI   rJ   TrK   rL   rN   rO   gh㈵>rR   )r   rS   r%   r   rT   rU   r   r   ZGaussianrV   rW   r
   rY   r   r!   rZ   r[   r\   r]   r^   r_   r`   ra   r   )r6   r1   r3   r2   r.   r   rc   rd   re   rf   Zk_keeprg   rh   ri   rj   rk   rl   r   r   r   test_glmgaussian_screening8  s:    

"
r   )__doc__Znumpyr!   Znumpy.testingr   r   ZpandasrT   Z#statsmodels.discrete.discrete_modelr   r   Z+statsmodels.genmod.generalized_linear_modelr   Zstatsmodels.genmod.familiesr   Zstatsmodels.base._penalizedr	   Zstatsmodels.base._screeningr
   r   r   r   r7   rm   r}   r~   r   r   r   r   r   r   r   r   r   <module>   s(   &5#-.