U
    Kvf=f                     @   s  d Z ddlZddlZddlmZ ddlZddlm	Z	 ddl
Z
ddlmZ ddlmZmZmZmZmZmZ ddlmZmZmZmZ ddlmZmZmZ dd	lmZmZ dd
l m!Z! ddl"m#Z# ddl$m%Z% ddl"m&Z& e'dd Z(dd Z)dd Z*dd Z+dd Z,dd Z-dd Z.dd Z/dd Z0d d! Z1d"d# Z2d$d% Z3dMd'd(Z4d)d* Z5d+d, Z6d-d. Z7d/d0 Z8d1d2 Z9d3d4 Z:d5d6 Z;d7d8 Z<d9d: Z=d;d< Z>d=d> Z?d?d@ Z@dAdB ZAdCdD ZBdEdF ZCdGdH ZDe
jEjFdIdJ ZGdKdL ZHdS )Nz@
unit test for GAM

Author: Luca Puggini

Created on 08/07/2015
    N)assert_allclose)
block_diag)matrix_sqrt)UnivariatePolynomialSmootherPolynomialSmootherBSplinesGenericSmoothersUnivariateCubicSplinesCyclicCubicSplines)GLMGamLogitGammake_augmented_matrixpenalized_wls)MultivariateGAMCVMultivariateGAMCVPath_split_train_test_smoothers)UnivariateGamPenaltyMultivariateGamPenalty)KFold)GLM)Gaussian)lmc                 C   s   ddt |    S )N      ?)npexp)x r   B/tmp/pip-unpacked-wheel-2v6byqio/statsmodels/gam/tests/test_gam.py<lambda>        r   c                  C   sF   d} t dd| }d|d  | }|| 8 }dg}t||}||fS )a  A polynomial of degree 4

    poly = ax^4 + bx^3 + cx^2 + dx + e
    second der = 12ax^2 + 6bx + 2c
    integral from -1 to 1 of second der^2 is
        (288 a^2)/5 + 32 a c + 8 (3 b^2 + c^2)
    the gradient of the integral is der
        [576*a/5 + 32 * c, 48*b, 32*a + 16*c, 0, 0]

    Returns
    -------
    poly : smoother instance
    y : ndarray
        generated function values, demeaned
    '              )r   linspacemeanr   )nr   ydegreepolr   r   r   polynomial_sample_data#   s    
r,   c                 C   sL   | \}}}}d|d  d d| |  dd|d  |d    }|d }|S )Ni   r#             r$   r   )paramsdcbaitgr   r   r   integral>   s    4r6   c                 C   sT   | \}}}}t d| d d|  d| d| d|  dg}|d d d }|d S )	Ni@  r-   r.   0      r   r!   r#   r   array)r0   r1   r2   r3   r4   grdr   r   r   gradE   s    2r<   c              	   C   s:   t ddddgddddgddddgddddgg}|d S )Ng\@r   r.   r7   r8   r#   r9   )r0   hessr   r   r   hessianL   s    


r>   c           	      C   sF   t |j| }t }|j|}|||}t| }|||  ||fS N)r   dotbasisr   linkZinverselogliker6   )	r0   r+   r)   alphaZlin_predZgaussianZexpvalrC   r5   r   r   r   cost_functionU   s    rE   c            	      C   sf   t  \} }| jd }d}t||d}tdD ]4}tjddd}||}t|}t	||dd	 q,d
S )z>
    test the func method of the gam penalty
    :return:
    r   r"   rD   Zunivariate_smoother
   r#   r%   皙?atolN)
r,   	smoothersr   ranger   randomrandintfuncr6   r   )	r+   r)   univ_polrD   gp_r0   Zgp_scorer5   r   r   r   test_gam_penaltyc   s    


rT   c            	      C   s   t jd t \} }d}| jd }t||d}tdD ]H}t jddd}t ddddg}|	|}t
|}t||ddd	 q8d S )
Nr"   r   rF   rG   rH   r#   r%   {Gz?rtolrK   )r   rN   seedr,   rL   r   rM   uniformr:   derivr<   r   )	r+   r)   rD   smootherrR   rS   r0   Zgam_gradr;   r   r   r   test_gam_gradientt   s    


r\   c            	      C   s   t jd t \} }| jd }d}t||d}tdD ]J}t jddd}||}t	|}t 
|}t |}t||dd	d
 q8d S )Nr"   r   rF   rG   rH   r#   r-   vIh%<=MbP?rK   rW   )r   rN   rX   r,   rL   r   rM   rO   deriv2r>   ZflipudZfliplrr   )	r+   r)   rQ   rD   rR   rS   r0   Zgam_der2r=   r   r   r   test_gam_hessian   s    




ra   c            
      C   s   t jd t \} }d}tdD ]f}t jddd}t|| ||\}}}t|| |d}|j|ddd}	t	|| |dd t	|	|d	d q"d S )
Nr"   rG   r!   r%   r[   rD   )scaleZ
pen_weight绽|=rW   rI   )
r   rN   rX   r,   rM   rY   rE   r   rC   r   )
polyr)   rD   rS   r0   costerrr5   glm_gamZgam_logliker   r   r   test_approximation   s    
rj   c                  C   s   t jt jt} t j| dd}t|}|jj	}|j
j	}dg}dg}t|||dd}t|j}d}	t|||	d}
|
jd	d
ddd}t|j|j}t|j}t||dd t||dd d S )Nresultsprediction_from_mgcv.csvrG   r$   Tr*   dfZinclude_interceptrI   rb   bfgsr   r"   r    methodmax_start_irlsdispmaxiterrd   re   rU   rJ   )ospathdirnameabspath__file__joinpdread_csvr   valuesr)   r   r   Zasarrayy_estr   fitr@   rA   r0   fittedvaluesr   )cur_dir	file_pathdata_from_rr   r)   rn   r*   bsplinesy_mgcvrD   ri   res_glm_gamZy_gam0y_gamr   r   r   test_gam_glm   s&    
 r   c                  C   s   t jt jt} t j| dd}t|}|jj	}|j
j	}dg}dg}t|||dd}|j}d}	t|||	d}
|
jd	d
}t|j|j}t|}t|}t||ddd d S )Nrk   rl   rG   r-   Trm   gh㈵>rD   r    )rt   rd   rI   rV   )ru   rv   rw   rx   ry   rz   r{   r|   r   r}   Zybinr   Zybin_estr   r   r   r@   rA   r0   sigmoidr   )r   r   r   r   r)   rn   r*   r   r   rD   Zlg_gamZ
res_lg_gamr   r   r   r   test_gam_discrete   s     
	r   r"   c           
      C   s   d}t dd|}t dd|}t ||gj}t j|  || | | t jdd| }d}d	}||g}t||}	|||	fS )
N  r!   r"   irG   r   rU   r%   r$   )r   r&   vstackTrN   rX   normalr   )
rX   r(   x1x2r   r)   Zdegree1Zdegree2degreesr+   r   r   r   multivariate_sample_data   s     
r   c               	   C   s  ddg} ddg}t jd t \}}}t|d d df |jd d}t|d d df |jd d}t| d |d}t| d |d}tjt	dd t
|| |d}	W 5 Q R X td	D ]}
t jd
d|jd j}t jd
d|jd j}t ||g}||}||}|	|}t||| ddd ||}||}t ||g}|	|}t|| ||}||}t||}|	|}t|| qd S )Nr"   r#   r   )r*   rF   zweights is currently ignored)match)Zmultivariate_smootherrD   weightsrG   r$   rd   r_   )r   rN   rX   r   r   r   r   pytestZwarnsUserWarningr   rM   rO   rL   Z	dim_basisconcatenaterP   r   rZ   r`   r   )alphasr   r   r)   r+   Z	univ_pol1Z	univ_pol2Zgp1Zgp2ZmgpiZparams1Zparams2r0   Zc1c2r2   d1Zd2Zd12r1   h1h2Zh12hr   r   r   test_multivariate_penalty   s<    










r   c            
      C   sh   t  \} }}ddg}ddg}t|j|j}t|||d}| }t|||d}| }	t|j|	j d S )N皙?gffffff?r"   rb   )r   r   r   rL   r   r   r   r0   )
r   r)   rf   r   r   Zgsgam_gs
gam_gs_resZgam_polyZgam_poly_resr   r   r   test_generic_smoother  s    r   c                  C   s   t jt jt} t j| dd}t|}|jj	}|j
}dg}dg}t|||d}|j}dg}	t||	d}
t|tt|df||	d	}|jd
dddd}|j}t||dd d S )Nrk   rl   rG   r$   r*   rn   gZ%X?r   r"   exogr[   rD   pirlsr   r    rp   rU   rJ   )ru   rv   rw   rx   ry   rz   r{   r|   r   r}   r)   r   r~   r   r   r   oneslenr   r   r   )r   r   r   r   r)   rn   r*   r   r   rD   rR   ri   r   r   r   r   r   test_multivariate_gam_1d_data,  s(    
 r   c               	   C   s   dd } t jt jt}t j|dd}t|}|jj	}|j
j	}dg}dg}t|||d}dg}	d	g}	td
}
t||	d}t||	t| |d |
d}| }d S )Nc                 S   s   t j| | t|  S r?   r   ZlinalgZnormr   )r   r   r   r   r   rg   U  s    z&test_multivariate_gam_cv.<locals>.costrk   rl   rG   r-   r   gݵ|г?r#   r$   r   r[   r   gamrg   Zendogr   Zcv_iterator)ru   rv   rw   rx   ry   rz   r{   r|   r   r}   r)   r   r   r   r   r   r   )rg   r   r   r   r   r)   rn   r*   r   r   cvrR   gam_cv
gam_cv_resr   r   r   test_multivariate_gam_cvQ  s(    
   r   c               	   C   s.  dd } t jt jt}t j|dd}t|}|jj	}|j
j	}|j}|j}dg}dg}	t||	|dd}
t}td	d
dg}d}t|dd}tjd t|
||| |d |d}| }t||
|jd}|jdd	ddd}||
j}t|j|ddd tjd |j|dd\}}t||jdd d S )Nc                 S   s   t j| | t|  S r?   r   )y1y2r   r   r   sample_metricp  s    z4test_multivariate_gam_cv_path.<locals>.sample_metricrk   rl   rG      Trm   r   r#   r$   )k_foldsshuffle{   r   rb   Zirlsr"   r    rp   rI   r_   )r   r   g-q=re   )ru   rv   rw   rx   ry   rz   r{   r|   r   r}   r)   y_est_seZ
y_mgcv_gcvr   r   r   r&   r   rN   rX   r   r   alpha_cvpredictrA   r   Zselect_penweight_kfold)r   r   r   r   r   r)   se_from_mgcvr   rn   r*   r   r   r   kr   r   r   ri   r   r~   r   Zres_cvr   r   r   test_multivariate_gam_cv_patho  sB    
   
r   c            	      C   s   d} t j| dfd}td|d d df< tdd|d d df< t|ddgd}ttd}ttdd}t|j|||\}}d	d	d	d
ddgddddddgddddddgg}t|j| ddddddgddddddgdd d!d"d#d$gg}t|j| d S )%Nr   r#   shaper      r"   r$   )r           g      @g      B@g      k@r   g      @g     H@g     pu@g       @g      @g       @g      P@g      @g      @g      "@g      ;@g     @T@g     Ȇ@g      0@g      $@g      Y@g     @@g      @g      9@g     @_@g      &@g     @^@g     ̔@)	r   zerosrM   r   listr   r   r   rA   )	r(   r   rf   Ztrain_indexZ
test_indexZtrain_smootherZtest_smootherZexpected_train_basisZexpected_test_basisr   r   r   test_train_test_smoothers  s*    r   c                  C   sV   d} t jd t jdd| df}t |j|}t|}t |j|}t|| d S )Nr   r"   r   r$   )r   rN   rX   r   r@   r   r   r   )r(   r   r   Zsqrt_x2Zx2_reconstructionr   r   r   test_get_sqrt  s    r   c                  C   sR  t jd d} t jdd| df}t |j|}t tt| }t jdd| }|j	\}}d}t
|||| |\}}	}
|}t|	| |}||d |< t|| |}t|
| d}t
||||\}}	}
t|| }tt |j|||  t ||g}t|	| t j|| fd}||d |< t|| t j|dg| fdd}t|
| d S )Nr"     r!   r$   r   r   )Zaxis)r   rN   rX   rY   r@   r   r:   r   rM   r   r   r   r   r   r   r   )r(   r   sr)   wZnobsZ	n_columnsrD   Zaug_yZaug_xZaug_wZexpected_aug_xZexpected_aug_yZexpected_aug_wrsr   r   r   test_make_augmented_matrix  s6    





r   c                  C   s   t jd d} d}t jdd| df}|d d df |d d df  t jdd|  }|| 8 }t j| fd}t jdd||f}t||d| |}t||	 }t
|j|j d S )Nr"      r$   r   r#   rI   r   )r   rN   rX   r   r'   r   r   r   ZOLSr   r   r0   )r(   pr   r)   r   r   Zpen_wls_resZls_resr   r   r   test_penalized_wls  s    0r   c                  C   s>  t jt jt} t j| dd}t|}|ddg j}|d j}|dg j}|ddg j}d	d	g}t	||d
}ddg}	t
|||	d}
|
jdd}t|jd d |jd f |j|jd  }|| 8 }t|jd d |jd f |j|jd  }|| 8 }t||d d df dd t||d d df dd d S )Nrk   z"cubic_cyclic_splines_from_mgcv.csvZx0r   r)   r~   zs(x0)zs(x2)rG   rn   g?gMb0?rb   r   rq   r   r"   g{Gz?rJ   gQ?)ru   rv   rw   rx   ry   rz   r{   r|   r}   r
   r   r   r   r@   rA   maskr0   r'   r   )r   r   r   r   r)   Z
y_est_mgcvZs_mgcvdfsZccsrD   r   gam_ress0s1r   r   r   test_cyclic_cubic_splines  s0    


r   c                  C   s$  t jd ddlm}  d}t dd|}t dd|d }t ||gj}t || }|| }|| }|t j	dd| }||
 8 }||
 8 }d	d	g}	| |d
d
gdd}
t|t |df|
|	d}|jdd}|j}||
 8 }ttd|d }|| }|| }|| }t||dd d S )Nr   )CubicSplinesr   r   r$   r"   r#   g333333?r^   rG   center)rn   constraintsr   r   r   2   g{Gz?rJ   )r   rN   rX   statsmodels.gam.smooth_basisr   r&   r   r   sinr   r'   r   r   r   r   r   rM   r   )r   r(   r   r   r   r   r   y0r)   r   csr   r   r~   indexr   r   r   test_multivariate_cubic_splines(  s.    r   c                  C   sZ  t jd d} t dd| }t j| }t ||gj}t || }|| }|| }|t jdd|  }||	 8 }||	 8 }dgd }dgd }	t
|d	d	gddgd
d}
t||
|d}t||
|d}|jddddd}|j|jddddd}| }t |
j|j}||	 8 }t |
j|j}||	 8 }t|j|jddd t||dd d S )Nr   r   r   r$   g333333?g      @r#   g333333?rG   r   )rn   r*   r   rb   nmr"   i N  rp   ro   )Zstart_paramsrq   rr   rs   rt   g-C6
?r_   rJ   )r   rN   rX   r&   Zrandr   r   r   r   r'   r   r   r   r0   r@   rA   r   )r(   r   r   r   r   r   r   r)   r   Z
alphas_glmr   Z	gam_pirlsZgam_glmZgam_res_glmZgam_res_pirlsZ	y_est_glmZy_est_pirlsr   r   r   test_glm_pirls_compatibilityQ  sF    

   r   c            	      C   sX   t  \} }}ddg}t|||d}| }| }t||j }| }t|| d S )Nr   rb   )r   r   r   r   r   rA   r   )	r   r)   rf   r   r   r   Z	y_est_gamglmr~   r   r   r   test_zero_penalty~  s    r   c                  C   s   ddddddgddddddgddddddgddddddgddddddgddddddgg} t jd	 t jdd	d
}t ddddg}t|dd}||_| }t| |dd d S )Nr   r   gǺV?g-C6*?g
RgMbPg6df?gZN[?r"   rG   g?r   g333333?g?r%   r   g|=rJ   )	r   rN   rX   r   r:   r	   ZknotsZ
_splines_sr   )Zspl_s_Rr   Zxkr   Zspl_sr   r   r   
test_spl_s  s    r   c                  C   s   t jd d} t jdd| df}||  }|d d df |d d df  t jdd|  }|| 8 }d}t|dgd dgd d	d
gd}t|||d}|jddddd}t	||j
}t |jd j
t |jd j
f}|j|d
d}	|d\}
}t|	|
dd | dk std S )Nr   r   r"   r#   rU   r   r$   rG   TFrm   rb   r     rp   )Z	transformg?rJ   d   )r   rN   rX   rY   r'   r   r   r   r   r   rA   Zcolumn_stackrL   Z
zeros_liker   partial_valuesr   minAssertionError)r(   r   r)   rD   r   ri   r   r   exr~   Zy_partial_estser   r   r   test_partial_values2  s.    0 r   c                  C   s   t jt jt} t j| dd}t|}|jj	}|j
j	}|j}dg}dg}t|||dd}d}	t|||	d}
|
jd	d
d}|jd }|d\}}t||d ddd t|jd }t||| ddd d S )Nrk   rl   rG   r   Trm   gMozӻ?rb   r    ro   rt   rq   r   r~   gMb?rV   gZd;?)ru   rv   rw   rx   ry   rz   r{   r|   r   r}   r)   r   r   r   r   rL   r   r   r   sqrtrc   )r   r   r   r   r)   r   rn   r*   r   rD   ri   r   Zuniv_bsplineshat_yr   Zbug_factr   r   r   test_partial_values  s"    

r   c                  C   s   t jt jt} t j| dd}t|}|jj	}|j
j	}|j}dg}dg}t|||d}d}	t|||	d}
|
jdd	d
}|d}|jd  d  \}}t|}|d\}}t|||  t|||  d S )Nrk   rl   rG   r   r   gQ?rb   r    ro   r   r   )ru   rv   rw   rx   ry   rz   r{   r|   r   r}   r)   r   r   r   r   Zplot_partialZaxesZget_childrenget_datar   Zargsortr   r   )r   r   r   r   r)   r   rn   r*   r   rD   ri   r   ZfigZxpZypZsort_idxr   r   r   r   r   test_partial_plot  s$    


r   c            	      C   sB  t jd d} t jdd| df}||  }|d d df |d d df  t jdd|  }|| 8 }t|dgd dgd dd	}ddg}t|||d
}|jddddd}t	||j
}| }t| | dd d}t|||d
}|jddddd}t| | dd |jddddd}t| | ddd d S )Nr   r   r"   r#   rU   r$   rG   r   )r*   rn   r   rb   r   r   rp   g{Gzd?re   r]   rd   rJ   ro   g-C6?g:0yE>rV   )r   rN   rX   rY   r'   r   r   r   r   r   rA   r   Z
cov_params)	r(   r   r)   r   rD   ri   r   r   Zres_glmr   r   r   test_cov_params  sF    0    r   )r"   )I__doc__ru   Znumpyr   Znumpy.testingr   Zpandasr{   Zscipy.linalgr   r   Zstatsmodels.tools.linalgr   r   r   r   r   r   r	   r
   Z*statsmodels.gam.generalized_additive_modelr   r   r   r   Z9statsmodels.gam.gam_cross_validation.gam_cross_validationr   r   r   Zstatsmodels.gam.gam_penaltiesr   r   Z5statsmodels.gam.gam_cross_validation.cross_validatorsr   Z+statsmodels.genmod.generalized_linear_modelr   Z"statsmodels.genmod.families.familyr   r   Z	vectorizer   r,   r6   r<   r>   rE   rT   r\   ra   rj   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   markZ
matplotlibr   r   r   r   r   r   <module>   s^    	!$
%%5$,)-"
&