U
    Kvf7                     @   s   d Z ddlZddlmZ ddlmZ ddlmZm	Z	 ddl
mZmZmZmZ ddd	gZG d
d	 d	ZG dd de	ZG dd de	ZdS )aY  
Multivariate Conditional and Unconditional Kernel Density Estimation
with Mixed Data Types

References
----------
[1] Racine, J., Li, Q. Nonparametric econometrics: theory and practice.
    Princeton University Press. (2007)
[2] Racine, Jeff. "Nonparametric Econometrics: A Primer," Foundation
    and Trends in Econometrics: Vol 3: No 1, pp1-88. (2008)
    http://dx.doi.org/10.1561/0800000009
[3] Racine, J., Li, Q. "Nonparametric Estimation of Distributions
    with Categorical and Continuous Data." Working Paper. (2000)
[4] Racine, J. Li, Q. "Kernel Estimation of Multivariate Conditional
    Distributions Annals of Economics and Finance 5, 211-235 (2004)
[5] Liu, R., Yang, L. "Kernel estimation of multivariate
    cumulative distribution function."
    Journal of Nonparametric Statistics (2008)
[6] Li, R., Ju, G. "Nonparametric Estimation of Multivariate CDF
    with Categorical and Continuous Data." Working Paper
[7] Li, Q., Racine, J. "Cross-validated local linear nonparametric
    regression" Statistica Sinica 14(2004), pp. 485-512
[8] Racine, J.: "Consistent Significance Testing for Nonparametric
        Regression" Journal of Business & Economics Statistics
[9] Racine, J., Hart, J., Li, Q., "Testing the Significance of
        Categorical Predictor Variables in Nonparametric Regression
        Models", 2006, Econometric Reviews 25, 523-544

    N)optimize)
mquantiles)KDEMultivariate	KernelReg)gpkeLeaveOneOut_get_type_pos_adjust_shapeSingleIndexModel
SemiLinear	TestFFormc                   @   s*   e Zd ZdZd
ddZdd Zdd Zd	S )r   a]  
    Nonparametric test for functional form.

    Parameters
    ----------
    endog : list
        Dependent variable (training set)
    exog : list of array_like objects
        The independent (right-hand-side) variables
    bw : array_like, str
        Bandwidths for exog or specify method for bandwidth selection
    fform : function
        The functional form ``y = g(b, x)`` to be tested. Takes as inputs
        the RHS variables `exog` and the coefficients ``b`` (betas)
        and returns a fitted ``y_hat``.
    var_type : str
        The type of the independent `exog` variables:

            - c: continuous
            - o: ordered
            - u: unordered

    estimator : function
        Must return the estimated coefficients b (betas). Takes as inputs
        ``(endog, exog)``.  E.g. least square estimator::

            lambda (x,y): np.dot(np.pinv(np.dot(x.T, x)), np.dot(x.T, y))

    References
    ----------
    See Racine, J.: "Consistent Significance Testing for Nonparametric
    Regression" Journal of Business & Economics Statistics.

    See chapter 12 in [1]  pp. 355-357.
    d   c                 C   sD   || _ || _|| _|| _|| _|| _t|||dj| _|  | _	d S )N)bwvar_type)
endogexogr   fform	estimatornbootr   r   _compute_sigsig)selfr   r   r   r   r   r   r    r   S/tmp/pip-unpacked-wheel-2v6byqio/statsmodels/sandbox/nonparametric/kernel_extras.py__init__P   s    zTestFForm.__init__c                 C   sd  | j }| j}| ||}| ||}t|d }|| }|t| }| || _t	d}d| d }d| d }	|| }
|	| }|	| }t
| jdf}t| jD ]j}| }tjjdd|fd}||k }|
| ||< || }| ||}| ||}|| }| |||< q|| _d}| jt|dkr4d}| jt|d	krJd
}| jt|dkr`d}|S )Nr   g      @   g       @sizezNot Significantg?*gffffff?z**gGz?z***)r   r   r   r   npshapemean_compute_test_statZ	test_statsqrtemptyr   rangecopyrandomuniformZboots_resultsr   )r   YXbmnZresidZsqrt5Zfct1Zfct2u1u2rZI_distjZu_bootZprobindZY_bootZb_hatZm_hatZ
u_boot_hatr   r   r   r   r   Z   sD    
zTestFForm._compute_sigc                 C   sF  t |d }t| j}t|d d d f  }d}d}t|D ]\}}t|}	t |	}	t| j	| | j|d d f  | j
dd}
|| |	 |
 }|	j|
jkst|| 7 }||d  7 }t |dkstt |dks@tq@|d||d   9 }t| j
d }| j	|  }|d| ||d   9 }|| t ||  }|S )Nr   F)datadata_predictr   Ztosum   r   g      ?)r   r    r   r   __iter__	enumeratenextsqueezer   r   r   AssertionErrorsumr   r   prodr#   )r   ur-   ZXLOOZuLOOZivalZS2iX_not_iZu_jKZf_iZix_conthpTr   r   r   r"      s0    

 zTestFForm._compute_test_statN)r   )__name__
__module____qualname____doc__r   r   r"   r   r   r   r   r   ,   s   #

&c                   @   s:   e Zd ZdZdd Zdd Zdd Zdd	d
Zdd ZdS )r
   a  
    Single index semiparametric model ``y = g(X * b) + e``.

    Parameters
    ----------
    endog : array_like
        The dependent variable
    exog : array_like
        The independent variable(s)
    var_type : str
        The type of variables in X:

            - c: continuous
            - o: ordered
            - u: unordered

    Attributes
    ----------
    b : array_like
        The linear coefficients b (betas)
    bw : array_like
        Bandwidths

    Methods
    -------
    fit(): Computes the fitted values ``E[Y|X] = g(X * b)``
           and the marginal effects ``dY/dX``.

    References
    ----------
    See chapter on semiparametric models in [1]

    Notes
    -----
    This model resembles the binary choice models. The user knows
    that X and b interact linearly, but ``g(X * b)`` is unknown.
    In the parametric binary choice models the user usually assumes
    some distribution of g() such as normal or logistic.
    c                 C   s~   || _ t|| _| j d | _ t|d| _t|| j| _t| jd | _| j | _	d| _
d| _d| _| j| _|  \| _| _d S )Nr   r   gaussian	wangryzinaitchisonaitken)r   lenr@   r	   r   r   r   r    nobs	data_typeckertypeokertypeukertype_est_loc_linearfunc	_est_b_bwr+   r   )r   r   r   r   r   r   r   r      s    
zSingleIndexModel.__init__c                 C   sV   t jj| jd fd}tj| j|dd}|d| j }|| jd  }| |}||fS )Nr   r   r   Zdisp)r   r'   r(   r@   r   fmincv_looZ_set_bw_boundsr   Zparams0Zb_bwr+   r   r   r   r   rR      s    
zSingleIndexModel._est_b_bwc                 C   s   t |}|d| j }|| jd  }t| j}t| j }d}t|D ]r\}}t|}	| j	||	t 
||d d d f  t 
| j||d d d f | dd }
|| j| |
 d 7 }qJ|| j S )Nr   r   r   r   r4   r5   )r   asarrayr@   r   r   r   r6   r7   r8   rQ   dotrK   )r   paramsr+   r   LOO_XLOO_YLr>   r?   r)   Gr   r   r   rU      s    

 "zSingleIndexModel.cv_looNc                 C   s   |d kr| j }nt|| j}t|d }t|f}t|| jf}t|D ]z}| j| j| j	t
| j | jd d d f t
|||d d d f | jd}|d ||< t|d }|||d d f< qN||fS )Nr   r   r4   )r   r	   r@   r   r    r$   r%   rQ   r   r   rY   r+   r9   )r   r4   N_data_predictr!   mfxr>   mean_mfxmfx_cr   r   r   fit   s     zSingleIndexModel.fitc                 C   sV   d}|dt | j d 7 }|dt | j d 7 }|d| j d 7 }|d7 }|d7 }|S ) Provide something sane to print.zSingle Index Model 
Number of variables: K = 
zNumber of samples:   nobs = Variable types:      BW selection method: cv_ls
Estimator type: local constant
strr@   rK   r   r   reprr   r   r   __repr__  s    zSingleIndexModel.__repr__)N	rC   rD   rE   rF   r   rR   rU   rd   ro   r   r   r   r   r
      s   '
c                   @   s:   e Zd ZdZdd Zdd Zdd Zdd	d
Zdd ZdS )r   a}  
    Semiparametric partially linear model, ``Y = Xb + g(Z) + e``.

    Parameters
    ----------
    endog : array_like
        The dependent variable
    exog : array_like
        The linear component in the regression
    exog_nonparametric : array_like
        The nonparametric component in the regression
    var_type : str
        The type of the variables in the nonparametric component;

            - c: continuous
            - o: ordered
            - u: unordered

    k_linear : int
        The number of variables that comprise the linear component.

    Attributes
    ----------
    bw : array_like
        Bandwidths for the nonparametric component exog_nonparametric
    b : array_like
        Coefficients in the linear component
    nobs : int
        The number of observations.
    k_linear : int
        The number of variables that comprise the linear component.

    Methods
    -------
    fit
        Returns the fitted mean and marginal effects dy/dz

    Notes
    -----
    This model uses only the local constant regression estimator

    References
    ----------
    See chapter on Semiparametric Models in [1]
    c                 C   s   t |d| _t ||| _t|| _t || j| _|| _t| jd | _	|| _
| j
| _d| _d| _d| _| j| _|  \| _| _d S )Nr   r   rG   rH   rI   )r	   r   r   rJ   r@   exog_nonparametrick_linearr   r    rK   r   rL   rM   rN   rO   rP   rQ   rR   r+   r   )r   r   r   rq   r   rr   r   r   r   r   ;  s    
zSemiLinear.__init__c                 C   sN   t jj| j| j fd}tj| j|dd}|d| j }|| jd }||fS )z
        Computes the (beta) coefficients and the bandwidths.

        Minimizes ``cv_loo`` with respect to ``b`` and ``bw``.
        r   r   rS   N)r   r'   r(   rr   r@   r   rT   rU   rV   r   r   r   rR   K  s
    zSemiLinear._est_b_bwc              	   C   s  t |}|d| j }|| jd }t| j}t| j }t| j }t | j|dddf }d}t	|D ]\}	}
t
|}t
|}t |
|dddf }|| }| j||| | j|	ddf  dd }||	ddf }|| j|	 | | d 7 }qr|S )a  
        Similar to the cross validation leave-one-out estimator.

        Modified to reflect the linear components.

        Parameters
        ----------
        params : array_like
            Vector consisting of the coefficients (b) and the bandwidths (bw).
            The first ``k_linear`` elements are the coefficients.

        Returns
        -------
        L : float
            The value of the objective function

        References
        ----------
        See p.254 in [1]
        r   NrW   r5   )r   rX   rr   r   r   r   r6   rq   rY   r7   r8   rQ   )r   rZ   r+   r   r[   r\   ZLOO_ZZXbr]   iir?   r)   ZZXb_jZYxr^   ltr   r   r   rU   X  s*    

zSemiLinear.cv_looNc           
   	   C   s   |dkr| j }nt|| j}|dkr,| j}nt|| j}t|d }t|f}t|| jf}| jt	|| j
dddf  }t|D ]P}| j| j|| j||ddf d}|d ||< t|d }	|	||ddf< q||fS )z+Computes fitted values and marginal effectsNr   r_   r   )r   r	   rr   rq   r@   r   r    r$   r   rY   r+   r%   rQ   r   r9   )
r   Zexog_predictZexog_nonparametric_predictr`   r!   ra   r)   r>   rb   rc   r   r   r   rd     s$     zSemiLinear.fitc                 C   sV   d}|dt | j d 7 }|dt | j d 7 }|d| j d 7 }|d7 }|d7 }|S )re   z'Semiparamatric Partially Linear Model 
rf   rg   zNumber of samples:   N = rh   ri   rj   rk   rm   r   r   r   ro     s    zSemiLinear.__repr__)NNrp   r   r   r   r   r     s   .)
)rF   Znumpyr   Zscipyr   Zscipy.stats.mstatsr   Zstatsmodels.nonparametric.apir   r   Z&statsmodels.nonparametric._kernel_baser   r   r   r	   __all__r   r
   r   r   r   r   r   <module>   s   
oq