U
    KvfU  ã                   @   s†   d Z ddlZddlmZ ddd„Zdd„ Zdd	„ Zd
d„ Zdd„ Z	dd„ Z
dd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ ZdS )a°  
Module of kernels that are able to handle continuous as well as categorical
variables (both ordered and unordered).

This is a slight deviation from the current approach in
statsmodels.nonparametric.kernels where each kernel is a class object.

Having kernel functions rather than classes makes extension to a multivariate
kernel density estimation much easier.

NOTE: As it is, this module does not interact with the existing API
é    N)Úerfc                 C   s^   |  |j¡}|dkr&t t |¡j¡}t |j¡|  |d  }||k}|d|   | ||< |S )a!  
    The Aitchison-Aitken kernel, used for unordered discrete random variables.

    Parameters
    ----------
    h : 1-D ndarray, shape (K,)
        The bandwidths used to estimate the value of the kernel function.
    Xi : 2-D ndarray of ints, shape (nobs, K)
        The value of the training set.
    x : 1-D ndarray, shape (K,)
        The value at which the kernel density is being estimated.
    num_levels : bool, optional
        Gives the user the option to specify the number of levels for the
        random variable.  If False, the number of levels is calculated from
        the data.

    Returns
    -------
    kernel_value : ndarray, shape (nobs, K)
        The value of the kernel function at each training point for each var.

    Notes
    -----
    See p.18 of [2]_ for details.  The value of the kernel L if :math:`X_{i}=x`
    is :math:`1-\lambda`, otherwise it is :math:`\frac{\lambda}{c-1}`.
    Here :math:`c` is the number of levels plus one of the RV.

    References
    ----------
    .. [*] J. Aitchison and C.G.G. Aitken, "Multivariate binary discrimination
           by the kernel method", Biometrika, vol. 63, pp. 413-420, 1976.
    .. [*] Racine, Jeff. "Nonparametric Econometrics: A Primer," Foundation
           and Trends in Econometrics: Vol 3: No 1, pp1-88., 2008.
    Né   )ÚreshapeÚsizeÚnpZasarrayÚuniqueÚones)ÚhÚXiÚxÚ
num_levelsÚkernel_valueÚidx© r   úE/tmp/pip-unpacked-wheel-2v6byqio/statsmodels/nonparametric/kernels.pyÚaitchison_aitken   s    #r   c                 C   sH   |  |j¡}dd|   | t|| ƒ  }||k}|d|   | ||< |S )a•  
    The Wang-Ryzin kernel, used for ordered discrete random variables.

    Parameters
    ----------
    h : scalar or 1-D ndarray, shape (K,)
        The bandwidths used to estimate the value of the kernel function.
    Xi : ndarray of ints, shape (nobs, K)
        The value of the training set.
    x : scalar or 1-D ndarray of shape (K,)
        The value at which the kernel density is being estimated.

    Returns
    -------
    kernel_value : ndarray, shape (nobs, K)
        The value of the kernel function at each training point for each var.

    Notes
    -----
    See p. 19 in [1]_ for details.  The value of the kernel L if
    :math:`X_{i}=x` is :math:`1-\lambda`, otherwise it is
    :math:`\frac{1-\lambda}{2}\lambda^{|X_{i}-x|}`, where :math:`\lambda` is
    the bandwidth.

    References
    ----------
    .. [*] Racine, Jeff. "Nonparametric Econometrics: A Primer," Foundation
           and Trends in Econometrics: Vol 3: No 1, pp1-88., 2008.
           http://dx.doi.org/10.1561/0800000009
    .. [*] M.-C. Wang and J. van Ryzin, "A class of smooth estimators for
           discrete distributions", Biometrika, vol. 68, pp. 301-309, 1981.
    ç      à?r   )r   r   Úabs)r	   r
   r   r   r   r   r   r   Ú
wang_ryzinD   s
    !r   c                 C   s4   dt  dt j ¡ t  || d  | d d  ¡ S )a÷  
    Gaussian Kernel for continuous variables
    Parameters
    ----------
    h : 1-D ndarray, shape (K,)
        The bandwidths used to estimate the value of the kernel function.
    Xi : 1-D ndarray, shape (K,)
        The value of the training set.
    x : 1-D ndarray, shape (K,)
        The value at which the kernel density is being estimated.

    Returns
    -------
    kernel_value : ndarray, shape (nobs, K)
        The value of the kernel function at each training point for each var.
    ç      ð?é   g       @©r   ÚsqrtÚpiÚexp©r	   r
   r   r   r   r   Úgaussianl   s    r   c                 C   s8   || |  }d|t  |¡dk< ddt  |¡d  d  S )aö  
    Tricube Kernel for continuous variables
    Parameters
    ----------
    h : 1-D ndarray, shape (K,)
        The bandwidths used to estimate the value of the kernel function.
    Xi : 1-D ndarray, shape (K,)
        The value of the training set.
    x : 1-D ndarray, shape (K,)
        The value at which the kernel density is being estimated.

    Returns
    -------
    kernel_value : ndarray, shape (nobs, K)
        The value of the kernel function at each training point for each var.
    r   r   gÍ‹”§ë?é   )r   r   )r	   r
   r   Úur   r   r   Útricube€   s    r   c                 C   s4   dt  dt j ¡ t  || d  | d d  ¡ S )z, Calculates the Gaussian Convolution Kernel r   é   r   g      @r   r   r   r   r   Úgaussian_convolution–   s    r!   c                 C   s<   t  |j¡}t  |¡D ] }|t| ||ƒt| ||ƒ 7 }q|S ©N©r   Úzerosr   r   r   )r	   r
   ÚXjÚorderedr   r   r   r   Úwang_ryzin_convolution›   s    r'   c              	   C   sN   t  |¡}t  |j¡}|j}|D ](}|t| |||dt| |||d 7 }q |S ©N)r   )r   r   r$   r   r   )r	   r
   r%   ÚXi_valsr&   r   r   r   r   r   Úaitchison_aitken_convolution¦   s    
ÿr*   c                 C   s&   d|  dt || | t d¡  ƒ  S )Nr   r   r   )r   r   r   r   r   r   r   Úgaussian_cdf±   s    r+   c                 C   sN   t |ƒ}t |¡}t |j¡}|j}|D ] }||kr(|t| |||d7 }q(|S r(   )Úintr   r   r$   r   r   )r	   r
   Úx_ur)   r&   r   r   r   r   r   Úaitchison_aitken_cdfµ   s    
r.   c                 C   s8   t  |j¡}t  |¡D ]}||kr|t| ||ƒ7 }q|S r"   r#   )r	   r
   r-   r&   r   r   r   r   Úwang_ryzin_cdfÁ   s
    r/   c                 C   s    d||  t | ||ƒ | d  S )Nr   )r   r   r   r   r   Ú
d_gaussianÊ   s    r0   c                 C   s,   t  |j¡}||k}||  }|| ||< |S )zr
    A version for the Aitchison-Aitken kernel for nonparametric regression.

    Suggested by Li and Racine.
    )r   r   r   )r	   r
   r   r   ÚixZinDomr   r   r   Úaitchison_aitken_regÏ   s
    r2   c                 C   s   | t || ƒ S )zw
    A version for the Wang-Ryzin kernel for nonparametric regression.

    Suggested by Li and Racine in [1] ch.4
    )r   r   r   r   r   Úwang_ryzin_regÜ   s    r3   )N)Ú__doc__Znumpyr   Zscipy.specialr   r   r   r   r   r!   r'   r*   r+   r.   r/   r0   r2   r3   r   r   r   r   Ú<module>   s   
-(	