U
    KvfK<  ã                   @   sØ   d Z ddlZddlZddlZddlmZ ddl	m
Z
 dd„ Zd-dd„Zd.d
d„Zd/dd„Zd0dd„Zdd„ Zd1dd„Zdd„ Zdd„ Zdd„ Zd2dd„Zd d!„ Zd"d#„ Zd$d%„ ZG d&d'„ d'eƒZd3d(d)„Zd4d+d,„ZdS )5z
Utility functions models code
é    N)Ú_is_using_pandas)Ú
array_likec                 C   s.   t | tƒr| S t | tƒr"|  d¡S t| ƒS d S )NÚlatin1)Ú
isinstanceÚstrÚbytesÚdecode)Ús© r
   ú;/tmp/pip-unpacked-wheel-2v6byqio/statsmodels/tools/tools.pyÚasstr2   s
    


r   c                 C   s&   i }t | ƒD ]\}}|||| < q|S )zd
    Helper function to create a dictionary mapping a column number
    to the name in tmp_arr.
    )Ú	enumerate)Ztmp_arrÚoffsetZcol_mapÚiZcol_namer
   r
   r   Ú_make_dictnames   s    r   é   c                 C   s¦   t  | ¡} | jdkr$| dd…df } |dk	rˆt  |¡}|jdkrP|dd…df }t  t  | ¡ |¡ t  |¡ |¡ ¡}| | || fS t  | ¡ |¡ }| | S dS )a  
    Returns views on the arrays Y and X where missing observations are dropped.

    Y : array_like
    X : array_like, optional
    axis : int
        Axis along which to look for missing observations.  Default is 1, ie.,
        observations in rows.

    Returns
    -------
    Y : ndarray
        All Y where the
    X : ndarray

    Notes
    -----
    If either Y or X is 1d, it is reshaped to be 2d.
    r   N)ÚnpÚasarrayÚndimÚarrayÚlogical_andÚisnanÚany)ÚYÚXÚaxisZkeepidxr
   r
   r   Údrop_missing    s    



ÿr   Fc                 C   s   t dƒ‚dS )ax  
    Construct a dummy matrix from categorical variables

    .. deprecated:: 0.12

       Use pandas.get_dummies instead.

    Parameters
    ----------
    data : array_like
        An array, Series or DataFrame.  This can be either a 1d vector of
        the categorical variable or a 2d array with the column specifying
        the categorical variable specified by the col argument.
    col : {str, int, None}
        If data is a DataFrame col must in a column of data. If data is a
        Series, col must be either the name of the Series or None. For arrays,
        `col` can be an int that is the (zero-based) column index
        number.  `col` can only be None for a 1d array.  The default is None.
    dictnames : bool, optional
        If True, a dictionary mapping the column number to the categorical
        name is returned.  Used to have information about plain arrays.
    drop : bool
        Whether or not keep the categorical variable in the returned matrix.

    Returns
    -------
    dummy_matrix : array_like
        A matrix of dummy (indicator/binary) float variables for the
        categorical data.
    dictnames :  dict[int, str], optional
        Mapping between column numbers and categorical names.

    Notes
    -----
    This returns a dummy variable for *each* distinct variable.  If a
    a DaataFrame is provided, the names for the new variable is the
    old variable name - underscore - category name.  So if the a variable
    'vote' had answers as 'yes' or 'no' then the returned array would have to
    new variables-- 'vote_yes' and 'vote_no'.  There is currently
    no name checking.

    Examples
    --------
    >>> import numpy as np
    >>> import statsmodels.api as sm

    Univariate examples

    >>> import string
    >>> string_var = [string.ascii_lowercase[0:5],
    ...               string.ascii_lowercase[5:10],
    ...               string.ascii_lowercase[10:15],
    ...               string.ascii_lowercase[15:20],
    ...               string.ascii_lowercase[20:25]]
    >>> string_var *= 5
    >>> string_var = np.asarray(sorted(string_var))
    >>> design = sm.tools.categorical(string_var, drop=True)

    Or for a numerical categorical variable

    >>> instr = np.floor(np.arange(10,60, step=2)/10)
    >>> design = sm.tools.categorical(instr, drop=True)

    With a structured array

    >>> num = np.random.randn(25,2)
    >>> struct_ar = np.zeros((25,1),
    ...                      dtype=[('var1', 'f4'),('var2', 'f4'),
    ...                             ('instrument','f4'),('str_instr','a5')])
    >>> struct_ar['var1'] = num[:,0][:,None]
    >>> struct_ar['var2'] = num[:,1][:,None]
    >>> struct_ar['instrument'] = instr[:,None]
    >>> struct_ar['str_instr'] = string_var[:,None]
    >>> design = sm.tools.categorical(struct_ar, col='instrument', drop=True)

    Or

    >>> design2 = sm.tools.categorical(struct_ar, col='str_instr', drop=True)
    zcategorical has been removedN)ÚNotImplementedError)ÚdataÚcolZ	dictnamesZdropr
   r
   r   ÚcategoricalG   s    Pr    TÚskipc           	      C   s&  t | dƒr&ddlm} || d||dS t | ¡}|j}|dkrP|dd…df }n|jdkrbtdƒ‚tj|dd	dk}|tj|d
kdd	M }| 	¡ rð|dkrž|S |dkrð|dkr¸tdƒ‚n8t 
|jd ¡}d dd„ || D ƒ¡}td|› dƒ‚t |jd ¡|g}|r|n|ddd… }t |¡S )aq  
    Add a column of ones to an array.

    Parameters
    ----------
    data : array_like
        A column-ordered design matrix.
    prepend : bool
        If true, the constant is in the first column.  Else the constant is
        appended (last column).
    has_constant : str {'raise', 'add', 'skip'}
        Behavior if ``data`` already has a constant. The default will return
        data without adding another constant. If 'raise', will raise an
        error if any column has a constant value. Using 'add' will add a
        column of 1s if a constant column is present.

    Returns
    -------
    array_like
        The original values with a constant (column of ones) as the first or
        last column. Returned value type depends on input type.

    Notes
    -----
    When the input is a pandas Series or DataFrame, the added column's name
    is 'const'.
    Nr   )Ú	add_trendÚc)ZtrendÚprependÚhas_constantr   é   z)Only implemented for 2-dimensional arrays©r   ç        r!   Úraisezdata is constant.ú,c                 S   s   g | ]}t |ƒ‘qS r
   )r   )Ú.0r#   r
   r
   r   Ú
<listcomp>Í   s     z add_constant.<locals>.<listcomp>z
Column(s) z are constant.éÿÿÿÿ)r   Zstatsmodels.tsa.tsatoolsr"   r   r   r   Ú
ValueErrorZptpÚallr   ZarangeÚshapeÚjoinZonesZcolumn_stack)	r   r$   r%   r"   Úxr   Zis_nonzero_constÚcolumnsÚcolsr
   r
   r   Úadd_constant›   s.    



r5   c                 C   sŽ   t | ddd} t |ddd}| jdkr6| ddd…f n| } | jd |jd kr`td|jd  ƒ‚t | |g¡}tj |¡tj |¡krŠd	S d
S )a6  
    True if (Q, P) contrast `c` is estimable for (N, P) design `d`.

    From an Q x P contrast matrix `C` and an N x P design matrix `D`, checks if
    the contrast `C` is estimable by looking at the rank of ``vstack([C,D])``
    and verifying it is the same as the rank of `D`.

    Parameters
    ----------
    c : array_like
        A contrast matrix with shape (Q, P). If 1 dimensional assume shape is
        (1, P).
    d : array_like
        The design matrix, (N, P).

    Returns
    -------
    bool
        True if the contrast `c` is estimable on design `d`.

    Examples
    --------
    >>> d = np.array([[1, 1, 1, 0, 0, 0],
    ...               [0, 0, 0, 1, 1, 1],
    ...               [1, 1, 1, 1, 1, 1]]).T
    >>> isestimable([1, 0, 0], d)
    False
    >>> isestimable([1, -1, 0], d)
    True
    r#   r&   )ZmaxdimÚd©r   r   NzContrast should have %d columnsFT)r   r   r0   r.   r   ZvstackÚlinalgÚmatrix_rank)r#   r6   Únewr
   r
   r   ÚisestimableÕ   s    r;   çVçž¯Ò<c              	   C   sÆ   t  | ¡} |  ¡ } t j | d¡\}}}t  |¡}|jd }|jd }|t j |¡ }t	t
||ƒƒD ]*}	||	 |kr„d||	  ||	< qbd||	< qbt  t  |¡t  |dd…t jjf t  |¡¡¡}
|
|fS )z}
    Return the pinv of an array X as well as the singular values
    used in computation.

    Code adapted from numpy.
    Fr   r   ç      ð?r(   N)r   r   Ú	conjugater8   ÚsvdÚcopyr0   ÚmaximumÚreduceÚrangeÚminÚdotÚ	transposeÚmultiplyÚcoreZnewaxis)r2   ZrcondÚur	   ZvtZs_origÚmÚnÚcutoffr   Úresr
   r
   r   Úpinv_extendedÿ   s    




"ÿrN   c                 C   sj   t  | ¡} t j| t jd}t  | j¡}| }|| | j| dk@ ||< d| j|  |j|< t j|j|< |S )zì
    Reciprocal of an array with entries less than or equal to 0 set to 0.

    Parameters
    ----------
    x : array_like
        The input array.

    Returns
    -------
    ndarray
        The array with 0-filled reciprocals.
    ©Zdtyper   r=   ©r   r   Z
zeros_likeÚfloat64r   ZflatÚnan)r2   ÚoutÚnansÚposr
   r
   r   Úrecipr  s    
rV   c                 C   sj   t  | ¡} t j| t jd}t  | j¡}| }|| | j| dk@ ||< d| j|  |j|< t j|j|< |S )zà
    Reciprocal of an array with entries less than 0 set to 0.

    Parameters
    ----------
    x : array_like
        The input array.

    Returns
    -------
    ndarray
        The array with 0-filled reciprocals.
    rO   r   r=   rP   )r2   rS   rT   Znon_zeror
   r
   r   Úrecipr0/  s    
rW   c                    s:   t j ˆ d d¡}‡ fdd„t  |¡D ƒ}t  t  |¡¡S )zÖ
    Erase columns of zeros: can save some time in pseudoinverse.

    Parameters
    ----------
    matrix : ndarray
        The array to clean.

    Returns
    -------
    ndarray
        The cleaned array.
    r&   r   c                    s   g | ]}ˆ d d …|f ‘qS ©Nr
   )r+   r   ©Úmatrixr
   r   r,   V  s     zclean0.<locals>.<listcomp>)r   ÚaddrB   Zflatnonzeror   rF   )rZ   ZcolsumÚvalr
   rY   r   Úclean0G  s    r]   c                 C   s†   |dkrt j | ¡}t jj| dd\}}}t  |¡}|ddd… }g }t|ƒD ]}| |dd…|| f ¡ qNt  t  |¡¡ 	t j
¡S )aÖ  
    Return an array whose column span is the same as x.

    Parameters
    ----------
    x : ndarray
        The array to adjust, 2d.
    r : int, optional
        The rank of x. If not provided, determined by `np.linalg.matrix_rank`.

    Returns
    -------
    ndarray
        The array adjusted to have full rank.

    Notes
    -----
    If the rank of x is known it can be specified as r -- no check
    is made to ensure that this really is the rank of x.
    NF)Zfull_matricesr-   )r   r8   r9   r?   ZargsortrC   Úappendr   rF   ZastyperQ   )r2   ÚrÚvr6   rI   ÚorderÚvaluer   r
   r
   r   ÚfullrankZ  s    
rc   c                 C   s   t |ƒ}d||< |  |¡S )aW  
    Unsqueeze a collapsed array.

    Parameters
    ----------
    data : ndarray
        The data to unsqueeze.
    axis : int
        The axis to unsqueeze.
    oldshape : tuple[int]
        The original shape before the squeeze or reduce operation.

    Returns
    -------
    ndarray
        The unsqueezed array.

    Examples
    --------
    >>> from numpy import mean
    >>> from numpy.random import standard_normal
    >>> x = standard_normal((3,4,5))
    >>> m = mean(x, axis=1)
    >>> m.shape
    (3, 5)
    >>> m = unsqueeze(m, 1, x.shape)
    >>> m.shape
    (3, 1, 5)
    >>>
    r   )ÚlistZreshape)r   r   ZoldshapeZnewshaper
   r
   r   Ú	unsqueeze{  s    re   c                 C   sZ   t  t  | ¡|dk¡}t  | dkt  |¡¡}|| }t  t  | ¡t  |¡¡}t j||< |S )z«
    Returns np.dot(left_matrix, right_matrix) with the convention that
    nan * 0 = 0 and nan * x = nan if x != 0.

    Parameters
    ----------
    A, B : ndarray
    r   )r   rE   r   Z
nan_to_numrR   )ÚAÚBZshould_be_nan_1Zshould_be_nan_2Zshould_be_nanÚCr
   r
   r   Únan_dotŸ  s    

ri   c                 C   s   t | d| ƒS )zˆ
    Gets raw results back from wrapped results.

    Can be used in plotting functions or other post-estimation type
    routines.
    Z_results)Úgetattr)Úresultsr
   r
   r   Úmaybe_unwrap_results¶  s    rl   c                       s    e Zd ZdZ‡ fdd„Z‡  ZS )ÚBuncha  
    Returns a dict-like object with keys accessible via attribute lookup.

    Parameters
    ----------
    *args
        Arguments passed to dict constructor, tuples (key, value).
    **kwargs
        Keyword agument passed to dict constructor, key=value.
    c                    s   t t| ƒj||Ž | | _d S rX   )Úsuperrm   Ú__init__Ú__dict__)ÚselfÚargsÚkwargs©Ú	__class__r
   r   ro   Ë  s    zBunch.__init__)Ú__name__Ú
__module__Ú__qualname__Ú__doc__ro   Ú__classcell__r
   r
   rt   r   rm   À  s   
rm   c                 C   sˆ   | dkr| S t | dƒ}| jdkr8|r.| | jfS | dfS n| jdkrJtdƒ‚|rT| jnd}|rvt | ¡dd…df |fS t | ¡|fS dS )aå  

    Parameters
    ----------
    x : ndarray, Series, DataFrame or None
        Input to verify dimensions, and to transform as necesary
    ndarray : bool
        Flag indicating whether to always return a NumPy array. Setting False
        will return an pandas DataFrame when the input is a Series or a
        DataFrame.

    Returns
    -------
    out : ndarray, DataFrame or None
        array or DataFrame with 2 dimensiona.  One dimensional arrays are
        returned as nobs by 1. None is returned if x is None.
    names : list of str or None
        list containing variables names when the input is a pandas datatype.
        Returns None if the input is an ndarray.

    Notes
    -----
    Accepts None for simplicity
    Nr&   zx mst be 1 or 2-dimensional.)	r   r   r3   r.   Únamer   r   ÚpdZ	DataFrame)r2   ZndarrayZ	is_pandasr{   r
   r
   r   Ú
_ensure_2dÐ  s    


r}   Úqrc                 C   sÜ   t | ddd} |dkrh| dd…tj| dkddf } | t | d  d¡¡ } | j|  } tjj| |dd	S |d
krÈtjj	| dd\}t 
t |¡¡}|dkr¸|d | jd  t t¡j }t||k ¡ ƒS tjj| |dS dS )a(  
    Matrix rank calculation using QR or SVD

    Parameters
    ----------
    m : array_like
        A 2-d array-like object to test
    tol : float, optional
        The tolerance to use when testing the matrix rank. If not provided
        an appropriate value is selected.
    method : {"ip", "qr", "svd"}
        The method used. "ip" uses the inner-product of a normalized version
        of m and then computes the rank using NumPy's matrix_rank.
        "qr" uses a QR decomposition and is the default. "svd" defers to
        NumPy's matrix_rank.

    Returns
    -------
    int
        The rank of m.

    Notes
    -----
    When using a QR factorization, the rank is determined by the number of
    elements on the leading diagonal of the R matrix that are above tol
    in absolute value.
    rJ   r&   r7   ÚipNr   r'   T)ÚtolZ	hermitianr~   r_   )Úmoder   )r€   )r   r   r   ÚsqrtÚsumÚTr8   r9   Úscipyr~   ÚabsZdiagr0   ZfinfoÚfloatZepsÚint)rJ   r€   Úmethodr_   Zabs_diagr
   r
   r   r9   û  s    
r9   )r   )Nr   )NFF)Tr!   )r<   )N)F)Nr~   )ry   Znumpyr   Zpandasr|   Zscipy.linalgr…   Zstatsmodels.tools.datar   Zstatsmodels.tools.validationr   r   r   r   r    r5   r;   rN   rV   rW   r]   rc   re   ri   rl   Údictrm   r}   r9   r
   r
   r
   r   Ú<module>   s,   	

'
T
:*

!$

+