U
    KvfV                     @   s   d dl mZ d dlmZmZmZ d dlmZ d dlm	Z	 d dl
ZdddZddd	Zdd
dZdd Zdd ZdddZdddZi fddZG dd dZG dd deZdS )    )RegularizedResults)_calc_nodewise_row_calc_nodewise_weight_calc_approx_inv_cov)LikelihoodModelResults)OLSNc                 C   s   |dkrt d| jf |jS )a  estimates the regularized fitted parameters.

    Parameters
    ----------
    mod : statsmodels model class instance
        The model for the current partition.
    pnum : scalar
        Index of current partition
    partitions : scalar
        Total number of partitions
    fit_kwds : dict-like or None
        Keyword arguments to be given to fit_regularized

    Returns
    -------
    An array of the parameters for the regularized fit
    NzD_est_regularized_naive currently requires that fit_kwds not be None.)
ValueErrorfit_regularizedparamsmodpnum
partitionsfit_kwds r   K/tmp/pip-unpacked-wheel-2v6byqio/statsmodels/base/distributed_estimation.py_est_regularized_naiveK   s    r   c                 C   s   |dkrt d| jf |jS )a  estimates the unregularized fitted parameters.

    Parameters
    ----------
    mod : statsmodels model class instance
        The model for the current partition.
    pnum : scalar
        Index of current partition
    partitions : scalar
        Total number of partitions
    fit_kwds : dict-like or None
        Keyword arguments to be given to fit

    Returns
    -------
    An array of the parameters for the fit
    NzF_est_unregularized_naive currently requires that fit_kwds not be None.)r   fitr
   r   r   r   r   _est_unregularized_naivee   s    r   c                 C   sN   t | d }t | }t|}| D ]}||7 }q"|| }d|t||k < |S )a   joins the results from each run of _est_<type>_naive
    and returns the mean estimate of the coefficients

    Parameters
    ----------
    params_l : list
        A list of arrays of coefficients.
    threshold : scalar
        The threshold at which the coefficients will be cut.
    r   )lennpzerosabs)Zparams_l	thresholdpr   	params_mnr
   r   r   r   _join_naive   s    

r   c                 C   s*   | j t|f| }||d|  7 }|S )a  calculates the log-likelihood gradient for the debiasing

    Parameters
    ----------
    mod : statsmodels model class instance
        The model for the current partition.
    params : array_like
        The estimated coefficients for the current partition.
    alpha : scalar or array_like
        The penalty weight.  If a scalar, the same penalty weight
        applies to all variables in the model.  If a vector, it
        must have the same length as `params`, and contains a
        penalty weight for each coefficient.
    L1_wt : scalar
        The fraction of the penalty given to the L1 penalty term.
        Must be between 0 and 1 (inclusive).  If 0, the fit is
        a ridge fit, if 1 it is a lasso fit.
    score_kwds : dict-like or None
        Keyword arguments for the score function.

    Returns
    -------
    An array-like object of the same dimension as params

    Notes
    -----
    In general:

    gradient l_k(params)

    where k corresponds to the index of the partition

    For OLS:

    X^T(y - X^T params)
       )Zscorer   asarray)r   r
   alphaL1_wt
score_kwdsgradr   r   r   
_calc_grad   s    &r#   c                 C   s0   t | jt |f|}|dddf | j S )a  calculates the weighted design matrix necessary to generate
    the approximate inverse covariance matrix

    Parameters
    ----------
    mod : statsmodels model class instance
        The model for the current partition.
    params : array_like
        The estimated coefficients for the current partition.
    hess_kwds : dict-like or None
        Keyword arguments for the hessian function.

    Returns
    -------
    An array-like object, updated design matrix, same dimension
    as mod.exog
    N)r   sqrtZhessian_factorr   exog)r   r
   	hess_kwdsZrhessr   r   r   _calc_wdesign_mat   s    r'   c                 C   s  |dkri n|}|dkri n|}|dkr2t dn|d }d|krL|d }nd}| jj\}}	ttd|	 | }
| jf |j}t| ||||| }t	| ||}g }g }t
||
 t|d |
 |	D ]2}t|||}|| t||||}|| q||||fS )a  estimates the regularized fitted parameters, is the default
    estimation_method for class DistributedModel.

    Parameters
    ----------
    mod : statsmodels model class instance
        The model for the current partition.
    mnum : scalar
        Index of current partition.
    partitions : scalar
        Total number of partitions.
    fit_kwds : dict-like or None
        Keyword arguments to be given to fit_regularized
    score_kwds : dict-like or None
        Keyword arguments for the score function.
    hess_kwds : dict-like or None
        Keyword arguments for the Hessian function.

    Returns
    -------
    A tuple of parameters for regularized fit
        An array-like object of the fitted parameters, params
        An array-like object for the gradient
        A list of array like objects for nodewise_row
        A list of array like objects for nodewise_weight
    NzG_est_regularized_debiased currently requires that fit_kwds not be None.r   r    r   g      ?)r   r%   shapeintr   ceilr	   r
   r#   r'   rangeminr   appendr   )r   Zmnumr   r   r!   r&   r   r    Znobsr   Zp_partr
   r"   Zwexognodewise_row_lnodewise_weight_lidxZnodewise_rowZnodewise_weightr   r   r   _est_regularized_debiased   s.    

 
r1   c                 C   s   t | d d }t | }t|}t|}g }g }| D ]8}||d 7 }||d 7 }||d  ||d  q8t|}t|}|| }|d| 9 }t||}	||	| }
d|
t|
|k < |
S )a  joins the results from each run of _est_regularized_debiased
    and returns the debiased estimate of the coefficients

    Parameters
    ----------
    results_l : list
        A list of tuples each one containing the params, grad,
        nodewise_row and nodewise_weight values for each partition.
    threshold : scalar
        The threshold at which the coefficients will be cut.
    r   r         g      )r   r   r   extendarrayr   dotr   )	results_lr   r   r   r   Zgrad_mnr.   r/   rZapprox_inv_covZdebiased_paramsr   r   r   _join_debiased  s&    




r9   c           	      C   sF   | j  }|| | j||f|}| j||| jfd|i| j}|S )a  handles the model fitting for each machine. NOTE: this
    is primarily handled outside of DistributedModel because
    joblib cannot handle class methods.

    Parameters
    ----------
    self : DistributedModel class instance
        An instance of DistributedModel.
    pnum : scalar
        index of current partition.
    endog : array_like
        endogenous data for current partition.
    exog : array_like
        exogenous data for current partition.
    fit_kwds : dict-like
        Keywords needed for the model fitting.
    init_kwds_e : dict-like
        Additional init_kwds to add for each partition.

    Returns
    -------
    estimation_method result.  For the default,
    _est_regularized_debiased, a tuple.
    r   )	init_kwdscopyupdatemodel_classestimation_methodr   estimation_kwds)	selfr   endogr%   r   init_kwds_eZtemp_init_kwdsmodelresultsr   r   r   _helper_fit_partitionH  s    

rE   c                   @   s8   e Zd ZdZdddZdddZddd	Zdd
dZdS )DistributedModela  
    Distributed model class

    Parameters
    ----------
    partitions : scalar
        The number of partitions that the data will be split into.
    model_class : statsmodels model class
        The model class which will be used for estimation. If None
        this defaults to OLS.
    init_kwds : dict-like or None
        Keywords needed for initializing the model, in addition to
        endog and exog.
    init_kwds_generator : generator or None
        Additional keyword generator that produces model init_kwds
        that may vary based on data partition.  The current usecase
        is for WLS and GLS
    estimation_method : function or None
        The method that performs the estimation for each partition.
        If None this defaults to _est_regularized_debiased.
    estimation_kwds : dict-like or None
        Keywords to be passed to estimation_method.
    join_method : function or None
        The method used to recombine the results from each partition.
        If None this defaults to _join_debiased.
    join_kwds : dict-like or None
        Keywords to be passed to join_method.
    results_class : results class or None
        The class of results that should be returned.  If None this
        defaults to RegularizedResults.
    results_kwds : dict-like or None
        Keywords to be passed to results class.

    Attributes
    ----------
    partitions : scalar
        See Parameters.
    model_class : statsmodels model class
        See Parameters.
    init_kwds : dict-like
        See Parameters.
    init_kwds_generator : generator or None
        See Parameters.
    estimation_method : function
        See Parameters.
    estimation_kwds : dict-like
        See Parameters.
    join_method : function
        See Parameters.
    join_kwds : dict-like
        See Parameters.
    results_class : results class
        See Parameters.
    results_kwds : dict-like
        See Parameters.

    Notes
    -----

    Examples
    --------
    Nc
           
      C   s   || _ |d krt| _n|| _|d kr,i | _n|| _|d krBt| _n|| _|d krXi | _n|| _|d krnt| _n|| _|d kri | _	n|| _	|d krt
| _n|| _|	d kri | _n|	| _d S N)r   r   r=   r:   r1   r>   r?   r9   join_method	join_kwdsr   results_classresults_kwds)
r@   r   r=   r:   r>   r?   rH   rI   rJ   rK   r   r   r   __init__  s2    zDistributedModel.__init__
sequentialc           	      C   s   |dkri }|dkr$|  |||}n&|dkr>| ||||}ntd| | j|f| j}| jdgdgf| j}| j||f| jS )ae  Performs the distributed estimation using the corresponding
        DistributedModel

        Parameters
        ----------
        data_generator : generator
            A generator that produces a sequence of tuples where the first
            element in the tuple corresponds to an endog array and the
            element corresponds to an exog array.
        fit_kwds : dict-like or None
            Keywords needed for the model fitting.
        parallel_method : str
            type of distributed estimation to be used, currently
            "sequential", "joblib" and "dask" are supported.
        parallel_backend : None or joblib parallel_backend object
            used to allow support for more complicated backends,
            ex: dask.distributed
        init_kwds_generator : generator or None
            Additional keyword generator that produces model init_kwds
            that may vary based on data partition.  The current usecase
            is for WLS and GLS

        Returns
        -------
        join_method result.  For the default, _join_debiased, it returns a
        p length array.
        NrM   Zjoblibz.parallel_method: %s is currently not supportedr   )	fit_sequential
fit_joblibr   rH   rI   r=   r:   rJ   rK   )	r@   data_generatorr   Zparallel_methodparallel_backendinit_kwds_generatorr7   r
   Zres_modr   r   r   r     s"    zDistributedModel.fitc                 C   s   g }|dkr>t |D ]&\}\}}t| ||||}|| qn@t t||}	|	D ],\}\\}}}
t| |||||
}|| qP|S )a*  Sequentially performs the distributed estimation using
        the corresponding DistributedModel

        Parameters
        ----------
        data_generator : generator
            A generator that produces a sequence of tuples where the first
            element in the tuple corresponds to an endog array and the
            element corresponds to an exog array.
        fit_kwds : dict-like
            Keywords needed for the model fitting.
        init_kwds_generator : generator or None
            Additional keyword generator that produces model init_kwds
            that may vary based on data partition.  The current usecase
            is for WLS and GLS

        Returns
        -------
        join_method result.  For the default, _join_debiased, it returns a
        p length array.
        N)	enumeraterE   r-   zip)r@   rP   r   rR   r7   r   rA   r%   rD   tup_genrB   r   r   r   rN     s"    

 zDistributedModel.fit_sequentialc           
   	      s  ddl m} |tj\} }|dkrN|dkrN| fddt|D }n|dk	r|dkr|$ | fddt|D }W 5 Q R X n|dkr|dk	rtt||}	| fdd|	D }nL|dk	r|dk	rtt||}	|  | fdd|	D }W 5 Q R X |S )	a  Performs the distributed estimation in parallel using joblib

        Parameters
        ----------
        data_generator : generator
            A generator that produces a sequence of tuples where the first
            element in the tuple corresponds to an endog array and the
            element corresponds to an exog array.
        fit_kwds : dict-like
            Keywords needed for the model fitting.
        parallel_backend : None or joblib parallel_backend object
            used to allow support for more complicated backends,
            ex: dask.distributed
        init_kwds_generator : generator or None
            Additional keyword generator that produces model init_kwds
            that may vary based on data partition.  The current usecase
            is for WLS and GLS

        Returns
        -------
        join_method result.  For the default, _join_debiased, it returns a
        p length array.
        r   )parallel_funcNc                 3   s&   | ]\}\}} |||V  qd S rG   r   .0r   rA   r%   fr   r@   r   r   	<genexpr>c  s   
z.DistributedModel.fit_joblib.<locals>.<genexpr>c                 3   s&   | ]\}\}} |||V  qd S rG   r   rW   rY   r   r   r[   i  s   
c                 3   s,   | ]$\}\\}}} ||||V  qd S rG   r   rX   r   rA   r%   r:   rY   r   r   r[   o  s   c                 3   s,   | ]$\}\\}}} ||||V  qd S rG   r   r\   rY   r   r   r[   v  s   )Zstatsmodels.tools.parallelrV   rE   r   rS   rT   )
r@   rP   r   rQ   rR   rV   parZn_jobsr7   rU   r   rY   r   rO   D  s.    

zDistributedModel.fit_joblib)NNNNNNNN)NrM   NN)N)N)__name__
__module____qualname____doc__rL   r   rN   rO   r   r   r   r   rF   m  s$   ?            
/    
: 
0 rF   c                       s(   e Zd ZdZ fddZdd Z  ZS )DistributedResultsaT  
    Class to contain model results

    Parameters
    ----------
    model : class instance
        Class instance for model used for distributed data,
        this particular instance uses fake data and is really
        only to allow use of methods like predict.
    params : ndarray
        Parameter estimates from the fit model.
    c                    s   t t| || d S rG   )superrb   rL   )r@   rC   r
   	__class__r   r   rL     s    zDistributedResults.__init__c                 O   s   | j j| j|f||S )a  Calls self.model.predict for the provided exog.  See
        Results.predict.

        Parameters
        ----------
        exog : array_like NOT optional
            The values for which we want to predict, unlike standard
            predict this is NOT optional since the data in self.model
            is fake.
        *args :
            Some models can take additional arguments. See the
            predict method of the model for the details.
        **kwargs :
            Some models can take additional keywords arguments. See the
            predict method of the model for the details.

        Returns
        -------
            prediction : ndarray, pandas.Series or pandas.DataFrame
            See self.model.predict
        )rC   predictr
   )r@   r%   argskwargsr   r   r   rf     s    zDistributedResults.predict)r^   r_   r`   ra   rL   rf   __classcell__r   r   rd   r   rb   }  s   rb   )N)N)r   )NNN)r   )Zstatsmodels.base.elastic_netr   Z(statsmodels.stats.regularized_covariancer   r   r   Zstatsmodels.base.modelr   Z#statsmodels.regression.linear_modelr   Znumpyr   r   r   r   r#   r'   r1   r9   rE   rF   rb   r   r   r   r   <module>   s(   E


+    
A
.
%  