U
    KvfI                     @   s   d Z ddlZddlm  mZ ddlm  mZ	 ddl
m  mZ ddlmZmZ ddlZddlZddlZG dd dejZG dd deZG dd	 d	eZG d
d dejZG dd deZG dd de	jZeee dS )zA
Conditional logistic, Poisson, and multinomial logit regression
    N)MultinomialResultsMultinomialResultsWrapperc                	       sP   e Zd Zd fdd	Zdd Zd fdd	ZdddZed fdd	Z  Z	S )_ConditionalModelnonec                    s  d|krt d|d }|j|jkr0d}t ||jd |jkrLd}t |tt| j||fd|i| | jjd k	rd}t || j}|jd | _	i }t
|D ]&\}}	|	|krg ||	< ||	 | qt|t| }}|d	}
g | _g | _g | _|
d k	rt|
}
g | _g | _g | _d| _ddg}| D ]\}	}|| j}t|dkr|d  d7  < |d  t|7  < q6|  jt|7  _| j| |
d k	r| j|
|  | jt| | j||d d f  | jt| q6|d dkrd
t| }t| |
d k	rZg | _t
| jD ]$\}}| jt| j| | q4t| j| _ g | _!g | _"t#| j D ]>}	| j!t| j|	 | j|	  | j"t| j|	  q|d S )Ngroups'groups' is a required argumentz4'endog' and 'groups' should have the same dimensionsr   zBThe leading dimension of 'exog' should equal the length of 'endog'missingzDConditional models should not have an intercept in the design matrix   offsetzIDropped %d groups and %d observations for having no within-group variance)$
ValueErrorsizeshapesuperr   __init__dataZ	const_idxexogk_params	enumerateappendnpZasarrayget
_endog_grp	_exog_grp
_groupsize_offset_grp_offset_sumynobsitemsZflatZstdlensumtuplewarningswarn_endofsdot	_n_groups_xy_n1range)selfendogr   r   kwargsr   msgZrow_ixigr
   Zdropsixykofs	__class__ K/tmp/pip-unpacked-wheel-2v6byqio/statsmodels/discrete/conditional_models.pyr      s    
 






 z_ConditionalModel.__init__c                 C   s&   ddl m} ||| j}t|}|S )Nr   )approx_fprime)Zstatsmodels.tools.numdiffr8   scorer   Z
atleast_2d)r*   paramsr8   Zhessr6   r6   r7   hessianb   s    
z_ConditionalModel.hessianNBFGSd   TFr6   c
                    s~   t t| j||||||	d}t| |j| d}||_| j|_| j|_	dt
| j dt| j dt| j g|_t|}|S )Nstart_paramsmethodmaxiterfull_outputdispskip_hessianr	   z%dz%.1f)r   r   fitConditionalResultsr:   Z
cov_paramsr@   r   r&   n_groupsminr   maxr   Zmean_group_statsConditionalResultsWrapper)r*   r?   r@   rA   rB   rC   fargscallbackretallrD   r,   rsltZcrsltr4   r6   r7   rE   i   s$    
z_ConditionalModel.fitelastic_net        c                 K   sN   ddl m} |dkrtdddddd}|| || f||||d	|S )
a  
        Return a regularized fit to a linear regression model.

        Parameters
        ----------
        method : {'elastic_net'}
            Only the `elastic_net` approach is currently implemented.
        alpha : scalar or array_like
            The penalty weight.  If a scalar, the same penalty weight
            applies to all variables in the model.  If a vector, it
            must have the same length as `params`, and contains a
            penalty weight for each coefficient.
        start_params : array_like
            Starting values for `params`.
        refit : bool
            If True, the model is refit using only the variables that
            have non-zero coefficients in the regularized fit.  The
            refitted model is not regularized.
        **kwargs
            Additional keyword argument that are used when fitting the model.

        Returns
        -------
        Results
            A results instance.
        r   )fit_elasticnetrP   z.method for fit_regularized must be elastic_net2   r	   g|=)rA   ZL1_wtZ	cnvrg_tolZzero_tol)r@   alphar?   refit)Zstatsmodels.base.elastic_netrR   r   update)r*   r@   rT   r?   rU   r,   rR   defaultsr6   r6   r7   fit_regularized   s    !
z!_ConditionalModel.fit_regularizedc           	         s   z|d }|d= W n t k
r.   tdY nX t|trB|| }d|ddkr\td tt| j	|f|||d|}|S )Nr   r   z0+  z2Conditional models should not include an intercept)r   r   )
KeyErrorr   
isinstancestrreplacer"   r#   r   r   from_formula)	clsZformular   ZsubsetZ	drop_colsargsr,   r   modelr4   r6   r7   r_      s(    	



 z_ConditionalModel.from_formula)r   )	Nr<   r=   TFr6   NFF)rP   rQ   NF)NN)
__name__
__module____qualname__r   r;   rE   rX   classmethodr_   __classcell__r6   r6   r4   r7   r      s*   P         !    
1  r   c                       sV   e Zd ZdZd fdd	Zdd Zdd Zdd
dZdddZdd Z	dd Z
  ZS )ConditionalLogita  
    Fit a conditional logistic regression model to grouped data.

    Every group is implicitly given an intercept, but the model is fit using
    a conditional likelihood in which the intercepts are not present.  Thus,
    intercept estimates are not given, but the other parameter estimates can
    be interpreted as being adjusted for any group-level confounders.

    Parameters
    ----------
    endog : array_like
        The response variable, must contain only 0 and 1.
    exog : array_like
        The array of covariates.  Do not include an intercept
        in this array.
    groups : array_like
        Codes defining the groups. This is a required keyword parameter.
    r   c                    sX   t t| j||fd|i| tt| jtjd krFd}t|| j	j
d | _d S )Nr   )r   r	   zendog must be coded as 0, 1r	   )r   rh   r   r   anyuniquer+   Zr_r   r   r   K)r*   r+   r   r   r,   r-   r4   r6   r7   r      s    
 zConditionalLogit.__init__c                 C   s,   d}t t| jD ]}|| ||7 }q|S Nr   )r)   r   r   loglike_grp)r*   r:   llr/   r6   r6   r7   loglike   s    zConditionalLogit.loglikec                 C   s(   d}t | jD ]}|| ||7 }q|S rl   )r)   r&   	score_grp)r*   r:   r9   r/   r6   r6   r7   r9      s    zConditionalLogit.scoreNc                    sR   |d krd}t t | j| ||  i  fdd| j| | j| S )Nr   c                    sx   | |k rdS |dkrdS z| |f W S  t k
r:   Y nX | d || d |d  | d    }|| |f< |S )Nr   r	   )r[   )tr2   vexbfmemor6   r7   ru     s    ,z"ConditionalLogit._denom.<locals>.f)r   expr%   r   r   r(   r*   grpr:   r3   r6   rs   r7   _denom  s    zConditionalLogit._denomc                    sZ   |d krd}j |  tt || i  fddj| j| S )Nr   c           
         s   | |k rdt jfS |dkr$dS z| |f W S  tk
rF   Y nX | d  }| d |\}}| d |d \}}||  | d d d f  }|||  || ||   }}	||	f| |f< ||	fS )Nr   )r	   r   r	   )r   zerosr   r[   )
rq   r2   habcedurr   exrt   rv   sr*   r6   r7   r   .  s    z'ConditionalLogit._denom_grad.<locals>.s)r   r   rw   r%   r   r(   rx   r6   r   r7   _denom_grad"  s    
zConditionalLogit._denom_gradc                 C   s\   d }t | dr| j| }t| j| |}|d k	r@|| j| 7 }|t| |||8 }|S )Nr
   )hasattrr   r   r%   r'   r$   logrz   )r*   ry   r:   r3   Zllgr6   r6   r7   rm   F  s    

zConditionalLogit.loglike_grpc                 C   s<   d}t | dr| j| }| |||\}}| j| ||  S )Nr   r
   )r   r   r   r'   )r*   ry   r:   r3   r   r|   r6   r6   r7   rp   U  s
    

zConditionalLogit.score_grp)r   )N)N)rc   rd   re   __doc__r   ro   r9   rz   r   rm   rp   rg   r6   r6   r4   r7   rh      s   

$rh   c                   @   s    e Zd ZdZdd Zdd ZdS )ConditionalPoissonaU  
    Fit a conditional Poisson regression model to grouped data.

    Every group is implicitly given an intercept, but the model is fit using
    a conditional likelihood in which the intercepts are not present.  Thus,
    intercept estimates are not given, but the other parameter estimates can
    be interpreted as being adjusted for any group-level confounders.

    Parameters
    ----------
    endog : array_like
        The response variable
    exog : array_like
        The covariates
    groups : array_like
        Codes defining the groups. This is a required keyword parameter.
    c           	      C   s   d }t | dr| j}d}tt| jD ]n}t| j| |}|d k	rP||| 7 }t|}| j| }|t||7 }|	 }|| j
| t| 8 }q&|S Nr
   rQ   )r   r   r)   r   r   r   r%   r   rw   r    r   r   )	r*   r:   r3   rn   r.   xbrt   r1   r   r6   r6   r7   ro   r  s    


zConditionalPoisson.loglikec           
      C   s   d }t | dr| j}d}tt| jD ]x}| j| }t||}|d k	rT||| 7 }t|}|	 }| j| }	|t|	|7 }|| j
| t|| | 8 }q&|S r   )r   r   r)   r   r   r   r   r%   rw   r    r   )
r*   r:   r3   r9   r.   xr   rt   r   r1   r6   r6   r7   r9     s    



 zConditionalPoisson.scoreN)rc   rd   re   r   ro   r9   r6   r6   r6   r7   r   _  s   r   c                       s&   e Zd Z fddZdddZ  ZS )rF   c                    s   t t| j||||d d S )N)normalized_cov_paramsscale)r   rF   r   )r*   rb   r:   r   r   r4   r6   r7   r     s    
zConditionalResults.__init__N皙?c           	      C   s   dddd| j gfddg}dd| jgfd	| jd
 gfd| jd gfd| jd gfg}|dkr^d}d
dlm} | }|j| |||||d |j| |||| jd |S )a<  
        Summarize the fitted model.

        Parameters
        ----------
        yname : str, optional
            Default is `y`
        xname : list[str], optional
            Names for the exogenous variables, default is "var_xx".
            Must match the number of parameters in the model
        title : str, optional
            Title for the top table. If not None, then this replaces the
            default title
        alpha : float
            Significance level for the confidence intervals

        Returns
        -------
        smry : Summary instance
            This holds the summary tables and text, which can be printed or
            converted to various output formats.

        See Also
        --------
        statsmodels.iolib.summary.Summary : class to hold summary
            results
        )zDep. Variable:N)zModel:N)zLog-Likelihood:NzMethod:)zDate:N)zTime:N)zNo. Observations:NzNo. groups:zMin group size:r   zMax group size:r	   zMean group size:   Nz*Conditional Logit Model Regression Results)Summary)ZgleftZgrightynamexnametitle)r   r   rT   use_t)r@   rG   rJ   Zstatsmodels.iolib.summaryr   Zadd_table_2colsZadd_table_paramsr   )	r*   r   r   r   rT   Ztop_leftZ	top_rightr   Zsmryr6   r6   r7   summary  sB    


    zConditionalResults.summary)NNNr   )rc   rd   re   r   r   rg   r6   r6   r4   r7   rF     s   rF   c                	       s<   e Zd ZdZd fdd	ZdddZdd Zdd Z  ZS )ConditionalMNLogita  
    Fit a conditional multinomial logit model to grouped data.

    Parameters
    ----------
    endog : array_like
        The dependent variable, must be integer-valued, coded
        0, 1, ..., c-1, where c is the number of response
        categories.
    exog : array_like
        The independent variables.
    groups : array_like
        Codes defining the groups. This is a required keyword parameter.

    Notes
    -----
    Equivalent to femlogit in Stata.

    References
    ----------
    Gary Chamberlain (1980).  Analysis of covariance with qualitative
    data. The Review of Economic Studies.  Vol. 47, No. 1, pp. 225-238.
    r   c                    s  t t| j||fd|i| | jt| _| j d | _| jd | jj	d  | _
| j| j
 | _dd t| jD | _| j| _| jj	d | _| j dk rd}t|tt t| jD ]\}} | | qt  | _| j   fdd| jD | _d S )	Nr   r	   c                 S   s   i | ]}|t |qS r6   )r]   ).0jr6   r6   r7   
<dictcomp>  s      z/ConditionalMNLogit.__init__.<locals>.<dictcomp>r   z%endog may not contain negative valuesc                    s   g | ]} | qS r6   r6   )r   r2   Zgrxr6   r7   
<listcomp>  s     z/ConditionalMNLogit.__init__.<locals>.<listcomp>)r   r   r   r+   ZastypeintrI   k_catr   r   Zdf_modelr   Zdf_residr)   Z_ynames_mapJrk   rH   r   collectionsdefaultdictlistr   r   r   keysZ_group_labelssort_grp_ix)r*   r+   r   r   r,   r-   r2   rr   r4   r   r7   r      s0    
 

zConditionalMNLogit.__init__Nr<   r=   TFr6   c
              	   K   s   |d kr0| j jd }| jd }tjj|| d}tjj| ||||||	d}|j	
| j jd df|_	t| |}|jtjd t|S )Nr	   )r   r>   )Zllnull)r   r   r   r   randomnormalbaseLikelihoodModelrE   r:   reshaper   Zset_null_optionsnanr   )r*   r?   r@   rA   rB   rC   rL   rM   rN   rD   r,   qr   rO   r6   r6   r7   rE     s"    
	
zConditionalMNLogit.fitc                 C   s   | j jd }| jd }|||f}tjt|df|fdd}t| j |}d}| jD ]~}||d d f }tj	|jd t
d}	| j| }
d}t|
D ]}|t||	|f  7 }q|||	|
f  t| 7 }qX|S )Nr	   ZaxisrQ   r   Zdtype)r   r   r   r   r   concatenater{   r%   r   aranger   r+   	itertoolspermutationsrw   r    r   )r*   r:   r   r   pmatlprrn   iir   jjr1   denompr6   r6   r7   ro   ?  s    


 zConditionalMNLogit.loglikec                 C   s  | j jd }| jd }|||f}tjt|df|fdd}t| j |}t||f}| jD ]}||d d f }tj	|jd t
d}	| j| }
d}t||f}t|
D ]n}t||	|f  }||7 }t|D ]B\}}|dkr|d d |d f  || j || d d f  7  < qqt|
D ]B\}}|dkr*|d d |d f  | j || d d f 7  < q*||| 8 }qb| S )Nr	   r   r   r   rQ   )r   r   r   r   r   r   r{   r%   r   r   r   r+   r   r   rw   r    r   flatten)r*   r:   r   r   r   r   Zgradr   r   r   r1   r   Zdenomgr   rr   r.   rr6   r6   r7   r9   U  s.    

6
2zConditionalMNLogit.score)r   )	Nr<   r=   TFr6   NFF)	rc   rd   re   r   r   rE   ro   r9   rg   r6   r6   r4   r7   r     s            
%r   c                   @   s   e Zd ZdS )rK   N)rc   rd   re   r6   r6   r6   r7   rK   v  s   rK   )r   Znumpyr   Zstatsmodels.base.modelr   rb   Z#statsmodels.regression.linear_modelZ
regressionZlinear_modelZlmZstatsmodels.base.wrapperwrapperwrapZ#statsmodels.discrete.discrete_modelr   r   r   r"   r   r   r   rh   r   ZLikelihoodModelResultsrF   r   ZRegressionResultsWrapperrK   Zpopulate_wrapperr6   r6   r6   r7   <module>   s$    F ?I 