U
    Kvf^p  ã                	   @   sÊ  d Z ddlmZ ddlZdd„ Zdd„ Zdd	„ Zd@dd„ZdAdd„Z	dBdd„Z
dCdd„ZdDdd„Zdd„ ZdEdd„ZG dd„ dƒZdd„ ZG dd„ dƒZG d d!„ d!ƒZed"krÆeƒ Ze ¡  e ¡  e ¡  ddlmZ d#d$dgd% Zd&d'd(gZd)d*gZe
eeƒZeeƒ e  d+¡Z!e  d+¡d, Z"e  d+¡d- d- Z#d#ekrXee"ƒ\Z$Z%ee#ƒ\Z&Z'd$ekr‚ej(e"d.dd/Z"ej(e#d.dd/Z#e"j)d Z*ee"ƒ\Z$Z%ee#ƒ\Z&Z'ee$e&dd0Z+ee$e&d1d0Z,ee$e&d2d0Z-ee .e+j/e+¡e 0e+ 1d¡¡k 2¡ ƒ d3d4gd% Z3d5d6gd Z4e4ej5 6e*¡ Z7e3e  d%d7¡ Z8e+e8  1d%¡e7 Z9e,e8  1d%¡e7 Z:e-e8  1d%¡e7 Z;e <e9e+¡ =¡ Z>e <e9e-¡ =¡ Z?e>j@ZAe?j@ZBe <e;e+¡ =¡ ZCe <e;e-¡ =¡ ZDeCj@ZEeDj@ZFejGjHe-e+d8d9d ZIejGjHe+e-d8d9d ZJee .eJeF¡eE ƒ ee .eIeA¡eB ƒ ee+e-ƒZKee Le Me+eK Ne-¡ ¡¡ƒ ee Le Me-eK Oe+¡ ¡¡ƒ ee Le MeFeK PeE¡ ¡¡ƒ e Le MeBeK PeA¡ ¡¡ e
eeƒ\ZQZRZSZTZUZVed:ƒ ed;ƒ eeSƒ eeC WeR¡jXƒ ed<ƒ ed=ƒ eeUƒ eeC WeT¡jXƒ eC YeT¡ e Ze [d>¡e \d-¡f¡Z]e?j^d?d… Z_e> WeK Ne]¡¡jXZ`ee Le Me_e` ¡¡ƒ ee;e"e#ƒZaeea b¡ jXƒ eea b¡ jcƒ eea d¡ jeƒ eea d¡ jcƒ eea fd%¡d jXƒ eea fd-¡d jXƒ eea g¡ ƒ dS )Faò  functions to work with contrasts for multiple tests

contrast matrices for comparing all pairs, all levels to reference level, ...
extension to 2-way groups in progress

TwoWay: class for bringing two-way analysis together and try out
various helper functions


Idea for second part
- get all transformation matrices to move in between different full rank
  parameterizations
- standardize to one parameterization to get all interesting effects.

- multivariate normal distribution
  - exploit or expand what we have in LikelihoodResults, cov_params, f_test,
    t_test, example: resols_dropf_full.cov_params(C2)
  - connect to new multiple comparison for contrast matrices, based on
    multivariate normal or t distribution (Hothorn, Bretz, Westfall)

é    )Úassert_equalNc                 C   sT   g }t | ƒD ]<}t |d | ƒD ](}t | ¡}d||< d||< | |¡ qqt |¡S )zæcontrast or restriction matrix for all pairs of nm variables

    Parameters
    ----------
    nm : int

    Returns
    -------
    contr : ndarray, 2d, (nm*(nm-1)/2, nm)
       contrast matrix for all pairwise comparisons

    é   éÿÿÿÿ)ÚrangeÚnpÚzerosÚappendÚarray)ÚnmÚcontrÚiÚjZ	contr_row© r   úL/tmp/pip-unpacked-wheel-2v6byqio/statsmodels/sandbox/stats/contrast_tools.pyÚcontrast_allpairs   s    
r   c                 C   s(   t  t  | d ¡t  | d ¡ f¡}|S )zçcontrast or restriction matrix for all against first comparison

    Parameters
    ----------
    nm : int

    Returns
    -------
    contr : ndarray, 2d, (nm-1, nm)
       contrast matrix for all against first comparisons

    r   )r   Úcolumn_stackÚonesÚeye)r
   r   r   r   r   Úcontrast_all_one5   s    $r   c                 C   s   t  | ¡t  | | f¡|   S )zåcontrast or restriction matrix for all against mean comparison

    Parameters
    ----------
    nm : int

    Returns
    -------
    contr : ndarray, 2d, (nm-1, nm)
       contrast matrix for all against mean comparisons

    )r   r   r   )r
   r   r   r   Úcontrast_diff_meanE   s    r   Fc                 C   sF   | dkr:|s"t  | ¡dkrdS dS t  | ¡dkr4dS dS nt| ƒS d S )N)r   r   r   r   ú+ú-Ú )r   ÚsignÚstr)ÚxÚnoplusr   r   r   ÚsignstrT   s
    r   c                    s2   |rt d d dƒ‰nt d ƒ‰‡ ‡fdd„| D ƒ}|S )Nr   c                    s*   g | ]"}d   dd„ t|ˆ ƒˆ D ƒ¡‘qS )r   c                 S   s,   g | ]$\}}|d krdt |dd|f ‘qS ©r   z%s%sT)r   ©r   ©Ú.0ÚcÚvr   r   r   Ú
<listcomp>c   s    ÿz.contrast_labels.<locals>.<listcomp>.<listcomp>©ÚjoinÚzip©r!   Úrow©ÚnamesÚslr   r   r$   c   s   þ
ÿz#contrast_labels.<locals>.<listcomp>)Úslice)Z	contrastsr+   ÚreverseÚlabelsr   r*   r   Úcontrast_labels^   s    þr0   c                    s\  t | ƒ}t ˆ ƒ}‡ fdd„| D ƒ‰t d|f¡}d|d< |sRtj|t|ƒ f }ntj|t|ƒ f }t |dd… t |¡¡}	t|	ˆdd}
‡fdd„|	D ƒ}t d|f¡}d|d< |sÐtj|t|ƒ f }ntj|t|ƒ f }t t |¡|dd… ¡}‡fd	d„|D ƒ}|dk	rH|dk	rHt	|ƒ\}}t	|ƒ\}}t
||ƒ}nd}ˆ|	||||fS )
aˆ  build contrast matrices for products of two categorical variables

    this is an experimental script and should be converted to a class

    Parameters
    ----------
    names1, names2 : lists of strings
        contains the list of level labels for each categorical variable
    intgroup1, intgroup2 : ndarrays     TODO: this part not tested, finished yet
        categorical variable


    Notes
    -----
    This creates a full rank matrix. It does not do all pairwise comparisons,
    parameterization is using contrast_all_one to get differences with first
    level.

    ? does contrast_all_pairs work as a plugin to get all pairs ?

    c                    s"   g | ]}ˆ D ]}d ||f ‘qqS )z%s_%sr   )r!   r   r   )Únames2r   r   r$      s       z$contrast_product.<locals>.<listcomp>r   )r   r   NT)r.   c              	      s0   g | ](}d   dd„ t|ˆ ƒddd… D ƒ¡‘qS )r   c                 S   s,   g | ]$\}}|d krdt |dd|f ‘qS r   r   r    r   r   r   r$   ‹   s    ÿú/contrast_product.<locals>.<listcomp>.<listcomp>Nr   r%   r(   ©Ú
names_prodr   r   r$   ‹   s   þ
ÿc              	      s0   g | ](}d   dd„ t|ˆ ƒddd… D ƒ¡‘qS )r   c                 S   s,   g | ]$\}}|d krdt |dd|f ‘qS r   r   r    r   r   r   r$   ˜   s    ÿr2   Nr   r%   r(   r3   r   r   r$   ˜   s   þ
ÿ)Úlenr   r   Úr_r   r   Zkronr   r0   Údummy_1dÚdummy_product)Znames1r1   Z	intgroup1Z	intgroup2ÚpairsZn1Zn2Zee1ÚddZcontrast_prodZnames_contrast_prod0Znames_contrast_prodZee2Zdd2Zcontrast_prod2Znames_contrast_prod2Úd1Ú_Úd2Údummyr   )r1   r4   r   Úcontrast_producth   s@    
þ
þ  ÿr?   c                    sŒ   ˆ dkrNdd„ t |  ¡ d ƒD ƒ}| dd…df t |  ¡ d ¡k t¡|fS t | ¡}‡ fdd„|D ƒ}| dd…df |k t¡|fS dS )a  dummy variable for id integer groups

    Parameters
    ----------
    x : ndarray, 1d
        categorical variable, requires integers if varname is None
    varname : str
        name of the variable used in labels for category levels

    Returns
    -------
    dummy : ndarray, 2d
        array of dummy variables, one column for each level of the
        category (full set)
    labels : list[str]
        labels for the columns, i.e. levels of each category


    Notes
    -----
    use tools.categorical instead for more more options

    See Also
    --------
    statsmodels.tools.categorical

    Examples
    --------
    >>> x = np.array(['F', 'F', 'M', 'M', 'F', 'F', 'M', 'M', 'F', 'F', 'M', 'M'],
          dtype='|S1')
    >>> dummy_1d(x, varname='gender')
    (array([[1, 0],
           [1, 0],
           [0, 1],
           [0, 1],
           [1, 0],
           [1, 0],
           [0, 1],
           [0, 1],
           [1, 0],
           [1, 0],
           [0, 1],
           [0, 1]]), ['gender_F', 'gender_M'])

    Nc                 S   s   g | ]}d | ‘qS )zlevel_%dr   ©r!   r   r   r   r   r$   Ù   s     zdummy_1d.<locals>.<listcomp>r   c                    s   g | ]}ˆ d t |ƒ  ‘qS )z_%s)r   r@   ©Úvarnamer   r   r$   Ý   s     )r   Úmaxr   ÚarangeZastypeÚintÚunique)r   rB   r/   Zgrouplabelsr   rA   r   r7   ª   s    .,
r7   Úfullc                 C   s:  |dkrD| dd…dd…df |dd…ddd…f    | jd d¡}nò|dkr¸t| dd…dd…f |dd…dd…f ƒ}t t | jd t¡| dd…dd…f |dd…dd…f |f¡}n~|dkr.t| dd…dd…f |dd…dd…f ƒ}t t | jd t¡| dd…dd…f |dd…dd…f |f¡}ntdƒ‚|S )	aŸ  dummy variable from product of two dummy variables

    Parameters
    ----------
    d1, d2 : ndarray
        two dummy variables, assumes full set for methods 'drop-last'
        and 'drop-first'
    method : {'full', 'drop-last', 'drop-first'}
        'full' returns the full product, encoding of intersection of
        categories.
        The drop methods provide a difference dummy encoding:
        (constant, main effects, interaction effects). The first or last columns
        of the dummy variable (i.e. levels) are dropped to get full rank
        dummy matrix.

    Returns
    -------
    dummy : ndarray
        dummy variable for product, see method

    rG   Nr   r   ú	drop-lastú
drop-firstr   úmethod not recognized)ÚreshapeÚshaper8   r   r   r   rE   Ú
ValueError)r;   r=   Úmethodr:   Úd12rlÚd12rr   r   r   r8   á   s    <*B
*Br8   c           
      C   s¸   | j \}}t tj| dddk¡\}}t tj| dddk¡\}}t |¡}tjdg|f |k ¡ r€tj||d gf |k ¡ sˆtdƒ‚tjdg|d f }tj|d |gf }	||	fS )a¬  start and endpoints of groups in a sorted dummy variable array

    helper function for nested categories

    Examples
    --------
    >>> d1 = np.array([[1, 0, 0],
                       [1, 0, 0],
                       [1, 0, 0],
                       [1, 0, 0],
                       [0, 1, 0],
                       [0, 1, 0],
                       [0, 1, 0],
                       [0, 1, 0],
                       [0, 0, 1],
                       [0, 0, 1],
                       [0, 0, 1],
                       [0, 0, 1]])
    >>> dummy_limits(d1)
    (array([0, 4, 8]), array([ 4,  8, 12]))

    get group slices from an array

    >>> [np.arange(d1.shape[0])[b:e] for b,e in zip(*dummy_limits(d1))]
    [array([0, 1, 2, 3]), array([4, 5, 6, 7]), array([ 8,  9, 10, 11])]
    >>> [np.arange(d1.shape[0])[b:e] for b,e in zip(*dummy_limits(d1))]
    [array([0, 1, 2, 3]), array([4, 5, 6, 7]), array([ 8,  9, 10, 11])]
    r   ©Zaxisr   r   zdummy variable is not sorted)rL   r   ZnonzeroZdiffrD   r6   ÚallrM   )
ÚdÚnobsÚnvarsÚstart1Zcol1Úend1Zcol1_ÚccÚstartÚendr   r   r   Údummy_limits  s    

ÿr[   c                 C   sL  |dkr|S t | ƒ\}}t |ƒ\}}t ||¡}t ||¡}||k}	| |	  }
| |	  }|dkrÊt| dd…dd…f |dd…dd…f ƒ}t t | jd t¡| dd…dd…f |dd…|f f¡}nx|dkr:t| dd…dd…f |dd…dd…f ƒ}t t | jd t¡| dd…dd…f |dd…|
f f¡}ntdƒ‚||
|fS )	aÙ  unfinished and incomplete mainly copy past dummy_product
    dummy variable from product of two dummy variables

    Parameters
    ----------
    d1, d2 : ndarray
        two dummy variables, d2 is assumed to be nested in d1
        Assumes full set for methods 'drop-last' and 'drop-first'.
    method : {'full', 'drop-last', 'drop-first'}
        'full' returns the full product, which in this case is d2.
        The drop methods provide an effects encoding:
        (constant, main effects, subgroup effects). The first or last columns
        of the dummy variable (i.e. levels) are dropped to get full rank
        encoding.

    Returns
    -------
    dummy : ndarray
        dummy variable for product, see method

    rG   rH   Nr   r   rI   r   rJ   )	r[   r   Zin1dr8   r   r   rL   rE   rM   )r;   r=   rN   rV   rW   Zstart2Zend2ÚfirstÚlastÚequalZ	col_dropfZ	col_droplrO   r:   rP   r   r   r   Údummy_nested2  s"    *<
*<r_   c                   @   s8   e Zd ZdZdd„ Zdd„ Zdd„ Zdd	„ Zd
d„ ZdS )ÚDummyTransforma  Conversion between full rank dummy encodings


    y = X b + u
    b = C a
    a = C^{-1} b

    y = X C a + u

    define Z = X C, then

    y = Z a + u

    contrasts:

    R_b b = r

    R_a a = R_b C a = r

    where R_a = R_b C

    Here C is the transform matrix, with dot_left and dot_right as the main
    methods, and the same for the inverse transform matrix, C^{-1}

    Note:
     - The class was mainly written to keep left and right straight.
     - No checking is done.
     - not sure yet if method names make sense


    c                 C   s4   t jj||ddd | _t jj||ddd | _dS )z\C such that d1 C = d2, with d1 = X, d2 = Z

        should be (x, z) in arguments ?
        r   ©Zrcondr   N)r   ÚlinalgÚlstsqÚtransf_matrixÚinvtransf_matrix)Úselfr;   r=   r   r   r   Ú__init__‚  s    zDummyTransform.__init__c                 C   s   t  | j|¡S )z b = C a
        ©r   Údotrd   )rf   Úar   r   r   Údot_leftŠ  s    zDummyTransform.dot_leftc                 C   s   t  || j¡S )z z = x C
        rh   )rf   r   r   r   r   Ú	dot_right  s    zDummyTransform.dot_rightc                 C   s   t  | j|¡S )z a = C^{-1} b
        ©r   ri   re   )rf   Úbr   r   r   Úinv_dot_left”  s    zDummyTransform.inv_dot_leftc                 C   s   t  || j¡S )z x = z C^{-1}
        rm   )rf   Úzr   r   r   Úinv_dot_right™  s    zDummyTransform.inv_dot_rightN)	Ú__name__Ú
__module__Ú__qualname__Ú__doc__rg   rk   rl   ro   rq   r   r   r   r   r`   a  s    r`   c                 C   sZ   t  | ¡} | jd }tdƒgdg|d   tdƒg }| d ||   d¡d | d¡ S )an  groupmeans using dummy variables

    Parameters
    ----------
    x : array_like, ndim
        data array, tested for 1,2 and 3 dimensions
    d : ndarray, 1d
        dummy variable, needs to have the same length
        as x in axis 0.

    Returns
    -------
    groupmeans : ndarray, ndim-1
        means for each group along axis 0, the levels
        of the groups are the last axis

    Notes
    -----
    This will be memory intensive if there are many levels
    in the categorical variable, i.e. many columns in the
    dummy variable. In this case it is recommended to use
    a more efficient version.

    r   Né   ).Nr   ç      ð?)r   ZasarrayÚndimr-   Úsum)r   rS   rU   Zslir   r   r   Úgroupmean_d¢  s    

"rz   c                   @   sB   e Zd ZdZddd„Zdd„ Zdd„ Zd	d
„ Zdd„ Zdd„ Z	dS )ÚTwoWayaý  a wrapper class for two way anova type of analysis with OLS


    currently mainly to bring things together

    Notes
    -----
    unclear: adding multiple test might assume block design or orthogonality

    This estimates the full dummy version with OLS.
    The drop first dummy representation can be recovered through the
    transform method.

    TODO: add more methods, tests, pairwise, multiple, marginal effects
    try out what can be added for userfriendly access.

    missing: ANOVA table

    Nc                 C   s<  |j d | _|d krd}d}n|\}}t||ƒ \| _| _\}}t||ƒ \| _| _\}	}
|j d  | _}|	j d  | _}t	||
ƒ}|\}}}}}}|\| _
| _| _| _| _}t||	dd}t||	dd}t||ƒ| _|j d | _|| _t ||¡ ¡ | _| jj| _| j | j¡| _d|d  |d  | _| j| j | _d S )Nr   rj   rn   r   rG   ©rN   rI   )rL   rT   r7   r;   Ú	d1_labelsr=   Ú	d2_labelsÚnlevel1Únlevel2r?   Z
prod_labelÚC1ÚC1_labelÚC2ÚC2_labelr8   r`   Ú	transformrU   ZexogÚsmÚOLSÚfitÚresolsÚparamsro   Zparams_dropfZstart_interactionÚn_interaction)rf   ZendogZfactor1Zfactor2ÚvarnamesZvname1Zvname2r;   r}   r=   r~   r   r€   ÚresÚprodlabr   ÚC1labrƒ   ÚC2labr<   Zdp_fullZdp_dropfr   r   r   rg   Ù  s,    

zTwoWay.__init__c                 C   s@   | j }t t || j| f¡t |¡f¡}| j |¡}|| _|S )z7contrast/restriction matrix for no interaction
        )	r‹   r   Úhstackr   rU   r   r…   rq   ÚR_nointer_transf)rf   ÚniaÚ	R_nointerr’   r   r   r   Ú	r_nointerû  s
    $zTwoWay.r_nointerc                 C   sL   | j }t t || j| f¡t |¡f¡}| j |¡}|| _| j	 
|¡}|S ©z1ttests for no-interaction terms are zero
        )r‹   r   r‘   r   rU   r   r…   rq   r’   r‰   Út_test)rf   r“   r”   r’   Zt_resr   r   r   Úttest_interaction  s    $zTwoWay.ttest_interactionc                 C   s   |   ¡ }| j |¡S r–   )r•   r‰   Zf_test)rf   r’   r   r   r   Úftest_interaction  s    zTwoWay.ftest_interactionc                 C   s4   |dkr| j  | j¡| jfS | j  | j¡| jfS d S )Nr   )r‰   r—   r   r‚   rƒ   r„   )rf   Z	factorindr   r   r   Úttest_conditional_effect  s    zTwoWay.ttest_conditional_effectc                 C   sR   ddl m} | j | j| j¡}| j}| j}d}tdg| j d}||||||dS )Nr   )ÚSimpleTablez!Estimated Coefficients by factorsz%#10.4g)Z	data_fmts)ÚtitleZtxt_fmt)	Zstatsmodels.iolibr›   rŠ   rK   r   r€   r}   r~   Údict)rf   r›   Z
params_arrZstubsÚheadersrœ   Z	table_fmtr   r   r   Úsummary_coeff  s    
ÿ
ÿzTwoWay.summary_coeff)N)
rr   rs   rt   ru   rg   r•   r˜   r™   rš   rŸ   r   r   r   r   r{   Å  s   
"
r{   c                   @   s,   e Zd Zdd„ Zdd„ Zdd„ Zdd„ Zd	S )
ÚTestContrastToolsc                 C   s†   dddg| _ ddg| _t dddgdddgdddgdddgdddgdddgdddgdddgdddgdddgdddgdddgg¡| _d S )NÚa0Úa1Úa2Úb0Úb1r   r   )Úv1nameÚv2namer   r	   r;   )rf   r   r   r   rg   .  s    
õzTestContrastTools.__init__c                 C   sª   t jddddddddddddgdd}t  ddgddgddgddgddgddgddgddgddgddgddgddgg¡ddg }}t|d	d
\}}t||ƒ t||ƒ d S )NÚFÚMz|S1)Zdtyper   r   Zgender_FZgender_MZgenderrA   )r   r	   r7   r   )rf   r   rS   r/   Zres_dZ
res_labelsr   r   r   Útest_dummy_1d>  s*    ÿ
õõ
zTestContrastTools.test_dummy_1dc                 C   sø   t | j| jƒ}dgd }ddddddg|d< t d	d
dd
d
d
gd
d	d
dd
d
gd	d
d
d
dd
gd
d	d
d
d
dgg¡|d< ddddg|d< t d	dd
d
d
d
gd
d
d	dd
d
gd
d
d
d
d	dgg¡|d< dddg|d< tdƒD ]"}tjj|| || t|ƒd qÐd S )Nr   é   Za0_b0Za0_b1Za1_b0Za1_b1Za2_b0Za2_b1g      ð¿g        rw   r   za1_b0-a0_b0za1_b1-a0_b1za2_b0-a0_b0za2_b1-a0_b1rv   é   za0_b1-a0_b0za1_b1-a1_b0za2_b1-a2_b0é   é   )Úerr_msg)	r?   r¦   r§   r   r	   r   Ztestingr   r   )rf   Úres_cpZres_tÚiir   r   r   Útest_contrast_productQ  s     
ý
þ
z'TestContrastTools.test_contrast_productc                 C   s>   t | jƒ\}}t|t dddg¡ƒ t|t dddg¡ƒ d S )Nr   r­   é   é   )r[   r;   r   r   r	   )rf   rn   Úer   r   r   Útest_dummy_limitsa  s    z#TestContrastTools.test_dummy_limitsN)rr   rs   rt   rg   rª   r²   r¶   r   r   r   r   r    ,  s   r    Ú__main__ÚsmallZlarger   r¡   r¢   r£   r¤   r¥   r´   r­   rv   r®   rQ   r|   rH   rI   rw   g{®Gáz„?gü©ñÒMbP?gš™™™™™¹?é   r   ra   z"
tvalues for no effect of factor 1z/each test is conditional on a level of factor 2z"
tvalues for no effect of factor 2z/each test is conditional on a level of factor 1)rv   r­   éþÿÿÿ)F)F)NNF)N)rG   )rG   )hru   Znumpy.testingr   Znumpyr   r   r   r   r   r0   r?   r7   r8   r[   r_   r`   rz   r{   r    rr   Úttr²   rª   r¶   Zstatsmodels.apiÚapir†   Zexamplesr¦   r§   r°   ÚprintrD   ÚyÚx1Zx2r;   r}   r=   r~   ÚrepeatrL   rT   Zdd_fullZdd_droplZdd_dropfri   ÚTZdiagry   rR   Zeffect_sizeZnoise_scaleÚrandomZrandnZnoiseÚbetaZ
ydata_fullZydata_droplZydata_dropfr‡   rˆ   Zresols_full_fullZresols_full_dropfrŠ   Z
params_f_fZparams_f_dfZresols_dropf_fullZresols_dropf_dropfZparams_df_fZparams_df_dfrb   rc   Ztr_ofZtr_foZtransf_f_dfrC   Úabsrq   rl   ro   rŽ   r   r   rƒ   r   r<   r—   ZtvalueZ
cov_paramsr‘   r   r   ZR_nointZtvaluesZinter_directZinter_transfZtwr˜   Zpvaluer™   Zfvaluerš   rŸ   r   r   r   r   Ú<module>   s¼   




B
7
%,
/A#g=






&
ÿ

ÿ
d