U
    QvfHD  ã                   @   sP   d dl Zd dlmZ dgZddd„Zddd	„Zdd
d„Zddd„Zddd„Z	dS )é    N)ÚSeriesÚ	multicompçš™™™™™©?Úfdr_bhc              	   C   s.  |  ¡ dkst‚t | ¡} | j}|  ¡ } t | ¡ ¡ }t | ¡}| | }| ¡ }| j	| }t 
d|d ¡t|ƒ }	|  ¡ dkr t dt 
d|d ¡ ¡}
|	|
 }	|d|… |	 }tj |ddd… ¡ddd… }t |dd¡}t |t |tj¡¡}||  |¡}tjdd t ||¡}W 5 Q R X ||fS )	u	  P-values FDR correction with Benjamini/Hochberg and
    Benjamini/Yekutieli procedure.

    This covers Benjamini/Hochberg for independent or positively correlated and
    Benjamini/Yekutieli for general or negatively correlated tests.

    Parameters
    ----------
    pvals : array_like
        Array of p-values of the individual tests.
    alpha : float
        Error rate (= alpha level).
    method : str
        FDR correction methods. Can be 'fdr_bh' or 'fdr_by'.

    Returns
    -------
    reject : array, bool
        True if a hypothesis is rejected, False if not
    pval_corrected : array
        P-values adjusted for multiple hypothesis testing using the BH or BY
        correction.

    See also
    --------
    bonf : Bonferroni correction
    holm : Holm-Bonferroni correction

    Notes
    -----
    From Wikipedia:

    The **Benjaminiâ€“Hochberg** procedure (BH step-up procedure) controls the
    false discovery rate (FDR) at level :math:`\alpha`. It works as follows:

    1. For a given :math:`\alpha`, find the largest :math:`k` such that
    :math:`P_{(k)}\leq \frac {k}{m}\alpha.`

    2. Reject the null hypothesis (i.e., declare discoveries) for all
    :math:`H_{(i)}` for :math:`i = 1, \ldots, k`.

    The BH procedure is valid when the m tests are independent, and also in
    various scenarios of dependence, but is not universally valid.

    The **Benjaminiâ€“Yekutieli** procedure (BY) controls the FDR under arbitrary
    dependence assumptions. This refinement modifies the threshold and finds
    the largest :math:`k` such that:

    .. math::
        P_{(k)} \leq \frac{k}{m \cdot c(m)} \alpha

    References
    ----------
    - Benjamini, Y., and Hochberg, Y. (1995). Controlling the false discovery
      rate: a practical and powerful approach to multiple testing. Journal of
      the Royal Statistical Society Series B, 57, 289â€“300.

    - Benjamini, Y., and Yekutieli, D. (2001). The control of the false
      discovery rate in multiple testing under dependency. Annals of
      Statistics, 29, 1165â€“1188.

    - https://en.wikipedia.org/wiki/False_discovery_rate

    Examples
    --------
    FDR correction of an array of p-values

    >>> import pingouin as pg
    >>> pvals = [.50, .003, .32, .054, .0003]
    >>> reject, pvals_corr = pg.multicomp(pvals, method='fdr_bh', alpha=.05)
    >>> print(reject, pvals_corr)
    [False  True False False  True] [0.5    0.0075 0.4    0.09   0.0015]
    )r   Úfdr_byé   r   ç      ð?NéÿÿÿÿÚignore©Úinvalid)ÚlowerÚAssertionErrorÚnpÚasarrayÚshapeÚravelÚisnanÚsumÚargsortÚsizeÚarangeÚfloatZminimumÚ
accumulateÚclipÚappendÚfullÚnanÚreshapeÚerrstateÚless)ÚpvalsÚalphaÚmethodÚ
shape_initÚnum_nanÚpvals_sortindÚpvals_sortedÚ
sortrevindÚntestsZ
ecdffactorÚcmÚ
pvals_corrÚpvals_correctedÚreject© r.   ú6/tmp/pip-unpacked-wheel-2te3nxqf/pingouin/multicomp.pyÚfdr   s*    J


 r0   c              	   C   sd   t  | ¡} t  | ¡ ¡ }| t| jƒ|  }t  |dd¡}t jdd t  ||¡}W 5 Q R X ||fS )aç  P-values correction with Bonferroni method.

    Parameters
    ----------
    pvals : array_like
        Array of p-values of the individual tests.
    alpha : float
        Error rate (= alpha level).

    Returns
    -------
    reject : array, bool
        True if a hypothesis is rejected, False if not
    pval_corrected : array
        P-values adjusted for multiple hypothesis testing using the Bonferroni
        procedure (= multiplied by the number of tests).

    See also
    --------
    holm : Holm-Bonferroni correction
    fdr : Benjamini/Hochberg and Benjamini/Yekutieli FDR correction

    Notes
    -----
    From Wikipedia:

    Statistical hypothesis testing is based on rejecting the null hypothesis
    if the likelihood of the observed data under the null hypotheses is low.
    If multiple hypotheses are tested, the chance of a rare event increases,
    and therefore, the likelihood of incorrectly rejecting a null hypothesis
    (i.e., making a Type I error) increases.
    The Bonferroni correction compensates for that increase by testing each
    individual hypothesis :math:`p_i` at a significance level of
    :math:`p_i = \alpha / n` where :math:`\alpha` is the desired overall
    alpha level and :math:`n` is the number of hypotheses. For example, if a
    trial is testing :math:`n=20` hypotheses with a desired
    :math:`\alpha=0.05`, then the Bonferroni correction would test each
    individual hypothesis at :math:`\alpha=0.05/20=0.0025``.

    The Bonferroni adjusted p-values are defined as:

    .. math::
        \widetilde {p}_{{(i)}}= n \cdot p_{{(i)}}

    The Bonferroni correction tends to be a bit too conservative.

    Note that NaN values are not taken into account in the p-values correction.

    References
    ----------
    - Bonferroni, C. E. (1935). Il calcolo delle assicurazioni su gruppi
      di teste. Studi in onore del professore salvatore ortu carboni, 13-60.

    - https://en.wikipedia.org/wiki/Bonferroni_correction

    Examples
    --------
    >>> import pingouin as pg
    >>> pvals = [.50, .003, .32, .054, .0003]
    >>> reject, pvals_corr = pg.multicomp(pvals, method='bonf', alpha=.05)
    >>> print(reject, pvals_corr)
    [False  True False False  True] [1.     0.015  1.     0.27   0.0015]
    Nr   r
   r   )	r   r   r   r   r   r   r   r   r    )r!   r"   r%   r,   r-   r.   r.   r/   Úbonfz   s    @
r1   c              	   C   sÎ   t  | ¡} | j}|  ¡ } t  | ¡ ¡ }t  | ¡}| | }| ¡ }| j| }|d|… t  |dd¡ }t j	 
|¡}t  |dd¡}t  |t  |t j¡¡}||  |¡}	t jdd t  |	|¡}
W 5 Q R X |
|	fS )u^  P-values correction with Holm method.

    Parameters
    ----------
    pvals : array_like
        Array of p-values of the individual tests.
    alpha : float
        Error rate (= alpha level).

    Returns
    -------
    reject : array, bool
        True if a hypothesis is rejected, False if not
    pvals_corrected : array
        P-values adjusted for multiple hypothesis testing using the Holm
        procedure.

    See also
    --------
    bonf : Bonferroni correction
    fdr : Benjamini/Hochberg and Benjamini/Yekutieli FDR correction

    Notes
    -----
    From Wikipedia:

    In statistics, the Holmâ€“Bonferroni method (also called the Holm method) is
    used to counteract the problem of multiple comparisons. It is intended to
    control the family-wise error rate and offers a simple test uniformly more
    powerful than the Bonferroni correction.

    The Holm adjusted p-values are the running maximum of the sorted p-values
    divided by the corresponding increasing alpha level:

    .. math::

        \frac{\alpha}{n}, \frac{\alpha}{n-1}, ..., \frac{\alpha}{1}

    where :math:`n` is the number of test.

    The full mathematical formula is:

    .. math::
        \widetilde {p}_{{(i)}}=\max _{{j\leq i}}\left\{(n-j+1)p_{{(j)}}
        \right\}_{{1}}

    Note that NaN values are not taken into account in the p-values correction.

    References
    ----------
    - Holm, S. (1979). A simple sequentially rejective multiple test procedure.
      Scandinavian journal of statistics, 65-70.

    - https://en.wikipedia.org/wiki/Holm%E2%80%93Bonferroni_method

    Examples
    --------
    >>> import pingouin as pg
    >>> pvals = [.50, .003, .32, .054, .0003]
    >>> reject, pvals_corr = pg.multicomp(pvals, method='holm', alpha=.05)
    >>> print(reject, pvals_corr)
    [False  True False False  True] [0.64   0.012  0.64   0.162  0.0015]
    Nr   r	   r   r
   r   )r   r   r   r   r   r   r   r   r   Úmaximumr   r   r   r   r   r   r   r    )r!   r"   r$   r%   r&   r'   r(   r)   r+   r,   r-   r.   r.   r/   ÚholmÃ   s     A


r3   c              	   C   st   t  | ¡} t  | ¡ ¡ }t| jƒ| }dt  d|  |¡ }t  |dd¡}t jdd t  	||¡}W 5 Q R X ||fS )u‚  P-values correction with Sidak method.

    Parameters
    ----------
    pvals : array_like
        Array of p-values of the individual tests.
    alpha : float
        Error rate (= alpha level).

    Returns
    -------
    reject : array, bool
        True if a hypothesis is rejected, False if not
    pval_corrected : array
        P-values adjusted for multiple hypothesis testing using the Sidak
        procedure.

    See also
    --------
    bonf, holm, fdr, multicomp

    Notes
    -----
    The Sidak adjusted p-values are defined as:

    .. math::
        \widetilde {p}_{{(i)}}= 1 - (1 - p_{{(i)}})^{n}

    The Sidak correction is slightly more liberal than the Bonferroni
    correction.

    Note that NaN values are not taken into account in the p-values correction.

    References
    ----------
    - Å idÃ¡k, Z. K. (1967). "Rectangular Confidence Regions for the Means of
      Multivariate Normal Distributions". Journal of the American Statistical
      Association. 62 (318): 626â€“633.

    - https://en.wikipedia.org/wiki/%C5%A0id%C3%A1k_correction

    Examples
    --------
    >>> import numpy as np
    >>> import pingouin as pg
    >>> pvals = [.50, .003, .32, .054, .0003]
    >>> reject, pvals_corr = pg.multicomp(pvals, method='sidak', alpha=.05)
    >>> print(reject, np.round(pvals_corr, 4))
    [False  True False False  True] [0.9688 0.0149 0.8546 0.2424 0.0015]
    r   r   Nr
   r   )
r   r   r   r   r   r   Úpowerr   r   r    )r!   r"   r%   r)   r,   r-   r.   r.   r/   Úsidak  s    3
r5   c              	   C   sJ  t | ttjtfƒstdƒ‚t |tƒs,tdƒ‚t |tƒs>tdƒ‚d|  k rRdk s\n tdƒ‚t | ¡} | 	¡ dkr„t
| |d\}}n¾| 	¡ d	kr¢t| |d\}}n | 	¡ d
krÀt| |d\}}n‚| 	¡ dkràt| |dd\}}nb| 	¡ dkrt| |dd\}}n@| 	¡ dkr:| }tjdd t ||¡}W 5 Q R X ntdƒ‚||fS )uÊ  P-values correction for multiple comparisons.

    Parameters
    ----------
    pvals : array_like
        Uncorrected p-values.
    alpha : float
        Significance level.
    method : string
        Method used for testing and adjustment of p-values. Can be either the
        full name or initial letters. Available methods are:

        * ``'bonf'``: one-step Bonferroni correction
        * ``'sidak'``: one-step Sidak correction
        * ``'holm'``: step-down method using Bonferroni adjustments
        * ``'fdr_bh'``: Benjamini/Hochberg FDR correction
        * ``'fdr_by'``: Benjamini/Yekutieli FDR correction
        * ``'none'``: pass-through option (no correction applied)

    Returns
    -------
    reject : array, boolean
        True for hypothesis that can be rejected for given alpha.
    pvals_corrected : array
        P-values corrected for multiple testing.

    Notes
    -----
    This function is similar to the `p.adjust
    <https://stat.ethz.ch/R-manual/R-devel/library/stats/html/p.adjust.html>`_
    R function.

    The correction methods include the Bonferroni correction (``'bonf'``)
    in which the p-values are multiplied by the number of comparisons.
    Less conservative methods are also included such as Sidak (1967)
    (``'sidak'``), Holm (1979) (``'holm'``), Benjamini & Hochberg (1995)
    (``'fdr_bh'``), and Benjamini & Yekutieli (2001) (``'fdr_by'``),
    respectively.

    The first three methods are designed to give strong control of the
    family-wise error rate. Note that the Holm's method is usually preferred.
    The ``'fdr_bh'`` and ``'fdr_by'`` methods control the false discovery rate,
    i.e. the expected proportion of false discoveries amongst the rejected
    hypotheses. The false discovery rate is a less stringent condition than
    the family-wise error rate, so these methods are more powerful than the
    others.

    The **Bonferroni** [1]_ adjusted p-values are defined as:

    .. math::
        \widetilde {p}_{{(i)}}= n \cdot p_{{(i)}}

    where :math:`n` is the number of *finite* p-values (i.e. excluding NaN).

    The **Sidak** [2]_ adjusted p-values are defined as:

    .. math::
        \widetilde {p}_{{(i)}}= 1 - (1 - p_{{(i)}})^{n}

    The **Holm** [3]_ adjusted p-values are the running maximum of the sorted
    p-values divided by the corresponding increasing alpha level:

    .. math::
        \widetilde {p}_{{(i)}}=\max _{{j\leq i}}\left\{(n-j+1)p_{{(j)}}
        \right\}_{{1}}

    The **Benjaminiâ€“Hochberg** procedure (BH step-up procedure, [4]_)
    controls the false discovery rate (FDR) at level :math:`\alpha`.
    It works as follows:

    1. For a given :math:`\alpha`, find the largest :math:`k` such that
    :math:`P_{(k)}\leq \frac {k}{n}\alpha.`

    2. Reject the null hypothesis for all
    :math:`H_{(i)}` for :math:`i = 1, \ldots, k`.

    The BH procedure is valid when the :math:`n` tests are independent, and
    also in various scenarios of dependence, but is not universally valid.

    The **Benjaminiâ€“Yekutieli** procedure (BY, [5]_) controls the FDR under
    arbitrary dependence assumptions. This refinement modifies the threshold
    and finds the largest :math:`k` such that:

    .. math::
        P_{(k)} \leq \frac{k}{n \cdot c(n)} \alpha

    References
    ----------
    .. [1] Bonferroni, C. E. (1935). Il calcolo delle assicurazioni su gruppi
       di teste. Studi in onore del professore salvatore ortu carboni, 13-60.

    .. [2] Å idÃ¡k, Z. K. (1967). "Rectangular Confidence Regions for the Means
       of Multivariate Normal Distributions". Journal of the American
       Statistical Association. 62 (318): 626â€“633.

    .. [3] Holm, S. (1979). A simple sequentially rejective multiple test
       procedure. Scandinavian Journal of Statistics, 6, 65â€“70.

    .. [4] Benjamini, Y., and Hochberg, Y. (1995). Controlling the false
       discovery rate: a practical and powerful approach to multiple testing.
       Journal of the Royal Statistical Society Series B, 57, 289â€“300.

    .. [5] Benjamini, Y., and Yekutieli, D. (2001). The control of the false
       discovery rate in multiple testing under dependency. Annals of
       Statistics, 29, 1165â€“1188.

    Examples
    --------
    FDR correction of an array of p-values

    >>> import pingouin as pg
    >>> pvals = [.50, .003, .32, .054, .0003]
    >>> reject, pvals_corr = pg.multicomp(pvals, method='fdr_bh')
    >>> print(reject, pvals_corr)
    [False  True False False  True] [0.5    0.0075 0.4    0.09   0.0015]

    Holm correction with missing values

    >>> import numpy as np
    >>> pvals[2] = np.nan
    >>> reject, pvals_corr = pg.multicomp(pvals, method='holm')
    >>> print(reject, pvals_corr)
    [False  True False False  True] [0.5    0.009     nan 0.108  0.0012]
    zpvals must be list or arrayzalpha must be a float.zmethod must be a string.r   r   zalpha must be between 0 and 1.)Úbr1   Z
bonferroni)r"   )Úhr3   )Úsr5   )r0   r   Zbhr   )r"   r#   )r   Zbyr   Únoner
   r   z)Multiple comparison method not recognized)Ú
isinstanceÚlistr   Zndarrayr   r   r   Ústrr   r   r1   r3   r5   r0   r   r    Ú
ValueError)r!   r"   r#   r-   r,   r.   r.   r/   r   ^  s*    ~
)r   r   )r   )r   )r   )r   r3   )
Znumpyr   Zpandasr   Ú__all__r0   r1   r3   r5   r   r.   r.   r.   r/   Ú<module>   s   
l
I
Y
B