U
    Gvf<                  
   @   s  d Z ddddddddd	d
g
ZddlZddlmZmZmZ ddlmZ ddlm	Z	 ddl
mZ ddlmZmZmZmZ edddgddfddZd$ddZedddgdfddZd%dd
ZdddgdfddZdddgddfddZd&d dZd'd!dZd(d"dZd)d#d	ZdS )*zB
Additional statistics functions with support for masked arrays.

compare_medians_mshdquantileshdmedianhdquantiles_sdidealfourthsmedian_cihsmjcimquantiles_cimjrshtrimmed_mean_ci    N)float_int_ndarray)MaskedArray   )_mstats_basic)normbetatbinomg      ?      ?g      ?Fc                 C   s   dd }t j| dtd} tj|ddd}|dks:| jdkrH|| ||}n*| jdkr`td	| j t ||| ||}t j|dd
S )a  
    Computes quantile estimates with the Harrell-Davis method.

    The quantile estimates are calculated as a weighted linear combination
    of order statistics.

    Parameters
    ----------
    data : array_like
        Data array.
    prob : sequence, optional
        Sequence of quantiles to compute.
    axis : int or None, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.
    var : bool, optional
        Whether to return the variance of the estimate.

    Returns
    -------
    hdquantiles : MaskedArray
        A (p,) array of quantiles (if `var` is False), or a (2,p) array of
        quantiles and variances (if `var` is True), where ``p`` is the
        number of quantiles.

    See Also
    --------
    hdquantiles_sd

    c                 S   sJ  t t |  t}|j}t dt|ft	}|dk rTt j
|_|rL|S |d S t |d t| }tj}t|D ]t\}}	|||d |	 |d d|	  }
|
dd |
dd  }t ||}||d|f< t ||| d |d|f< qx|d |d|dkf< |d |d|dkf< |rBt j
 |d|dkf< |d|dkf< |S |d S )zGComputes the HD quantiles for a 1D array. Returns nan for invalid data.   r   r   N)npsqueezesort
compressedviewr   sizeemptylenr   nanflatarangefloatr   cdf	enumeratedot)dataprobvarxsortednZhdvbetacdfip_wwZhd_mean r3   >/tmp/pip-unpacked-wheel-96ln3f52/scipy/stats/_mstats_extras.py_hd_1D;   s,     "zhdquantiles.<locals>._hd_1DFcopydtyper   r7   ZndminNr   DArray 'data' must be at most two dimensional, but got data.ndim = %dr7   )maarrayr   r   ndim
ValueErrorapply_along_axisfix_invalid)r(   r)   axisr*   r5   r0   resultr3   r3   r4   r      s    
r   c                 C   s   t | dg||d}| S )a9  
    Returns the Harrell-Davis estimate of the median along the given axis.

    Parameters
    ----------
    data : ndarray
        Data array.
    axis : int, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.
    var : bool, optional
        Whether to return the variance of the estimate.

    Returns
    -------
    hdmedian : MaskedArray
        The median values.  If ``var=True``, the variance is returned inside
        the masked array.  E.g. for a 1-D array the shape change from (1,) to
        (2,).

    r   )rB   r*   )r   r   )r(   rB   r*   rC   r3   r3   r4   r   g   s    c                 C   sv   dd }t j| dtd} tj|ddd}|dkr<|| |}n(| jdkrTtd	| j t ||| |}t j|dd
 S )a  
    The standard error of the Harrell-Davis quantile estimates by jackknife.

    Parameters
    ----------
    data : array_like
        Data array.
    prob : sequence, optional
        Sequence of quantiles to compute.
    axis : int, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    Returns
    -------
    hdquantiles_sd : MaskedArray
        Standard error of the Harrell-Davis quantile estimates.

    See Also
    --------
    hdquantiles

    c                 S   s  t |  }t|}t t|t}|dk r6t j|_t |t	|d  }t
j}t|D ]\}}|||| |d|  }	|	dd |	dd  }
t |}t |
|dd  |dd< |dd  t |
ddd |ddd  ddd 7  < t | |d  ||< qZ|S )z%Computes the std error for 1D arrays.r   r   Nr   r   )r   r   r   r    r   r   r!   r"   r#   r$   r   r%   r&   Z
zeros_likeZcumsumsqrtr*   )r(   r)   r+   r,   Zhdsdvvr.   r/   r0   r1   r2   Zmx_r3   r3   r4   _hdsd_1D   s    
<z hdquantiles_sd.<locals>._hdsd_1DFr6   r   r9   Nr   r:   r;   )	r<   r=   r   r   r>   r?   r@   rA   Zravel)r(   r)   rB   rF   r0   rC   r3   r3   r4   r      s    
皙?rH   TT皙?c           
      C   s|   t j| dd} tj| |||d}||}tj| |||d}||d }td|d  |}	t	||	|  ||	|  fS )a  
    Selected confidence interval of the trimmed mean along the given axis.

    Parameters
    ----------
    data : array_like
        Input data.
    limits : {None, tuple}, optional
        None or a two item tuple.
        Tuple of the percentages to cut on each side of the array, with respect
        to the number of unmasked data, as floats between 0. and 1. If ``n``
        is the number of unmasked data before trimming, then
        (``n * limits[0]``)th smallest data and (``n * limits[1]``)th
        largest data are masked.  The total number of unmasked data after
        trimming is ``n * (1. - sum(limits))``.
        The value of one limit can be set to None to indicate an open interval.

        Defaults to (0.2, 0.2).
    inclusive : (2,) tuple of boolean, optional
        If relative==False, tuple indicating whether values exactly equal to
        the absolute limits are allowed.
        If relative==True, tuple indicating whether the number of data being
        masked on each side should be rounded (True) or truncated (False).

        Defaults to (True, True).
    alpha : float, optional
        Confidence level of the intervals.

        Defaults to 0.05.
    axis : int, optional
        Axis along which to cut. If None, uses a flattened version of `data`.

        Defaults to None.

    Returns
    -------
    trimmed_mean_ci : (2,) ndarray
        The lower and upper confidence intervals of the trimmed data.

    Fr;   )limits	inclusiverB   r          @)
r<   r=   mstatsZtrimrZmeanZtrimmed_stdecountr   ppfr   )
r(   rK   rL   alpharB   ZtrimmedZtmeanZtstdeZdfZtppfr3   r3   r4   r
      s    *
c                 C   sd   dd }t j| dd} | jdkr.td| j tj|ddd}|d	krP|| |S t ||| |S d	S )
a  
    Returns the Maritz-Jarrett estimators of the standard error of selected
    experimental quantiles of the data.

    Parameters
    ----------
    data : ndarray
        Data array.
    prob : sequence, optional
        Sequence of quantiles to compute.
    axis : int or None, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    c                 S   s   t |  } | j}t || d t}tj}t 	t
|t}t jd|d td| }|d|  }t|D ]b\}}	|||	d ||	 |||	d ||	  }
t |
| }t |
| d }t ||d  ||< qn|S )Nr   r   )r8   g      ?r   )r   r   r   r   r=   Zastyper   r   r%   r   r    r   r#   r&   r'   rD   )r(   r0   r,   r)   r.   Zmjxyr/   mWZC1ZC2r3   r3   r4   _mjci_1D  s    (zmjci.<locals>._mjci_1DFr;   r   r:   r   r9   N)r<   r=   r>   r?   r   r@   )r(   r)   rB   rV   r0   r3   r3   r4   r      s    

c                 C   sZ   t |d| }td|d  }tj| |dd|d}t| ||d}|||  |||  fS )a  
    Computes the alpha confidence interval for the selected quantiles of the
    data, with Maritz-Jarrett estimators.

    Parameters
    ----------
    data : ndarray
        Data array.
    prob : sequence, optional
        Sequence of quantiles to compute.
    alpha : float, optional
        Confidence level of the intervals.
    axis : int or None, optional
        Axis along which to compute the quantiles.
        If None, use a flattened array.

    Returns
    -------
    ci_lower : ndarray
        The lower boundaries of the confidence interval.  Of the same length as
        `prob`.
    ci_upper : ndarray
        The upper boundaries of the confidence interval.  Of the same length as
        `prob`.

    r   rM   r   )ZalphapZbetaprB   rB   )minr   rP   rN   Z
mquantilesr   )r(   r)   rQ   rB   zZxqZsmjr3   r3   r4   r      s
    c                 C   sV   dd }t j| dd} |dkr*|| |}n(| jdkrBtd| j t ||| |}|S )aA  
    Computes the alpha-level confidence interval for the median of the data.

    Uses the Hettmasperger-Sheather method.

    Parameters
    ----------
    data : array_like
        Input data. Masked values are discarded. The input should be 1D only,
        or `axis` should be set to None.
    alpha : float, optional
        Confidence level of the intervals.
    axis : int or None, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    Returns
    -------
    median_cihs
        Alpha level confidence interval.

    c           	      S   s>  t |  } t| }t|d| }tt|d |d}t|| |dt|d |d }|d| k r|d8 }t|| |dt|d |d }t|| d |dt||d }|d | ||  }|| | t	||d|  |   }|| |  d| | |d    || || d   d| | ||    f}|S )Nr   rM   r   r   )
r   r   r   r    rX   intr   Z_ppfr%   r$   )	r(   rQ   r,   kZgkZgkkIlambdZlimsr3   r3   r4   _cihs_1DY  s    $$$$&zmedian_cihs.<locals>._cihs_1DFr;   Nr   r:   )r<   r=   r>   r?   r@   )r(   rQ   rB   r^   rC   r3   r3   r4   r   B  s    
c                 C   sn   t j| |dt j||d }}tj| |dtj||d }}t|| t |d |d   }dt| S )a"  
    Compares the medians from two independent groups along the given axis.

    The comparison is performed using the McKean-Schrader estimate of the
    standard error of the medians.

    Parameters
    ----------
    group_1 : array_like
        First dataset.  Has to be of size >=7.
    group_2 : array_like
        Second dataset.  Has to be of size >=7.
    axis : int, optional
        Axis along which the medians are estimated. If None, the arrays are
        flattened.  If `axis` is not None, then `group_1` and `group_2`
        should have the same shape.

    Returns
    -------
    compare_medians_ms : {float, ndarray}
        If `axis` is None, then returns a float, otherwise returns a 1-D
        ndarray of floats with a length equal to the length of `group_1`
        along `axis`.

    Examples
    --------

    >>> from scipy import stats
    >>> a = [1, 2, 3, 4, 5, 6, 7]
    >>> b = [8, 9, 10, 11, 12, 13, 14]
    >>> stats.mstats.compare_medians_ms(a, b, axis=None)
    1.0693225866553746e-05

    The function is vectorized to compute along a given axis.

    >>> import numpy as np
    >>> rng = np.random.default_rng()
    >>> x = rng.random(size=(3, 7))
    >>> y = rng.random(size=(3, 8))
    >>> stats.mstats.compare_medians_ms(x, y, axis=1)
    array([0.36908985, 0.36092538, 0.2765313 ])

    References
    ----------
    .. [1] McKean, Joseph W., and Ronald M. Schrader. "A comparison of methods
       for studentizing the sample median." Communications in
       Statistics-Simulation and Computation 13.6 (1984): 751-773.

    rW   r   r   )	r<   ZmedianrN   Zstde_medianr   absrD   r   r%   )Zgroup_1Zgroup_2rB   Zmed_1Zmed_2Zstd_1Zstd_2rU   r3   r3   r4   r   u  s    2$c                 C   s>   dd }t j| |dt} |dkr,|| S t ||| S dS )aC  
    Returns an estimate of the lower and upper quartiles.

    Uses the ideal fourths algorithm.

    Parameters
    ----------
    data : array_like
        Input array.
    axis : int, optional
        Axis along which the quartiles are estimated. If None, the arrays are
        flattened.

    Returns
    -------
    idealfourths : {list of floats, masked array}
        Returns the two internal values that divide `data` into four parts
        using the ideal fourths algorithm either along the flattened array
        (if `axis` is None) or along `axis` of `data`.

    c                 S   s   |   }t|}|dk r$tjtjgS t|d d d\}}t|}d| ||d   |||   }|| }d| ||  |||d    }||gS )N   g      @g?r   )r   r    r   r!   divmodrZ   )r(   rR   r,   jhZqlor[   Zqupr3   r3   r4   _idf  s      zidealfourths.<locals>._idfrW   N)r<   r   r   r   r@   )r(   rB   rd   r3   r3   r4   r     s
    c                 C   s   t j| dd} |dkr| }ntj|ddd}| jdkr>td|  }t| dd}d|d	 |d
   |d  }| dddf |dddf | kd
}| dddf |dddf | k d
}|| d| |  S )a  
    Evaluates Rosenblatt's shifted histogram estimators for each data point.

    Rosenblatt's estimator is a centered finite-difference approximation to the
    derivative of the empirical cumulative distribution function.

    Parameters
    ----------
    data : sequence
        Input data, should be 1-D. Masked values are ignored.
    points : sequence or None, optional
        Sequence of points where to evaluate Rosenblatt shifted histogram.
        If None, use the data.

    Fr;   Nr   r9   z#The input array should be 1D only !rW   g333333?r   r   rH   rM   )r<   r=   r   r>   AttributeErrorrO   r   sum)r(   Zpointsr,   rrc   ZnhiZnlor3   r3   r4   r	     s    
**)r   F)rG   rI   rJ   N)rJ   N)N)N)N)__doc____all__Znumpyr   r   r   r   Znumpy.mar<   r    r   rN   Zscipy.stats.distributionsr   r   r   r   listr   r   r   r
   r   r   r   r   r   r	   r3   r3   r3   r4   <module>   s<       K
?    
3-"
3
9
(