U
    QvfT9                  
   @   s   d Z ddlZddlZddlZddlZddlZ	ddl
m
Z
 ddlmZ dddd	d
dddddg
Zd#ddZd$ddZdd Zdd Zd%ddZd&dd
Zd'ddZdd	 Zd(ddZd)d dZd*d!dZd+d"dZdS ),zHelper functions.    N)tabulate   )options
_perm_pvalprint_table_postprocess_dataframe_check_eftype	remove_na_flatten_list_check_dataframe_is_sklearn_installed_is_statsmodels_installed_is_mpmath_installed	two-sidedc                 C   s   |dkst dt|ttfs"t t| } | jdks>t d| j}|dksTt d|dkrrt| |	 | }n<|dkrt
| |	 | }ntt| t|	 | }|S )ah  
    Compute p-values from a permutation test.

    Parameters
    ----------
    bootstat : 1D array
        Permutation distribution.
    estimate : float or int
        Point estimate.
    alternative : str
        Tail for p-value. Can be either `'two-sided'` (default), `'greater'` or `'less'`.

    Returns
    -------
    p : float
        P-value.
    )r   greaterlesszWrong tail argument.r   zbootstat must be a 1D array.z&bootstat must have at least one value.r   r   )AssertionError
isinstanceintfloatnpasarrayndimsizeZgreater_equalsumZ
less_equalfabsabs)ZbootstatZestimatealternativeZn_bootp r   2/tmp/pip-unpacked-wheel-2te3nxqf/pingouin/utils.pyr      s    
.3fsimplec                 C   sJ   d|   krtd d|   kr(td tt| dd||d td d	S )
a  Pretty display of table.

    Parameters
    ----------
    df : :py:class:`pandas.DataFrame`
        Dataframe to print (e.g. ANOVA summary)
    floatfmt : string
        Decimal number formatting
    tablefmt : string
        Table format (e.g. 'simple', 'plain', 'html', 'latex', 'grid', 'rst').
        For a full list of available formats, please refer to
        https://pypi.org/project/tabulate/
    Fz+
=============
ANOVA SUMMARY
=============
Az.
==============
POST HOC TESTS
==============
keysF)headersZ	showindexfloatfmttablefmt N)r%   printr   )dfr'   r(   r   r   r    r   >   s    c                 C   s  |   } t| j| jD ]\}}t||}|dkr4qt|rt|| j||f }| | t	|| |< || j||f< qt
| j||f trqt
| j||f tj}t
| j||f tj}t||gsq|rt| j||f jj	tj}|sqtj| j||f |d| j||f< q| S )a<  Apply some post-processing to an ouput dataframe (e.g. rounding).

    Whether and how rounding is applied is governed by options specified in
    `pingouin.options`. The default rounding (number of decimals) is
    determined by `pingouin.options['round']`. You can specify rounding for a
    given column name by the option `'round.column.<colname>'`, e.g.
    `'round.column.CI95%'`. Analogously, `'round.row.<rowname>'` also works
    (where `rowname`) refers to the pandas index), as well as
    `'round.cell.[<rolname>]x[<colname]'`. A cell-based option is used,
    if available; if not, a column-based option is used, if
    available; if not, a row-based option is used, if available; if not,
    the default is used. (Default `pingouin.options['round'] = None`,
    i.e. no rounding is applied.)

    If a round option is `callable` instead of `int`, then it will be called,
    and the return value stored in the cell.

    Post-processing is applied on a copy of the DataFrame, leaving the
    original DataFrame untouched.

    This is an internal function (no public API).

    Parameters
    ----------
    df : :py:class:`pandas.DataFrame`
        Dataframe to apply post-processing to (e.g. ANOVA summary)

    Returns
    ----------
    df : :py:class:`pandas.DataFrame`
        Dataframe with post-processing applied
    N)Zdecimals)copyitproductindexcolumns_get_round_setting_forcallableatZastypetyper   boolnumbersNumberr   Zndarrayany
issubclassdtypeZfloatinground)r+   rowcolZround_optionZnewvalZ	is_numberZis_arrayZis_float_arrayr   r   r    r   U   s,    !
$c              	   C   sZ   d|  d| dd| d|  f}|D ](}zt | W   S  tk
rN   Y q(X q(t d S )Nzround.cell.[z]x[]zround.column.z
round.row.r;   )r   KeyError)r<   r=   Zkeys_to_checkkeyr   r   r    r1      s    r1   rowsc                 C   s|   | j dkrt|  }n&|dkr$dnd}tjt| |d }|  rx|dkrTdnd}| j dkrfdn|}| j||d} | S )zMRemove NaN in a single array.
    This is an internal Pingouin function.
    r   rA   r   axis)r   r   isnanr8   allcompress)xrC   x_maskaxr   r   r    _remove_na_single   s    

rJ   Fc           
      C   s  t | } |dkstd|dkr.t| |dS t|tttfrNt| |d|fS t |}|jdksjtd|jdkrt| |d|fS | j	|j	ks|dkrt| |d}t||d}||fS | j	dkrt 
|  }t 
| }n<|d	krdnd}t jt 
| |d }t jt 
||d }|  s0|  r~|d	kr>dnd}| j	dkrRdn|}t ||}	| j|	|d} |j|	|d}| |fS )
a  Remove missing values along a given axis in one or more (paired) numpy arrays.

    Parameters
    ----------
    x, y : 1D or 2D arrays
        Data. ``x`` and ``y`` must have the same number of dimensions.
        ``y`` can be None to only remove missing values in ``x``.
    paired : bool
        Indicates if the measurements are paired or not.
    axis : str
        Axis or axes along which missing values are removed.
        Can be 'rows' or 'columns'. This has no effect if ``x`` and ``y`` are
        one-dimensional arrays.

    Returns
    -------
    x, y : np.ndarray
        Data without missing values

    Examples
    --------
    Single 1D array

    >>> import numpy as np
    >>> from pingouin import remove_na
    >>> x = [6.4, 3.2, 4.5, np.nan]
    >>> remove_na(x)
    array([6.4, 3.2, 4.5])

    With two paired 1D arrays

    >>> y = [2.3, np.nan, 5.2, 4.6]
    >>> remove_na(x, y, paired=True)
    (array([6.4, 4.5]), array([2.3, 5.2]))

    With two independent 2D arrays

    >>> x = np.array([[4, 2], [4, np.nan], [7, 6]])
    >>> y = np.array([[6, np.nan], [3, 2], [2, 2]])
    >>> x_no_nan, y_no_nan = remove_na(x, y, paired=False)
    )rA   r0   zaxis must be rows or columns.NrB   r   z#y cannot be an empty list or array.r   FrA   )r   r   r   rJ   r   r   r   strr   r   rD   r8   rE   logical_andrF   )
rG   yZpairedrC   Zx_no_nanZy_no_nanrH   Zy_maskrI   Zbothr   r   r    r	      s6    +



c                 C   s   t | tjjs| S g }| D ]h}t |tjj}|rxt |ttfsN|t| qt |trl|rl|t| q|| q|| qdd |D }|S )a|  Flatten an arbitrarily nested list into a new list.

    This can be useful to select pandas DataFrame columns.

    From https://stackoverflow.com/a/16176969/10581531

    Examples
    --------
    >>> from pingouin.utils import _flatten_list
    >>> x = ['X1', ['M1', 'M2'], 'Y1', ['Y2']]
    >>> _flatten_list(x)
    ['X1', 'M1', 'M2', 'Y1', 'Y2']

    >>> x = ['Xaa', 'Xbb', 'Xcc']
    >>> _flatten_list(x)
    ['Xaa', 'Xbb', 'Xcc']

    >>> x = ['Xaa', ('Xbb', 'Xcc'), (1, 2), (1)]
    >>> _flatten_list(x)
    ['Xaa', ('Xbb', 'Xcc'), (1, 2), 1]

    >>> _flatten_list(x, include_tuple=True)
    ['Xaa', 'Xbb', 'Xcc', 1, 2, 1]
    c                 S   s   g | ]}|d k	r|qS Nr   ).0rr   r   r    
<listcomp>=  s      z!_flatten_list.<locals>.<listcomp>)	r   collectionsabcIterablerK   tupleextendr
   append)rG   Zinclude_tupleresultelZ
el_is_iterr   r   r    r
     s    c                 C   s   |   dkrdS dS dS )zCheck validity of eftype)	noneZhedgesZcohenrP   Zpointbiserialrz
eta-squarez
odds-ratioZaucZclesTFN)lower)Zeftyper   r   r    r   A  s    c           	      C   sL  t | tjsr| jjdkrjzddlm}m} W n tk
rH   t	dY qrX t | |r`|
| } qrt	dnt	dtdd || fD rt	d| | jjd	krt	d
|dkrt	d|dkrt |tttfst	d|dkr|dkrt	d|dkrt |tttfst	d|dkrH||fD ] }t |tttfs&t	dq&| S )zChecks whether data is a dataframe or can be converted to a dataframe.
    If successful, a dataframe is returned. If not successful, a ValueError is
    raised.
    
DataMatrixr   )r\   convertzGFailed to convert object to pandas dataframe (DataMatrix not available)z5Data must be a pandas dataframe or compatible object.c                 s   s   | ]}|d kV  qd S rN   r   )rO   vr   r   r    	<genexpr>l  s     z#_check_dataframe.<locals>.<genexpr>zDV and data must be specifiedfizDV must be numeric.)withinbetweeninteractionrE   z2Effects must be: within, between, interaction, allra   z'within must be a string, int or a list.Nz-subject must be specified when effects=withinrb   z(between must be a string, int or a list.rc   z=within and between must be specified when effects=interaction)r   pdZ	DataFrame	__class____name__Z
datamatrixr\   r]   ImportError
ValueErrorZ	to_pandasr8   r:   kindrK   r   list)	dataZdvrb   ra   subjectZeffectsr\   Zcnvinputr   r   r    r   S  s:    



c                 C   s>   zddl }d}W n tk
r(   d}Y nX | r:|s:td|S )z"Check if statsmodels is installed.r   NTFzHstatsmodels needs to be installed. Please use `pip install statsmodels`.)statsmodelsOSError)raise_errorrn   is_installedr   r   r    r     s    
c                 C   s>   zddl }d}W n tk
r(   d}Y nX | r:|s:td|S )zCheck if sklearn is installed.r   NTFzEsklearn needs to be installed. Please use `pip install scikit-learn`.)sklearnro   )rp   rr   rq   r   r   r    r     s    
c                 C   s>   zddl }d}W n tk
r(   d}Y nX | r:|s:td|S )zCheck if mpmath is installed.r   NTFz>mpmath needs to be installed. Please use `pip install mpmath`.)mpmathro   )rp   rs   rq   r   r   r    r     s    
)r   )r!   r"   )rA   )NFrA   )F)NNNNNN)F)F)F)__doc__r6   Znumpyr   Zpandasrd   	itertoolsr-   collections.abcrR   r   configr   __all__r   r   r   r1   rJ   r	   r
   r   r   r   r   r   r   r   r   r    <module>   s<   
&
=

X
1
7

