"""
Python implementation of the fast ICA algorithms.

Reference: Tables 8.3 and 8.4 page 196 in the book:
Independent Component Analysis, by Hyvarinen et al.
    N)IntegralReal)linalg   )BaseEstimatorClassNamePrefixFeaturesOutMixinTransformerMixin_fit_context)ConvergenceWarning)as_float_arraycheck_arraycheck_random_state)IntervalOptions
StrOptionsvalidate_params)check_is_fittedfasticaFastICAc                 C   s,   | t j| |d| j|d| g8 } | S )a  
    Orthonormalize w wrt the first j rows of W.

    Parameters
    ----------
    w : ndarray of shape (n,)
        Array to be orthogonalized

    W : ndarray of shape (p, n)
        Null space definition

    j : int < p
        The number of (leading) rows of Null space W with respect to which w is
        orthogonalized.

    Notes
    -----
    Assumes that W is orthogonal
    w changed in place
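
    The projection performed here is equivalent to
    ``w -= w @ W[:j].T @ W[:j]`` for a row vector ``w``.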
    N)npr   	multi_dotT)wWj r   B/tmp/pip-unpacked-wheel-qu3nn_q2/sklearn/decomposition/_fastica.py_gs_decorrelation    s    (r   c                 C   sT   t t| | j\}}tj|t| jjdd}tj 	|dt
|  |j| gS )z@Symmetric decorrelation
    i.e. W <- (W * W.T) ^{-1/2} * W
    N)Za_minZa_max      ?)r   eighr   dotr   ZclipfinfodtypeZtinyr   sqrt)r   sur   r   r   _sym_decorrelation9   s    r&   c                 C   s  |j d }tj||f| jd}g }t|D ]}	||	ddf  }
|
t|
d   }
t|D ]}|t|
j	| |\}}| | j
dd|
 |
  }t|||	 |t|d   }tt||
  d }|}
||k r` qq`||d  |
||	ddf< q*|t|fS )zcDeflationary FastICA using fun approx to neg-entropy function

    Used internally by FastICA.
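
    Components are extracted one at a time: each candidate vector is
    decorrelated against the rows already stored in ``W`` (Gram-Schmidt, via
    ``_gs_decorrelation``) and renormalized after every fixed-point update.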
    """

    n_components = w_init.shape[0]
    W = np.zeros((n_components, n_components), dtype=X.dtype)
    n_iter = []

    # j is the index of the extracted component
    for j in range(n_components):
        w = w_init[j, :].copy()
        w /= np.sqrt((w**2).sum())

        for i in range(max_iter):
            gwtx, g_wtx = g(np.dot(w.T, X), fun_args)

            w1 = (X * gwtx).mean(axis=1) - g_wtx.mean() * w

            _gs_decorrelation(w1, W, j)

            w1 /= np.sqrt((w1**2).sum())

            lim = np.abs(np.abs((w1 * w).sum()) - 1)
            w = w1
            if lim < tol:
                break

        n_iter.append(i + 1)
        W[j, :] = w

    return W, max(n_iter)


def _ica_par(X, tol, g, fun_args, max_iter, w_init):
    """Parallel FastICA.

    Used internally by FastICA --main loop
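
    Each iteration applies the fixed-point update
    ``W1 = g(W X) @ X.T / n_samples - diag(E[g'(W X)]) @ W`` followed by a
    symmetric decorrelation, and stops once
    ``max(abs(abs(np.einsum("ij,ij->i", W1, W)) - 1)) < tol``.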

    """
    W = _sym_decorrelation(w_init)
    del w_init
    p_ = float(X.shape[1])
    for ii in range(max_iter):
        gwtx, g_wtx = g(np.dot(W, X), fun_args)
        W1 = _sym_decorrelation(np.dot(gwtx, X.T) / p_ - g_wtx[:, np.newaxis] * W)
        del gwtx, g_wtx
        # builtin max and abs are faster than their numpy counterparts here;
        # np.einsum keeps the memory footprint low compared with
        # np.diag(np.dot(W1, W.T)).
        lim = max(abs(abs(np.einsum("ij,ij->i", W1, W)) - 1))
        W = W1
        if lim < tol:
            break
    else:
        warnings.warn(
            (
                "FastICA did not converge. Consider increasing "
                "tolerance or the maximum number of iterations."
            ),
            ConvergenceWarning,
        )

    return W, ii + 1


# Standard non-linear functions used to approximate neg-entropy.
def _logcosh(x, fun_args=None):
    alpha = fun_args.get("alpha", 1.0)

    x *= alpha
    gx = np.tanh(x, x)  # apply tanh in place
    g_x = np.empty(x.shape[0], dtype=x.dtype)
    # compute E[g'(w x)] = alpha * (1 - tanh(alpha * w x) ** 2) row by row
    # to avoid allocating a large temporary array
    for i, gx_i in enumerate(gx):
        g_x[i] = (alpha * (1 - gx_i**2)).mean()
    return gx, g_x


def _exp(x, fun_args):
    exp = np.exp(-(x**2) / 2)
    gx = x * exp
    g_x = (1 - x**2) * exp
    return gx, g_x.mean(axis=-1)


def _cube(x, fun_args):
    return x**3, (3 * x**2).mean(axis=-1)


@validate_params(
    {
        "X": ["array-like"],
        "return_X_mean": ["boolean"],
        "compute_sources": ["boolean"],
        "return_n_iter": ["boolean"],
    },
    prefer_skip_nested_validation=False,
)
def fastica(
    X,
    n_components=None,
    *,
    algorithm="parallel",
    whiten="unit-variance",
    fun="logcosh",
    fun_args=None,
    max_iter=200,
    tol=1e-04,
    w_init=None,
    whiten_solver="svd",
    random_state=None,
    return_X_mean=False,
    compute_sources=True,
    return_n_iter=False,
):
    """Perform Fast Independent Component Analysis.

    The implementation is based on [1]_.

    Read more in the :ref:`User Guide <ICA>`.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Training vector, where `n_samples` is the number of samples and
        `n_features` is the number of features.

    n_components : int, default=None
        Number of components to use. If None is passed, all are used.

    algorithm : {'parallel', 'deflation'}, default='parallel'
        Specify which algorithm to use for FastICA.

    whiten : str or bool, default='unit-variance'
        Specify the whitening strategy to use.

        - If 'arbitrary-variance', a whitening with variance
          arbitrary is used.
        - If 'unit-variance', the whitening matrix is rescaled to ensure that
          each recovered source has unit variance.
        - If False, the data is already considered to be whitened, and no
          whitening is performed.

        .. versionchanged:: 1.3
            The default value of `whiten` changed to 'unit-variance' in 1.3.

    fun : {'logcosh', 'exp', 'cube'} or callable, default='logcosh'
        The functional form of the G function used in the
        approximation to neg-entropy. Could be either 'logcosh', 'exp',
        or 'cube'.
        You can also provide your own function. It should return a tuple
        containing the value of the function, and of its derivative, in the
        point. The derivative should be averaged along its last dimension.
        Example::

            def my_g(x):
                return x ** 3, (3 * x ** 2).mean(axis=-1)
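
        A callable defined this way is passed through the ``fun`` argument,
        for instance (illustrative sketch)::

            K, W, S = fastica(X, n_components=3, fun=my_g)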

    fun_args : dict, default=None
        Arguments to send to the functional form.
        If empty or None and if fun='logcosh', fun_args will take value
        {'alpha' : 1.0}.

    max_iter : int, default=200
        Maximum number of iterations to perform.

    tol : float, default=1e-4
        A positive scalar giving the tolerance at which the
        un-mixing matrix is considered to have converged.

    w_init : ndarray of shape (n_components, n_components), default=None
        Initial un-mixing array. If `w_init=None`, then an array of values
        drawn from a normal distribution is used.

    whiten_solver : {"eigh", "svd"}, default="svd"
        The solver to use for whitening.

        - "svd" is more stable numerically if the problem is degenerate, and
          often faster when `n_samples <= n_features`.

        - "eigh" is generally more memory efficient when
          `n_samples >= n_features`, and can be faster when
          `n_samples >= 50 * n_features`.

        .. versionadded:: 1.2

    random_state : int, RandomState instance or None, default=None
        Used to initialize ``w_init`` when not specified, with a
        normal distribution. Pass an int, for reproducible results
        across multiple function calls.
        See :term:`Glossary <random_state>`.

    return_X_mean : bool, default=False
        If True, X_mean is returned too.

    compute_sources : bool, default=True
        If False, sources are not computed, but only the rotation matrix.
        This can save memory when working with big data. Defaults to True.

    return_n_iter : bool, default=False
        Whether or not to return the number of iterations.

    Returns
    -------
    K : ndarray of shape (n_components, n_features) or None
        If whiten is 'True', K is the pre-whitening matrix that projects data
        onto the first n_components principal components. If whiten is 'False',
        K is 'None'.

    W : ndarray of shape (n_components, n_components)
        The square matrix that unmixes the data after whitening.
        The mixing matrix is the pseudo-inverse of matrix ``W K``
        if K is not None, else it is the inverse of W.

    S : ndarray of shape (n_samples, n_components) or None
        Estimated source matrix.

    X_mean : ndarray of shape (n_features,)
        The mean over features. Returned only if return_X_mean is True.

    n_iter : int
        If the algorithm is "deflation", n_iter is the
        maximum number of iterations run across all components. Otherwise
        it is just the number of iterations taken to converge. This is
        returned only when return_n_iter is set to `True`.

    Notes
    -----
    The data matrix X is considered to be a linear combination of
    non-Gaussian (independent) components i.e. X = AS where columns of S
    contain the independent components and A is a linear mixing
    matrix. In short ICA attempts to `un-mix' the data by estimating an
    un-mixing matrix W where ``S = W K X.``
    While FastICA was proposed to estimate as many sources
    as features, it is possible to estimate fewer by setting
    n_components < n_features. In this case K is not a square matrix
    and the estimated A is the pseudo-inverse of ``W K``.

    This implementation was originally made for data of shape
    [n_features, n_samples]. Now the input is transposed
    before the algorithm is applied. This makes it slightly
    faster for Fortran-ordered input.
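
    For instance, with ``K, W, S, X_mean = fastica(X, return_X_mean=True)``
    the input can be approximately reconstructed (up to the discarded
    principal components) as ``S @ np.linalg.pinv(W @ K).T + X_mean``.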

    References
    ----------
    .. [1] A. Hyvarinen and E. Oja, "Fast Independent Component Analysis",
           Algorithms and Applications, Neural Networks, 13(4-5), 2000,
           pp. 411-430.
    
    """
    est = FastICA(
        n_components=n_components,
        algorithm=algorithm,
        whiten=whiten,
        fun=fun,
        fun_args=fun_args,
        max_iter=max_iter,
        tol=tol,
        w_init=w_init,
        whiten_solver=whiten_solver,
        random_state=random_state,
    )
    est._validate_params()
    S = est._fit_transform(X, compute_sources=compute_sources)

    if est.whiten in ["unit-variance", "arbitrary-variance"]:
        K = est.whitening_
        X_mean = est.mean_
    else:
        K = None
        X_mean = None

    returned_values = [K, est._unmixing, S]
    if return_X_mean:
        returned_values.append(X_mean)
    if return_n_iter:
        returned_values.append(est.n_iter_)

    return returned_values


class FastICA(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
    """FastICA: a fast algorithm for Independent Component Analysis.

    The implementation is based on [1]_.

    Read more in the :ref:`User Guide <ICA>`.

    Parameters
    ----------
    n_components : int, default=None
        Number of components to use. If None is passed, all are used.

    algorithm : {'parallel', 'deflation'}, default='parallel'
        Specify which algorithm to use for FastICA.

    whiten : str or bool, default='unit-variance'
        Specify the whitening strategy to use.

        - If 'arbitrary-variance', a whitening with variance
          arbitrary is used.
        - If 'unit-variance', the whitening matrix is rescaled to ensure that
          each recovered source has unit variance.
        - If False, the data is already considered to be whitened, and no
          whitening is performed.

        .. versionchanged:: 1.3
            The default value of `whiten` changed to 'unit-variance' in 1.3.

    fun : {'logcosh', 'exp', 'cube'} or callable, default='logcosh'
        The functional form of the G function used in the
        approximation to neg-entropy. Could be either 'logcosh', 'exp',
        or 'cube'.
        You can also provide your own function. It should return a tuple
        containing the value of the function, and of its derivative, in the
        point. The derivative should be averaged along its last dimension.
        Example::

            def my_g(x):
                return x ** 3, (3 * x ** 2).mean(axis=-1)
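
        A callable defined this way is passed through the ``fun`` argument,
        e.g. ``FastICA(n_components=3, fun=my_g)`` (illustrative sketch).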

    fun_args : dict, default=None
        Arguments to send to the functional form.
        If empty or None and if fun='logcosh', fun_args will take value
        {'alpha' : 1.0}.

    max_iter : int, default=200
        Maximum number of iterations during fit.

    tol : float, default=1e-4
        A positive scalar giving the tolerance at which the
        un-mixing matrix is considered to have converged.

    w_init : array-like of shape (n_components, n_components), default=None
        Initial un-mixing array. If `w_init=None`, then an array of values
        drawn from a normal distribution is used.

    whiten_solver : {"eigh", "svd"}, default="svd"
        The solver to use for whitening.

        - "svd" is more stable numerically if the problem is degenerate, and
          often faster when `n_samples <= n_features`.

        - "eigh" is generally more memory efficient when
          `n_samples >= n_features`, and can be faster when
          `n_samples >= 50 * n_features`.

        .. versionadded:: 1.2

    random_state : int, RandomState instance or None, default=None
        Used to initialize ``w_init`` when not specified, with a
        normal distribution. Pass an int, for reproducible results
        across multiple function calls.
        See :term:`Glossary <random_state>`.

    Attributes
    ----------
    components_ : ndarray of shape (n_components, n_features)
        The linear operator to apply to the data to get the independent
        sources. This is equal to the unmixing matrix when ``whiten`` is
        False, and equal to ``np.dot(unmixing_matrix, self.whitening_)`` when
        ``whiten`` is True.

    mixing_ : ndarray of shape (n_features, n_components)
        The pseudo-inverse of ``components_``. It is the linear operator
        that maps independent sources to the data.

    mean_ : ndarray of shape(n_features,)
        The mean over features. Only set if `self.whiten` is True.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    n_iter_ : int
        If the algorithm is "deflation", n_iter is the
        maximum number of iterations run across all components. Otherwise
        it is just the number of iterations taken to converge.

    whitening_ : ndarray of shape (n_components, n_features)
        Only set if whiten is 'True'. This is the pre-whitening matrix
        that projects data onto the first `n_components` principal components.

    See Also
    --------
    PCA : Principal component analysis (PCA).
    IncrementalPCA : Incremental principal components analysis (IPCA).
    KernelPCA : Kernel Principal component analysis (KPCA).
    MiniBatchSparsePCA : Mini-batch Sparse Principal Components Analysis.
    SparsePCA : Sparse Principal Components Analysis (SparsePCA).

    References
    ----------
    .. [1] A. Hyvarinen and E. Oja, Independent Component Analysis:
           Algorithms and Applications, Neural Networks, 13(4-5), 2000,
           pp. 411-430.

    Examples
    --------
    >>> from sklearn.datasets import load_digits
    >>> from sklearn.decomposition import FastICA
    >>> X, _ = load_digits(return_X_y=True)
    >>> transformer = FastICA(n_components=7,
    ...         random_state=0,
    ...         whiten='unit-variance')
    >>> X_transformed = transformer.fit_transform(X)
    >>> X_transformed.shape
    (1797, 7)
    r(   Nleft)closedr\   	deflationri   r]   Fr^   rQ   cubeg        rV   r   ra   rf   rg   _parameter_constraintsr_   r`   )	rb   rc   rd   r7   r8   r5   r9   re   rf   c       	            sJ   t    || _|| _|| _|| _|| _|| _|| _|| _	|	| _
|
| _d S N)super__init__r:   rb   rc   rd   r7   r8   r5   r9   re   rf   )selfr:   rb   rc   rd   r7   r8   r5   r9   re   rf   	__class__r   r   ry     s    
zFastICA.__init__c                    s   j | jtjtjgddj} jdkr,i n j}t j}|	dd}d|  kr\dksfn t
d jdkrvt}n6 jd	krt}n& jd
krt}nt jr fdd}|j\}}	 j}
 js|
dk	rd}
td |
dkrt|	|}
|
t|	|krt|	|}
td|
   jr:|jdd}||ddtjf 8 } jdkrt||\}}t|ddd }t|jj}||k }t|rtd |||< tj ||d || |dd|f  }}n( jdkrtj!|ddddd \}}|t"|d 9 }|| jd|
 }~~t||}|t |	9 }nt#|dd} j$}|dkrttj%|j&|
|
fd|jd}n.t%|}|j|
|
fkrt
dd|
|
fi  j'|| j(|d} j)dkrt*|f|\}}n j)dkrt+|f|\}}~| _,|r, jrtj-|||gj}nt||j}nd} jr jd kr|s^tj-|||gj}tj.|dd!d"}|| }||j }t|| _/| _0| _1n| _/tj2 j/dd# _3| _4|S )$ad  Fit the model.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        compute_sources : bool, default=False
            If False, sources are not computed but only the rotation matrix.
            This can save memory when working with big data. Defaults to False.

        Returns
        -------
        S : ndarray of shape (n_samples, n_components) or None
            Sources matrix. `None` if `compute_sources` is `False`.
        """
        XT = self._validate_data(
            X, copy=self.whiten, dtype=[np.float64, np.float32], ensure_min_samples=2
        ).T
        fun_args = {} if self.fun_args is None else self.fun_args
        random_state = check_random_state(self.random_state)

        alpha = fun_args.get("alpha", 1.0)
        if not 1 <= alpha <= 2:
            raise ValueError("alpha must be in [1,2]")

        if self.fun == "logcosh":
            g = _logcosh
        elif self.fun == "exp":
            g = _exp
        elif self.fun == "cube":
            g = _cube
        elif callable(self.fun):

            def g(x, fun_args):
                return self.fun(x, **fun_args)

        n_features, n_samples = XT.shape
        n_components = self.n_components
        if not self.whiten and n_components is not None:
            n_components = None
            warnings.warn("Ignoring n_components with whiten=False.")

        if n_components is None:
            n_components = min(n_samples, n_features)
        if n_components > min(n_samples, n_features):
            n_components = min(n_samples, n_features)
            warnings.warn(
                "n_components is too large: it will be set to %s" % n_components
            )

        if self.whiten:
            # Centering the features of X
            X_mean = XT.mean(axis=-1)
            XT -= X_mean[:, np.newaxis]

            # Whitening and preprocessing by PCA
            if self.whiten_solver == "eigh":
                # Faster when num_samples >> n_features
                d, u = linalg.eigh(XT.dot(X))
                sort_indices = np.argsort(d)[::-1]
                eps = np.finfo(d.dtype).eps
                degenerate_idx = d < eps
                if np.any(degenerate_idx):
                    warnings.warn(
                        "There are some small singular values, using "
                        "whiten_solver = 'svd' might lead to more "
                        "accurate results."
                    )
                d[degenerate_idx] = eps  # For numerical issues
                np.sqrt(d, out=d)
                d, u = d[sort_indices], u[:, sort_indices]
            elif self.whiten_solver == "svd":
                u, d = linalg.svd(XT, full_matrices=False, check_finite=False)[:2]

            # Give consistent eigenvectors for both svd solvers
            u *= np.sign(u[0])

            K = (u / d).T[:n_components]  # see (6.33) p.140 of the reference
            del u, d
            X1 = np.dot(K, XT)
            # see (13.6) p.267: here X1 is white and the data in X has been
            # projected onto a subspace by PCA
            X1 *= np.sqrt(n_samples)
        else:
            X1 = as_float_array(XT, copy=False)  # copy has been taken care of above

        w_init = self.w_init
        if w_init is None:
            w_init = np.asarray(
                random_state.normal(size=(n_components, n_components)), dtype=X1.dtype
            )
        else:
            w_init = np.asarray(w_init)
            if w_init.shape != (n_components, n_components):
                raise ValueError(
                    "w_init has invalid shape -- should be %(shape)s"
                    % {"shape": (n_components, n_components)}
                )

        kwargs = {
            "tol": self.tol,
            "g": g,
            "fun_args": fun_args,
            "max_iter": self.max_iter,
            "w_init": w_init,
        }

        if self.algorithm == "parallel":
            W, n_iter = _ica_par(X1, **kwargs)
        elif self.algorithm == "deflation":
            W, n_iter = _ica_def(X1, **kwargs)
        del X1

        self.n_iter_ = n_iter

        if compute_sources:
            if self.whiten:
                S = np.linalg.multi_dot([W, K, XT]).T
            else:
                S = np.dot(W, XT).T
        else:
            S = None

        if self.whiten:
            if self.whiten == "unit-variance":
                if not compute_sources:
                    S = np.linalg.multi_dot([W, K, XT]).T
                S_std = np.std(S, axis=0, keepdims=True)
                S /= S_std
                W /= S_std.T

            self.components_ = np.dot(W, K)
            self.mean_ = X_mean
            self.whitening_ = K
        else:
            self.components_ = W

        self.mixing_ = linalg.pinv(self.components_, check_finite=False)
        self._unmixing = W

        return S
zFastICA._fit_transformTr[   c                 C   s   | j |ddS )a5  Fit the model and recover the sources from X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        X_new : ndarray of shape (n_samples, n_components)
            Estimated sources obtained by transforming the data with the
            estimated unmixing matrix.
        """
        return self._fit_transform(X, compute_sources=True)

    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y=None):
        """Fit the model to X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        self._fit_transform(X, compute_sources=False)
        return self

    def transform(self, X, copy=True):
        """Recover the sources from X (apply the unmixing matrix).

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Data to transform, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        copy : bool, default=True
            If False, data passed to fit can be overwritten. Defaults to True.

        Returns
        -------
        X_new : ndarray of shape (n_samples, n_components)
            Estimated sources obtained by transforming the data with the
            estimated unmixing matrix.
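
        When the model was fitted with whitening this amounts to
        ``(X - self.mean_) @ self.components_.T``; otherwise the mean
        subtraction is skipped.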
        F)r.   r"   reset)
r   r   rc   r   r   r   rl   r    r   r   rz   r4   r.   r   r   r   	transform  s      
 
zFastICA.transformc                 C   sH   t |  t||o| jtjtjgd}t|| jj}| jrD|| j	7 }|S )a1  Transform the sources back to the mixed data (apply mixing matrix).

        Parameters
        ----------
        X : array-like of shape (n_samples, n_components)
            Sources, where `n_samples` is the number of samples
            and `n_components` is the number of components.
        copy : bool, default=True
            If False, data passed to fit are overwritten. Defaults to True.

        Returns
        -------
        X_new : ndarray of shape (n_samples, n_features)
            Reconstructed data obtained with the mixing matrix.
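
        This amounts to ``X @ self.mixing_.T`` (plus ``self.mean_`` when the
        model was fitted with whitening).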
        """
        check_is_fitted(self)

        X = check_array(X, copy=(copy and self.whiten), dtype=[np.float64, np.float32])
        X = np.dot(X, self.mixing_.T)
        if self.whiten:
            X += self.mean_

        return X

    @property
    def _n_features_out(self):
        """Number of transformed output features."""
        return self.components_.shape[0]

    def _more_tags(self):
        return {"preserves_dtype": [np.float32, np.float64]}