U
    Kvf%                     @   sn   d Z ddlZddlmZ ddlZddlZddlm	Z	 ddl
mZ ddlmZ G dd	 d	e	ZG d
d dZdS )z4Canonical correlation analysis

author: Yichuan Liu
    N)svd)Model)summary2   )multivariate_statsc                       s4   e Zd ZdZd fdd	ZdddZd	d
 Z  ZS )CanCorra  
    Canonical correlation analysis using singular value decomposition

    For matrices exog=x and endog=y, find projections x_cancoef and y_cancoef
    such that:

        x1 = x * x_cancoef, x1' * x1 is identity matrix
        y1 = y * y_cancoef, y1' * y1 is identity matrix

    and the correlation between x1 and y1 is maximized.

    Attributes
    ----------
    endog : ndarray
        See Parameters.
    exog : ndarray
        See Parameters.
    cancorr : ndarray
        The canonical correlation values
    y_cancoef : ndarray
        The canonical coefficients for endog
    x_cancoef : ndarray
        The canonical coefficients for exog

    References
    ----------
    .. [*] http://numerical.recipes/whp/notes/CanonCorrBySVD.pdf
    .. [*] http://www.csun.edu/~ata20315/psy524/docs/Psy524%20Lecture%208%20CC.pdf
    .. [*] http://www.mathematica-journal.com/2014/06/canonical-correlation-analysis/
    :0yE>noneNc                    s.   t t| j||f||d| | | d S )N)missinghasconst)superr   __init___fit)selfendogexog	tolerancer
   r   kwargs	__class__ D/tmp/pip-unpacked-wheel-2v6byqio/statsmodels/multivariate/cancorr.pyr   0   s    zCanCorr.__init__c                    sv  | j j\}}| jj\}}t||g}t| j}||d }t| j }||d }t|d\}}	}
|
j}|	|k}|	 t
|k rtd|dd|f  |	|   < t|d\}}}|j}||k}|	 t
|k rtd|dd|f  ||   < t|j|d\} }t fddtt
 D | _||ddd|f | _||jddd|f | _dS )a=  Fit the model

        A ValueError is raised if there are singular values smaller than the
        tolerance. The treatment of singular arrays might change in future.

        Parameters
        ----------
        tolerance : float
            eigenvalue tolerance, values smaller than which is considered 0
        r   zexog is collinear.Nzendog is collinear.c                    s    g | ]}t d t | dqS )r   r   )maxmin).0isr   r   
<listcomp>Z   s     z CanCorr._fit.<locals>.<listcomp>)r   shaper   npr   arrayZmeanr   Tsumlen
ValueErrordotrangecancorrZ	x_cancoefZ	y_cancoef)r   r   nobsk_yvark_xvarkxyZuxZsxZvxZvx_dsmaskZuyZsyZvyZvy_dsuvr   r   r   r   5   s.    "zCanCorr._fitc                 C   s  | j j\}}| jj\}}t| jd}tjddddddgtt	t
|d d	d	d
}d}t	t
|d d	d	D ]B}|d||  9 }|| }|| }	|| d ||	 d d  }
||	 d d }||	 }|d |	d  d dkrt||	 d d |d |	d  d  }nd}|
| d|  }t|d| }d| | | | }| j| |j|df< ||j|df< ||j|df< ||j|df< ||j|df< tjj|||}||j|df< ql|jjddd	 }|j|ddf }t||||| d }t||S )aI  Approximate F test
        Perform multivariate statistical tests of the hypothesis that
        there is no canonical correlation between endog and exog.
        For each canonical correlation, testing its significance based on
        Wilks' lambda.

        Returns
        -------
        CanCorrTestResults instance
           zCanonical CorrelationzWilks' lambdazNum DFzDen DFzF ValuezPr > Fr   )columnsindex      r   N)r   r   r   r    powerr(   pdZ	DataFramelistr'   r$   sqrtlocscipystatsfZsfr5   valuesr   CanCorrTestResults)r   r)   r*   r+   Z	eigenvalsr>   prodr   pqrr0   Zdf1tZdf2ZlmdFZpvalindstats_mvr   r   r   	corr_test_   sR       ,
  
zCanCorr.corr_test)r   r	   N)r   )__name__
__module____qualname____doc__r   r   rJ   __classcell__r   r   r   r   r      s   
*r   c                   @   s(   e Zd ZdZdd Zdd Zdd ZdS )	rA   z
    Canonical correlation results class

    Attributes
    ----------
    stats : DataFrame
        Contain statistical tests results for each canonical correlation
    stats_mv : DataFrame
        Contain the multivariate statistical tests results
    c                 C   s   || _ || _d S N)r>   rI   )r   r>   rI   r   r   r   r      s    zCanCorrTestResults.__init__c                 C   s   |    S rP   )summary__str__)r   r   r   r   rR      s    zCanCorrTestResults.__str__c                 C   sJ   t  }|d || j |ddi |ddi || j |S )NzCancorr results z,Multivariate Statistics and F Approximations)r   ZSummaryZ	add_titleZadd_dfr>   Zadd_dictrI   )r   Zsummr   r   r   rQ      s    
zCanCorrTestResults.summaryN)rK   rL   rM   rN   r   rR   rQ   r   r   r   r   rA      s   
rA   )rN   Znumpyr    Znumpy.linalgr   r=   Zpandasr9   Zstatsmodels.base.modelr   Zstatsmodels.iolibr   Zmultivariate_olsr   r   rA   r   r   r   r   <module>   s    	