U
    Kvf|  ã                   @   s   d dl ZG dd„ dƒZdS )é    Nc                   @   s   e Zd ZdZdZdd„ Zd$dd„Zdd	„ Zd
d„ Zdd„ Z	dd„ Z
dd„ Zd%dd„Zd&dd„Zdd„ Zdd„ Zd'dd„Zd(d d!„Zd)d"d#„ZdS )*ÚPcazˆ
    A basic class for Principal Component Analysis (PCA).

    p is the number of dimensions, while N is the number of data points
    )ÚrÚgÚbÚcÚyÚmÚkc                 C   s@   | j }|tj|dd }|tj|dd }|| _|| _d | _d S )Nr   ©Zaxis)ÚAÚnpÚmeanÚstdÚMÚNÚ_eig)Úselfr   r   r   © r   ú;/tmp/pip-unpacked-wheel-2v6byqio/statsmodels/sandbox/pca.pyZ__calc   s    z
Pca.__calcNc                 C   sÈ   t  |¡j}|j\}}|| | _| _||krBddlm} |dtƒ || _	| 
¡ | _|  ¡  t  | jt|d t| jƒ ƒd ¡d|… | _|dk	r¤t|ƒ|kr¤tdƒ‚|dkr°dntdd„ |D ƒƒ| _dS )	z$
        p X N matrix input
        r   )Úwarnzp > n - intentional?é   Nznames must match data dimensionc                 S   s   g | ]}t |ƒ‘qS r   )Ústr)Ú.0Úxr   r   r   Ú
<listcomp>(   s     z Pca.__init__.<locals>.<listcomp>)r   ÚarrayÚTÚshapeÚnÚpÚwarningsr   ÚRuntimeWarningr   ÚcopyÚ_origAÚ
_Pca__calcZtileÚ_colorsÚintÚlenÚ
ValueErrorÚtupleÚnames)r   Údatar*   r   r   r   r   r   r   r   Ú__init__   s    


.zPca.__init__c                 C   s   t  | jj¡S )z?
        returns the covariance matrix for the dataset
        )r   Zcovr   r   ©r   r   r   r   ÚgetCovarianceMatrix+   s    zPca.getCovarianceMatrixc                 C   s^   | j dkrXtj |  ¡ ¡}t |d ¡ddd… }|d | |d dd…|f f}|| _ | j S )zQ
        returns a tuple of (eigenvalues,eigenvectors) for the data set.
        Nr   éÿÿÿÿr   )r   r   ÚlinalgZeigr.   Zargsort)r   ÚresZsortir   r   r   ÚgetEigensystem1   s    
 zPca.getEigensystemc                 C   s   |   ¡ d S )Nr   ©r2   r-   r   r   r   ÚgetEigenvalues<   s    zPca.getEigenvaluesc                 C   s   |   ¡ d S )Nr   r3   r-   r   r   r   ÚgetEigenvectors?   s    zPca.getEigenvectorsc                 C   s   |   ¡ }|t |¡ S )z=
        "energies" are just normalized eigenvectors
        )r4   r   Úsum)r   Úvr   r   r   ÚgetEnergiesB   s    zPca.getEnergiesr   r   Tc              
   C   s  ddl m} | jdd…|f | jdd…|f  }}|r>| ¡  | ||¡ |  ¡ \}}| ¡ \}	}
| ¡ \}}|
|	 ||  }}t||j	| j
ƒD ]B\}}}|jdd|||  |||  d|| d d  ||d q| jdk	r| d| j|  d ¡ | d| j|  d ¡ dS )	zü
        Generates a 2-dimensional plot of the data set and principle components
        using matplotlib.

        ix specifies which p-dimension to put on the x-axis of the plot
        and iy specifies which to put on the y-axis (0-indexed)
        r   Ngš™™™™™©?é   g      à?)Z
head_widthZfcZecú$z/\sigma$)Zmatplotlib.pyplotZpyplotr   ÚclfZscatterr2   ZxlimZylimÚzipr   r%   Zarrowr*   ÚxlabelÚylabel)r   ÚixÚiyr;   Zpltr   r   ÚvalsZevsZxlZxuZylZyuZdxZdyÚvalZvecr   r   r   r   Úplot2dI   s    &:z
Pca.plot2dé   c              	   C   sÚ   ddl m  m} |r| ¡  t d¡}|  ¡ |  ¡  }|j||||| || || dd |j	| j
dd…|f | j
dd…|f | j
dd…|f dd | jrÎ|j| j| d | j| d | j| d d n| ¡  dS )	zõ
        Generates a 3-dimensional plot of the data set and principle components
        using mayavi.

        ix, iy, and iz specify which of the input p-dimensions to place on each of
        the x,y,z axes, respectively (0-indexed).
        r   Né   é   )Zscale_factorg333333Ó?z/sigma)r=   r>   Zzlabel)Zenthought.mayavi.mlabZmayaviZmlabr;   r   Úzerosr5   r4   Zquiver3dZpoints3dr   r*   Zaxes)r   r?   r@   Zizr;   r   Zz3r7   r   r   r   Úplot3dc   s    
$<0z
Pca.plot3dc                 C   sz   t  |¡r |t  | jjd ¡ }|t j| jdd }| jjd }t jt  | j¡|k dd}| j| | _|  	¡  |t
|ƒ S )a   
        clips out all data points that are more than a certain number
        of standard deviations from the mean.

        sigs can be either a single value or a length-p sequence that
        specifies the number of standard deviations along each of the
        p dimensions.
        r   r
   r   )r   ZisscalarZonesr   r   r   ÚallÚabsr   r$   r6   )r   Zsigsr   r   r   r   r   Úsigclipw   s    	
zPca.sigclipc                 C   s   | j  ¡ | _|  ¡  d S ©N)r#   r"   r   r$   r-   r   r   r   Úreset‰   s    z	Pca.resetc                 C   sè   t dd„ |||fD ƒƒ}|dkr*tdƒ}n`|dkr<tdƒ‚nN|dk	rR|  ¡ |k}n8|dk	rftd|ƒ}n$|dk	r‚t |  ¡ ¡|k }ntdƒ‚|dkrœ| jj}n.tj	|dd	}| jjj
d |j
d krÊtd
ƒ‚t |  ¡ ¡j| }|| jS )aD  
        projects the normalized values onto the components

        enthresh, nPCs, and cumen determine how many PCs to use

        if vals is None, the normalized data vectors are the values to project.
        Otherwise, it should be convertable to a p x N array

        returns n,p(>threshold) dimension array
        c                 S   s   g | ]}|d k	‘qS rL   r   )r   Úer   r   r   r   ™   s     zPca.project.<locals>.<listcomp>r   Nr   z&cannot specify more than one thresholdzShould be unreachableF)r"   zshape for vals does not match)r6   Úslicer(   Zenergiesr   ZcumsumÚRuntimeErrorr   r   r   r   Úmatrixr5   )r   rA   ZenthreshZnPCsZcumenZnonnonesr   Úprojr   r   r   ÚprojectŽ   s&    


zPca.projectc                 C   sÀ   t  |¡}|j\}}| jjd }||kr0tdƒ‚t j t  |  ¡ ¡j	¡}t  
||f¡}||dd…d|…f< ||j	 }|r†t  |j	¡j	S t j| jdd}	t j| jdd}
t  |j	¡|
 |	 j	S dS )zP
        input is an n X q array, where q <= p

        output is p X n
        r   zq > pNr   r
   )r   Z
atleast_2dr   r   r(   r0   ÚinvrQ   r5   r   rG   r   r   r   r   )r   r   Únormedr   Úqr   ZevinvZzsrR   ZmnsZsdsr   r   r   Ú	deproject±   s    


zPca.deprojectc                 C   s¢   |dkr| j }n$|j}|jd | j jd kr4tdƒ‚|  ¡ }t |¡}|dd…|f |dd…|f< |  |d¡}|j| }|jtj| j	dd }|tj
| j dd S )z¯
        pc can be a scalar or any sequence of pc indecies

        if vals is None, the source data is self.A, else whatever is in vals
        (which must be p x m)
        Nr   z1vals do not have the correct number of componentsFr   r
   )r   r   r   r(   rS   r   Z
zeros_likerW   r   r   r   )r   ZpcrA   ZpcsZzpcsZupcr   ÚBr   r   r   Ú
subtractPCË   s    

zPca.subtractPC)N)r   r   T)r   r   rD   T)NNNN)T)N)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r%   r$   r,   r.   r2   r4   r5   r8   rC   rH   rK   rM   rS   rW   rY   r   r   r   r   r      s    	



#
r   )Znumpyr   r   r   r   r   r   Ú<module>   s   