"""
Created on Sun Nov 14 08:21:41 2010

Author: josef-pktd
License: BSD (3-clause)
"""
    N)pca)LeaveOneOutc                   @   s<   e Zd ZdZdd ZdddZd	d
 ZdddZdd ZdS )FactorModelUnivariatea  

    Todo:
    check treatment of const, make it optional ?
        add hasconst (0 or 1), needed when selecting nfact+hasconst
    options are arguments in calc_factors, should be more public instead
    cross-validation is slow for large number of observations
    c                 C   s   t || _t || _d S )N)npasarrayendogexog)selfr   r    r
   M/tmp/pip-unpacked-wheel-2v6byqio/statsmodels/sandbox/datarich/factormodels.py__init__   s    zFactorModelUnivariate.__init__Nr   Tc                 C   sn   |dkr| j }n
t|}t||dd\}}}}|| _|rRtj|dd| _d| _n|| _d| _|| _	|| _
dS )zget factor decomposition of exogenous variables

        This uses principal component analysis to obtain the factors. The number
        of factors kept is the maximum that will be considered in the regression.
        N   )keepdim	normalizeT)prependr   )r   r   r   r   Zexog_reducedsmZadd_constantfactorshasconstevalsevecs)r	   xr   ZaddconstZxredfactr   r   r
   r
   r   calc_factors!   s    
z"FactorModelUnivariate.calc_factorsc                 C   s:   t | ds|   t| j| jd d d |d f  S )NZfactors_wconstr   )hasattrr   r   OLSr   r   fit)r	   Znfactr
   r
   r   fit_fixed_nfact8   s    
z%FactorModelUnivariate.fit_fixed_nfactc                 C   s  t | ds|   | j}|dkr0| jjd | }|| dk rDtdt|d}| j}g }td|| D ]}| jddd|f }t	
|| }	|s
|dkrtt|}d}
|D ]T\}}t	
|| ||ddf  }|
|| |j|j||ddf  d 7 }
qntj}
|||	j|	j|	j|
g qft| | _}tjt|ddddf d	t|dddf d	t|ddd
f d	f | _dS )aW  estimate the model and selection criteria for up to maxfact factors

        The selection criteria that are calculated are AIC, BIC, and R2_adj,
        and additionally the cross-validation prediction error sum of squares
        if `skip_crossval` is false. Cross-validation is not used by default
        because it can be
        time consuming to calculate.

        By default the cross-validation method is Leave-one-out on the full dataset.
        A different cross-validation sample can be specified as an argument to
        cv_iter.

        Results are attached in `results_find_nfact`

        '''
        if not hasattr(self, 'factors'):
            self.calc_factors()

        hasconst = self.hasconst
        if maxfact is None:
            maxfact = self.factors.shape[1] - hasconst
        if (maxfact + hasconst) < 1:
            raise ValueError('nothing to do, number of factors (incl. constant)'
                             ' should be at least 1')

        # temporary safety: cap the number of candidate factors
        maxfact = min(maxfact, 10)
        y0 = self.endog
        results = []
        for k in range(1, maxfact + hasconst):  # k includes the constant
            # slicing the precomputed factors is faster than rerunning pca
            fact = self.factors[:, :k]
            res = sm.OLS(y0, fact).fit()

            if not skip_crossval:
                if cv_iter is None:
                    cv_iter = LeaveOneOut(len(y0))
                prederr2 = 0.
                for inidx, outidx in cv_iter:
                    res_l1o = sm.OLS(y0[inidx], fact[inidx, :]).fit()
                    pred = res_l1o.model.predict(res_l1o.params, fact[outidx, :])
                    # sum() reduces to a scalar, also for multi-obs test sets
                    prederr2 += ((y0[outidx] - pred) ** 2.).sum()
            else:
                prederr2 = np.nan

            results.append([k, res.aic, res.bic, res.rsquared_adj, prederr2])

        self.results_find_nfact = results = np.array(results)
        # best row by AIC, BIC (argmin), R2_adj (argmax) and L1O (argmin)
        self.best_nfact = np.r_[(np.argmin(results[:, 1:3], 0),
                                 np.argmax(results[:, 3], 0),
                                 np.argmin(results[:, -1], 0))]

    def summary_find_nfact(self):
        '''provides a summary for the selection of the number of factors

        Returns
        -------
        sumstr : str
            summary of the results for selecting the number of factors

        '''
        if not hasattr(self, 'results_find_nfact'):
            self.fit_find_nfact()

        results = self.results_find_nfact
        sumstr = ''
        sumstr += '\n' + 'Best result for k, by AIC, BIC, R2_adj, L1O'
        sumstr += '\n' + ' ' * 19 + '%5d %4d %6d %5d' % tuple(self.best_nfact)

        from statsmodels.iolib.table import SimpleTable
        headers = 'k, AIC, BIC, R2_adj, L1O'.split(', ')
        numformat = ['%6d'] + ['%10.3f'] * 4
        txt_fmt1 = dict(data_fmts=numformat)
        tabl = SimpleTable(results, headers, None, txt_fmt=txt_fmt1)

        # note: the next two lines describe the simulated example in __main__
        sumstr += '\n' + "PCA regression on simulated data,"
        sumstr += '\n' + "DGP: 2 factors and 4 explanatory variables"
        sumstr += '\n' + tabl.__str__()
        sumstr += '\n' + "Notes: k is number of components of PCA,"
        sumstr += '\n' + "       constant is added additionally"
        sumstr += '\n' + "       k=0 means regression on constant only"
        sumstr += '\n' + "       L1O: sum of squared prediction errors for leave-one-out"
        return sumstr


if __name__ == '__main__':

    examples = [1]
    if 1 in examples:
        nobs = 500

        # two random factors plus a constant column
        f0 = np.c_[np.random.normal(size=(nobs, 2)), np.ones((nobs, 1))]
        # factor loadings; the last assignment is the one that is used
        f2xcoef = np.c_[np.repeat(np.eye(2), 2, 0), np.arange(4)[::-1]].T
        f2xcoef = np.array([[1., 1., 0., 0.],
                            [0., 0., 1., 1.],
                            [3., 2., 1., 0.]])
        f2xcoef = np.array([[0.1, 3., 1., 0.],
                            [0., 0., 1.5, 0.1],
                            [3., 2., 1., 0.]])
        x0 = np.dot(f0, f2xcoef)
        x0 += 0.1 * np.random.normal(size=x0.shape)
        ytrue = np.dot(f0, [1., 1., 1.])
        y0 = ytrue + 0.1 * np.random.normal(size=ytrue.shape)

        mod = FactorModelUnivariate(y0, x0)
        print(mod.summary_find_nfact())
        print('with cross validation - slower')
        mod.fit_find_nfact(maxfact=None, skip_crossval=False, cv_iter=None)
        print(mod.summary_find_nfact())
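

# ---------------------------------------------------------------------------
# Editor-added sketch, not part of the original module or the statsmodels API.
# It illustrates, under stated assumptions, what the class above computes:
# principal component scores of the standardized regressors (here via numpy
# SVD instead of statsmodels.sandbox.tools.pca), OLS of the endogenous
# variable on a constant plus the leading k components with AIC/BIC/R2_adj as
# selection criteria, and the leave-one-out prediction error sum of squares
# ("L1O") computed with explicit boolean masks instead of
# statsmodels.sandbox.tools.cross_val.LeaveOneOut.  All names below
# (x_demo, y_demo, press, ...) are illustrative only.
if __name__ == '__main__':
    rng = np.random.RandomState(12345)
    nobs_demo = 100
    x_demo = rng.normal(size=(nobs_demo, 4))
    y_demo = x_demo.dot([1., 0.5, 0.2, 0.]) + 0.1 * rng.normal(size=nobs_demo)

    # principal component scores of the standardized regressors
    xs = (x_demo - x_demo.mean(0)) / x_demo.std(0)
    u, s, vt = np.linalg.svd(xs, full_matrices=False)
    scores = xs.dot(vt.T)      # equals u * s, ordered by explained variance

    print('\nhand-rolled selection criteria (k, aic, bic, rsquared_adj, L1O):')
    for k in range(1, 4):
        exog_k = sm.add_constant(scores[:, :k], prepend=True)
        res_k = sm.OLS(y_demo, exog_k).fit()

        # leave-one-out prediction error sum of squares for this k
        press = 0.0
        for i in range(nobs_demo):
            mask = np.ones(nobs_demo, dtype=bool)
            mask[i] = False                     # leave observation i out
            res_i = sm.OLS(y_demo[mask], exog_k[mask]).fit()
            pred_i = res_i.model.predict(res_i.params, exog_k[[i]])
            press += (y_demo[i] - pred_i[0]) ** 2

        print(k, res_k.aic, res_k.bic, res_k.rsquared_adj, press)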