U
    Kvf]_                     @  s   d Z ddlmZ ddlmZ ddlmZ ddlZddl	m
Z
mZmZmZ ddlm  mZ ddlmZmZ ddlmZ d	d
 Zdd Zdd ZG dd dZG dd deZG dd deZdd Zdd Zd ddZdd Z d!ddZ!dS )"zr
Base tools for handling various kinds of data structures, attaching metadata to
results, and doing data cleaning
    )annotations)lmap)reduceN)	DataFrameSeriesisnull
MultiIndex)cache_readonlycache_writable)MissingDataErrorc                 C  s,   t | jdkr(t |  jdkr(d S d S N   )npasarrayndimsqueezex r   9/tmp/pip-unpacked-wheel-2v6byqio/statsmodels/base/data.py_asarray_2dcolumns   s    $r   c                 C  sB   t | } | jdkr$| dddf } t jt| dddddf S )zy
    Makes sure input is an array and is 2d. Makes sure output is 2d. True
    indicates a null in the rows of 2d x.
    r   NZaxis)r   r   r   anyr   r   r   r   r   _asarray_2d_null_rows   s    

r   c                  G  s0   t | dkr| dggf7 } dd }t||  S )z
    Returns a boolean array which is True where any of the rows in any
    of the _2d_ arrays in arrs are NaNs. Inputs can be any mixture of Series,
    DataFrames or array_like.
    r   Fc                 S  s0   t | do| jtko| }tt| |t|B S )Ndtype)hasattrr   boolr   
logical_orr   )r   yZx_is_boolean_arrayr   r   r   _nan_row_maybe_two_inputs.   s    

z,_nan_rows.<locals>._nan_row_maybe_two_inputs)lenr   r   )Zarrsr   r   r   r   	_nan_rows%   s    r!   c                   @  sJ  e Zd ZdZdZdZdCddZdd Zdd	 Zd
d Z	e
dd Ze
dd Ze
dd Zdd Ze dd Ze ddddZedd Zejdd Zedd Zejdd Zed d! Zd"d# Zd$d% Zd&d' Zd(d) Zd*d+ ZdDd-d.Zd/d0 Zd1d2 Zd3d4 Z d5d6 Z!d7d8 Z"d9d: Z#d;d< Z$d=d> Z%d?d@ Z&dAdB Z'dS )E	ModelDatazi
    Class responsible for handling input data and extracting metadata into the
    appropriate form
    Nnonec           	      K  s   t |st |r(ddlm} t|d|kr<|d| _d|krP|d| _|dkr| j|||f|\}}|| _	| j
| | j| _| j| _| | j| j\| _| _n,| j
| || _|| _| ||\| _| _d | _d| _| | |   i | _d S )Nr   )recarray_exceptiondesign_infoformular#   )	data_utilZ_is_recarraystatsmodels.tools.sm_exceptionsr$   NotImplementedErrorpopr%   r&   handle_missingZmissing_row_idx__dict__updateendog
orig_endogexog	orig_exog_convert_endog_exog	const_idx
k_constant_handle_constant_check_integrity_cache)	selfr.   r0   missinghasconstkwargsr$   ZarraysZnan_idxr   r   r   __init__>   s6    
zModelData.__init__c                 C  s0   ddl m } || j}d|kr,|d= d|d< |S )Nr   )copyr%   Trestore_design_info)r=   r,   )r8   r=   dr   r   r   __getstate__\   s    
zModelData.__getstate__c           
      C  s   d|krddl m}m} g }z|d }W n& tk
rN   |d |d }Y nX dD ]X}z"||d ||d	d
\}}W  qW qT t|fk
r }	 z||	 W 5 d }	~	X Y qTX qT|d |j| _|d= | j	| d S )Nr>   r   )	dmatrices
PatsyErrorframer/   r1   )      r   r      r&   Z	dataframe)Zeval_envreturn_type)
ZpatsyrA   rB   KeyErrorjoin	NameErrorappendr%   r,   r-   )
r8   r?   rA   rB   excdatadepth_Zdesigner   r   r   __setstate__d   s(    


zModelData.__setstate__c                 C  s  |dks| j d kr"d| _d | _nd}tj| j dd}t| sLtdtj| j dd}t	||kd 
 }|j| _| jdkr| j d d |f  dkrt|| _nd}n| jdkr@g }|D ]B}| j d d |f  }|dkrd| _t|| _ qR|| qt|dk}	|	 r:d| _t||	  | _nd}n| jdkrRd}n |r|stt| j jd | j f}
tj|
}tj| j }t||k| _d | _n|rd| _d S )NFr   r   zexog contains inf or nansr   T)r0   r4   r3   r   maxisfiniteallr   minwherer   sizeZmeanintrL   arrayr   ZargmaxZcolumn_stackZonesshapeZlinalgZmatrix_rank)r8   r:   Zcheck_implicitZexog_maxZexog_minr3   valuesidxvalueposZaugmented_exogZ	rank_augmZ	rank_origr   r   r   r5   }   sP    



zModelData._handle_constantc                 C  s   || S Nr   clsr   nan_maskr   r   r   
_drop_nans   s    zModelData._drop_nansc                 C  s   || d d |f S r`   r   ra   r   r   r   _drop_nans_2d   s    zModelData._drop_nans_2dc                   s  g } dd}|dk	r4d}g }|dkrd|dg7 }n0|dk	rN||f}ddg}n|f}dg}|dg7 }d}	g }
tr* D ]\}}|dkst|dkr||g7 }q~|jdkr|t|f7 }||g7 }q~| jdkr|t|f7 }||g7 }q~|jdkr |	t|f7 }	|
|g7 }
q~td	q~|dk	r|d}|rzt| }|j	d j	d krhtd
|  }|O |	rt|	}|j	d j	d krtd|dk	r||  O }n
|  }|O n(t| |	rtdddf f|	  t
stt||}|	r0|tt|
|	 |rN|fdd|D  |dk	r~|d|i |dk	r~|d|i |g fS |dkrtdn|dkr  fdd} fdd}tt|t||}|dk	r>|dk	r| } ||}|dk	r ||}|d|i |dk	r>|d|i |	r^|tt|
t||	 |r||fdd|D  |t d  fS td| dS )zu
        This returns a dictionary with keys endog, exog and the keys of
        kwargs. It preserves Nones.
        missing_idxNr   r0   r.   r   r   rD   z6Arrays with more than 2 dimensions are not yet handledzBShape mismatch between endog/exog and extra arrays given to model.zEShape mismatch between endog/exog and extra 2d arrays given to model.c                   s   i | ]}|  |d qS r`   get.0kr;   r   r   
<dictcomp>  s    z,ModelData.handle_missing.<locals>.<dictcomp>raisez!NaNs were encountered in the dataZdropc                   s     | S r`   )rd   r   rb   rc   r   r   <lambda>"      z*ModelData.handle_missing.<locals>.<lambda>c                   s     | S r`   )re   r   ro   r   r   rp   #  rq   c                   s   i | ]}|  |d qS r`   rg   ri   rl   r   r   rm   6  s    z missing option %s not understood)r*   r    itemsr   r   r   r   
ValueErrorr!   r[   r   dictzipr-   r   r   rd   rW   tolist)rb   r.   r0   r9   r;   Znone_array_namesrf   ZcombinedZcombined_namesZcombined_2dZcombined_2d_nameskeyZvalue_arrayZupdated_row_maskZcombined_nansZcombined_2d_nansZ	drop_nansZdrop_nans_2dr   )rb   r;   rc   r   r+      s    



















zModelData.handle_missingc                 C  sT   |  |}d }|d k	rL| |}|jdkr:|d d d f }|jdkrLtd||fS )Nr   rD   zexog is not 1d or 2d)	_get_yarr	_get_xarrr   rs   )r8   r.   r0   ZyarrZxarrr   r   r   r2   =  s    



zModelData._convert_endog_exogc                 C  s>   | j }| |}|st| j}t|dkr2|d S t|S d S )Nr   r   )r/   
_get_names_make_endog_namesr.   r    list)r8   r.   ynamesr   r   r   r}   K  s    

zModelData.ynameszlist[str] | None)returnc                 C  s2   | j }|d k	r.| |}|s&t| j}t|S d S r`   )r1   rz   _make_exog_namesr0   r|   )r8   r0   xnamesr   r   r   r   W  s    

zModelData.xnamesc                 C  s   | j p
| jS r`   )_param_namesr   r8   r   r   r   param_namesa  s    zModelData.param_namesc                 C  s
   || _ d S r`   )r   )r8   r\   r   r   r   r   f  s    c                 C  s   | j dk	r| j S | jS )z
        Labels for covariance matrices

        In multidimensional models, each dimension of a covariance matrix
        differs from the number of param_names.

        If not set, returns param_names
        N)
_cov_namesr   r   r   r   r   	cov_namesj  s    
zModelData.cov_namesc                 C  s
   || _ d S r`   )r   )r8   r^   r   r   r   r   y  s    c                 C  s.   | j }|d k	r| |}n| j}| |}|S r`   )r1   _get_row_labelsr/   )r8   r0   
row_labelsr.   r   r   r   r   ~  s    
zModelData.row_labelsc                 C  s   d S r`   r   r8   Zarrr   r   r   r     s    zModelData._get_row_labelsc                 C  st   t |tr2t |jtr&dd |jD S t|jS n>t |trP|jrJ|jgS d S n z
|jjW S  t	k
rn   Y nX d S )Nc                 S  s    g | ]}d  dd |D qS )rP   c                 s  s   | ]}|r|V  qd S r`   r   )rj   levelr   r   r   	<genexpr>  s      z2ModelData._get_names.<locals>.<listcomp>.<genexpr>)rJ   )rj   cr   r   r   
<listcomp>  s   z(ModelData._get_names.<locals>.<listcomp>)

isinstancer   columnsr   r|   r   namer   namesAttributeErrorr   r   r   r   rz     s    


zModelData._get_namesc                 C  sZ   t |rt |}t|}t|dkrR|jdkr8|S |jdkrRt| gS | S r   )r'   _is_structured_ndarraystruct_to_ndarrayr   r   r    r   r   )r8   r.   r   r   r   rx     s    




zModelData._get_yarrc                 C  s   t |rt |}t|S r`   )r'   r   r   r   r   )r8   r0   r   r   r   ry     s    

zModelData._get_xarrc                 C  s*   | j d k	r&t| j t| jkr&tdd S )Nz+endog and exog matrices are different sizes)r0   r    r.   rs   r   r   r   r   r6     s    
zModelData._check_integrityr   c                 C  s   |dkr|  |S |dkr$| |S |dkr6| |S |dkrH| |S |dkrZ| |S |dkrl| |S |dkr| ||S |dkr| ||S |d	kr| |S |d
kr| 	|S |S d S )Nr   ZrowsZcovdatesZ
columns_eqZcov_eqZgeneric_columnsZgeneric_columns_2dr}   Zmultivariate_confint)
attach_columnsattach_rows
attach_covattach_datesattach_columns_eqattach_cov_eqattach_generic_columnsattach_generic_columns_2dattach_ynamesattach_mv_confint)r8   objhowr   r   r   r   wrap_output  s*    







zModelData.wrap_outputc                 C  s   |S r`   r   r8   resultr   r   r   r     s    zModelData.attach_columnsc                 C  s   |S r`   r   r   r   r   r   r     s    zModelData.attach_columns_eqc                 C  s   |S r`   r   r   r   r   r   r     s    zModelData.attach_covc                 C  s   |S r`   r   r   r   r   r   r     s    zModelData.attach_cov_eqc                 C  s   |S r`   r   r   r   r   r   r     s    zModelData.attach_rowsc                 C  s   |S r`   r   r   r   r   r   r     s    zModelData.attach_datesc                 C  s   |S r`   r   r   r   r   r   r     s    zModelData.attach_mv_confintc                 O  s   |S r`   r   r8   r   argsr;   r   r   r   r     s    z ModelData.attach_generic_columnsc                 O  s   |S r`   r   r   r   r   r   r     s    z#ModelData.attach_generic_columns_2dc                 C  s   |S r`   r   r   r   r   r   r     s    zModelData.attach_ynames)Nr#   N)r   N)(__name__
__module____qualname____doc__r   r   r<   r@   rR   r5   classmethodrd   re   r+   r2   r
   r}   r   propertyr   setterr   r	   r   r   rz   rx   ry   r6   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r"   6   sX   
=


z
	




	
r"   c                   @  s   e Zd Zdd ZdS )	PatsyDatac                 C  s   |j jS r`   )r%   column_namesr   r   r   r   rz     s    zPatsyData._get_namesN)r   r   r   rz   r   r   r   r   r     s   r   c                      s   e Zd ZdZd! fdd	Ze fddZe fddZ fd	d
Zdd Z	dd Z
d"ddZdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Z  ZS )#
PandasDataz^
    Data handling class which knows how to reattach pandas metadata to model
    results
    Nc                   sV   t |}|d kr|nt |}|jtks<|d k	rD|jtkrDtdtt| ||S )NzRPandas data cast to numpy dtype of object. Check input data with np.asarray(data).)r   r   r   objectrs   superr   r2   r8   r.   r0   	__class__r   r   r2     s
    
zPandasData._convert_endog_exogc                   s.   t |ttfr|j| S tt| ||S d S r`   )r   r   r   locr   r   rd   ra   r   r   r   rd     s    
zPandasData._drop_nansc                   s<   t |ttfr&|j| jd d |f S tt| ||S d S r`   )r   r   r   r   r   r   re   ra   r   r   r   re     s    zPandasData._drop_nans_2dc                   sV   | j | j }}|d k	rDt|drDt|drD| j j| jjsDtdtt|   d S )Nindexz.The indices for endog and exog are not aligned)	r/   r1   r   r   equalsrs   r   r   r6   r   r   r   r   r6     s    zPandasData._check_integrityc                 C  s*   z|j W S  tk
r$   | jj  Y S X d S r`   )r   r   r/   r   r   r   r   r     s    zPandasData._get_row_labelsc                 C  s   t | |d }t||dS )Nr   )getattrr   )r8   r   r   r   r   r   r   r      s    z!PandasData.attach_generic_columnsc                 C  s.   |p|}t | |d }t | |d }t|||dS Nr   r   )r   r   )r8   r   ZrownamesZcolnamesr   r   r   r   %  s    z$PandasData.attach_generic_columns_2dc                 C  s*   |j dkrt|| jdS t|| jdS d S )Nr   r   )r   r   r   r   r   r   r   r   r   +  s    
zPandasData.attach_columnsc                 C  s   t || j| jdS r   )r   r   r}   r   r   r   r   r   4  s    zPandasData.attach_columns_eqc                 C  s   t || j| jdS r   )r   r   r   r   r   r   r   7  s    zPandasData.attach_covc                 C  s   t || j| jdS r   )r   r}   r   r   r   r   r   :  s    zPandasData.attach_cov_eqc                 C  s   |  }tj| jddjd }|dkrB|j|fkrB|d d d f }|jdk rVt|}nt|}| j|_| j	t
| d  |_|S )Nr   Zndminr   rD   )r   r   rZ   r}   r[   r   r   r   r   r   r    r   )r8   r   squeezedk_endogoutr   r   r   r   =  s    

zPandasData.attach_rowsc                 C  s|   |  }tj| jddjd }|dkrH|j|fkrHt|d d d f }|jdk r`t|| jdS t	t|| j| jdS d S )Nr   r   r   rD   r   r   )
r   r   rZ   r}   r[   r   r   r   Zpredict_datesr   )r8   r   r   r   r   r   r   r   M  s    

zPandasData.attach_datesc                 C  s   t |d| jddgdS )N)rH   rD   lowerupperr   )r   Zreshaper   r   r   r   r   r   Z  s    
zPandasData.attach_mv_confintc                 C  s2   |  }|jdk r t|| jdS t|| jdS d S )NrD   )r   )r   )r   r   r   r}   r   )r8   r   r   r   r   r   r   _  s    
zPandasData.attach_ynames)N)N)r   r   r   r   r2   r   rd   re   r6   r   r   r   r   r   r   r   r   r   r   r   __classcell__r   r   r   r   r     s$   		
	r   c                 C  s<   | j dks| jd dkr dg}ndd t| jd D }|S )Nr   r   c                 S  s   g | ]}d |d  qS )zy%dr   r   rj   ir   r   r   r   l  s     z%_make_endog_names.<locals>.<listcomp>)r   r[   range)r.   r}   r   r   r   r{   h  s    r{   c                 C  sh   |  d}|dk rF| }dd td| jd D }||d ndd td| jd d D }|S )Nr   c                 S  s   g | ]}d | qS zx%dr   r   r   r   r   r   w  s     z$_make_exog_names.<locals>.<listcomp>r   constc                 S  s   g | ]}d | qS r   r   r   r   r   r   r   z  s     )varr   Zargminr   r[   insert)r0   Zexog_varr3   Z
exog_namesr   r   r   r   q  s    
r   r#   c                 K  sH   t | |}|dkr0t| |d}|| |d fS |j| |fd|i|S )Nr#   )r.   r0   r9   )handle_data_class_factoryrt   r-   r+   )r.   r0   r9   r;   klassZret_dictr   r   r   r+     s    

r+   c                 C  sd   t | |rt}nNt | |r$t}n<t | |r6t}n*t | |rHt}ntdt	| t	|f |S )z
    Given inputs
    z%unrecognized data structures: %s / %s)
r'   Z_is_using_ndarray_typer"   Z_is_using_pandasr   Z_is_using_patsyr   Z_is_using_ndarrayrs   type)r.   r0   r   r   r   r   r     s    r   c                 K  sR   t | ttfrt| } t |ttfr0t|}t| |}|| f|||d|S )N)r0   r9   r:   )r   r|   tupler   r   r   )r.   r0   r9   r:   r;   r   r   r   r   handle_data  s    


r   )Nr#   )r#   N)"r   
__future__r   Zstatsmodels.compat.pythonr   	functoolsr   Znumpyr   Zpandasr   r   r   r   Zstatsmodels.tools.dataZtoolsrN   r'   Zstatsmodels.tools.decoratorsr	   r
   r(   r   r   r   r!   r"   r   r   r{   r   r+   r   r   r   r   r   r   <module>   s,      :v	
	