U
    KvfRJ                     @   s   d dddgZ ddlZddlZddlmZ ddlmZmZm	Z	m
Z
mZmZmZmZmZmZmZmZmZ e
rpddlZG dd	 d	eZdd
d Zdd Zdd Zdd Zdd ZG dd deZdd ZdddZdd ZdS )Cguess_categoricalCategoricalSniffercategorical_to_int    N)
PatsyError)SortAnythingKeysafe_scalar_isnaniterablehave_pandashave_pandas_categoricalhave_pandas_categorical_dtypesafe_is_pandas_categoricalpandas_Categorical_from_codespandas_Categorical_categoriespandas_Categorical_codessafe_issubdtypeno_picklingassert_no_picklingc                   @   s   e Zd Zdd ZeZdS )_CategoricalBoxc                 C   s   || _ || _|| _d S Ndatacontrastlevels)selfr   r   r    r   5/tmp/pip-unpacked-wheel-68fdvdus/patsy/categorical.py__init__9   s    z_CategoricalBox.__init__N)__name__
__module____qualname__r   r   __getstate__r   r   r   r   r   8   s   r   c                 C   s8   t | tr,|dkr| j}|dkr&| j}| j} t| ||S )a/  
    Marks some `data` as being categorical, and specifies how to interpret
    it.

    This is used for three reasons:

    * To explicitly mark some data as categorical. For instance, integer data
      is by default treated as numerical. If you have data that is stored
      using an integer type, but where you want patsy to treat each different
      value as a different level of a categorical factor, you can wrap it in a
      call to `C` to accomplish this. E.g., compare::

        dmatrix("a", {"a": [1, 2, 3]})
        dmatrix("C(a)", {"a": [1, 2, 3]})

    * To explicitly set the levels or override the default level ordering for
      categorical data, e.g.::

        dmatrix("C(a, levels=["a2", "a1"])", balanced(a=2))
    * To override the default coding scheme for categorical data. The
      `contrast` argument can be any of:

      * A :class:`ContrastMatrix` object
      * A simple 2d ndarray (which is treated the same as a ContrastMatrix
        object except that you can't specify column names)
      * An object with methods called `code_with_intercept` and
        `code_without_intercept`, like the built-in contrasts
        (:class:`Treatment`, :class:`Diff`, :class:`Poly`, etc.). See
        :ref:`categorical-coding` for more details.
      * A callable that returns one of the above.
    N)
isinstancer   r   r   r   r   r   r   r   r   @   s     
c                  C   s   t d} t| tst| jdks$t| jd ks2t| jd ks@tt ddd}|jdksZt|jdksht|jdksvtt |dd}|jdkst|jdkst|jdkstt |d}|jdkst|jdkst|jdkstt| d S )NZasdfZDATACONTRASTZLEVELSz
NEW LEVELSr   zNEW CONTRAST)r   r"   r   AssertionErrorr   r   r   r   )Zc1c2c3Zc4r   r   r   test_Ch   s$    
r(   c                 C   s:   t | rdS t| trdS t| } t| jtjr6dS dS )NTF)r   r"   r   npasarrayr   dtypenumberr   r   r   r   r   }   s    

c                  C   s   t r6tdddg} t| s ttr6tt| s6tttdddgsLttddgs\ttddgslttddtj	gsttddd gsttdddgrttdddtj	grttdd	d
grttdd	d
tj	grtd S )N         TFabg      ?g       @g      @)
r   pandasCategoricalr   r%   r   Seriesr   r)   nan)cr   r   r   test_guess_categorical   s    r8   c                 C   s@   t | dr| jdkrtdt| r6t| tjtjfr<| g} | S )Nndimr.   z)categorical data cannot be >1-dimensional)hasattrr9   r   r	   r"   six	text_typebinary_typer-   r   r   r   _categorical_shape_fix   s    r>   c                   @   s*   e Zd ZdddZdd Zdd ZeZdS )	r   Nc                 C   s$   || _ || _d | _d | _t | _d S r   )
_NA_action_origin	_contrast_levelsset
_level_set)r   	NA_actionoriginr   r   r   r      s
    zCategoricalSniffer.__init__c                 C   s6   | j d kr&t| j}|jtd || _ t| j | jfS )N)key)rB   listrD   sortr   tuplerA   )r   r   r   r   r   levels_contrast   s
    

z"CategoricalSniffer.levels_contrastc              	   C   s  t |dr|j| _t|tr<|jd k	r6t|j| _dS |j}t	|rVtt
|| _dS t |drt|jtjrtddg| _dS t|}|D ]f}| j|rq|dks|dkr| jddg qz| j| W q tk
r   td| jY qX q| jtddgkS )Nr   Tr+   F?Error interpreting categorical data: all items must be hashable)r:   r   rA   r"   r   r   rJ   rB   r   r   r   r   r+   r)   bool_rC   rD   r>   r?   is_categorical_NAupdateadd	TypeErrorr   r@   )r   r   valuer   r   r   sniff   s4    


zCategoricalSniffer.sniff)N)r   r   r    r   rK   rS   r   r!   r   r   r   r   r      s   
(c               	      s  ddl m  d+ fdd	} trdd tg}tr@|tjdd g7 }|D ]}| g |tdd	d ggd
d | g |tddgddggd
d | g |tddgddggd
d |tddg}d|_	| g |gd
dd qD| g tdd	gtdd	ggdd | g tdd	gdd	dgdtdd	ggd
d | g tdd	gdd	dgdtdd	ggd
d | ddgtdt
jgtdd ggdd t dgd}|tdt
jd g | \}}t|td dgkst| ddgtd
t
jd ggd
d | g tddgtdgtddggdd  | g t
d
dgd!ggd
d | ddgtd"d d#t
jd$ggdd% | g tddgd&d'gdd(d& | g ddgdggdd) | g ddgdggdd | g dgdd* dd l}t  }|t|ji g |t|jt
dgg d S ),Nr   NAActionc                    sT   t  | d}|D ](}||}|r2|s,t q<q|rtq| ||fksPtd S )NNA_types)r   rS   r%   rK   )rW   ZdatasZexp_finish_fastZ
exp_levelsZexp_contrastsnifferr   donerT   r   r   t   s    

z"test_CategoricalSniffer.<locals>.tc                 S   s   | S r   r   xr   r   r   <lambda>       z)test_CategoricalSniffer.<locals>.<lambda>c                 S   s   t t| S r   )r   r3   r5   r[   r   r   r   r]      r^   r.   r/   T)r.   r/   r1   r2   r1   r2   r2   r1   r#   r0   F)r.   r/   r0   r$      )r0   r/   r.   NoneNaN
   )r.   rd   rV   FT      (   )FTrd   rf   rg   rh   Zfoor2   r/   r1   r.   r7   N)rj   ri   rk   ZFOO)r   )rd   rf   )rd   rf   rg   )r2   )N)patsy.missingrU   r   r   r   r3   r5   r4   r   r   r)   r6   r   rS   rK   rC   r%   r*   pytestraisesr   )rZ   ZprepsprepobjrX   r   _rm   r   rT   r   test_CategoricalSniffer   sz       ",,   "   
rr   c              
   C   sL  t |tstt| rDtt| }||ks<td||f |t| S t | tr| jd k	r~t| j|kr~td|t| jf || j	} t
| } ztt|tt|}W n tk
r   td|Y nX t| dr
t| jtjr
|d dkr
|d dkr
| tjS tjt| td}t| D ]\}}||rBd	||< nz|| ||< W n tk
r   d
}	g }
t||	kr|
dd |D 7 }
nH|
dd |d |	d  D 7 }
|
d |
dd ||	 d d  D 7 }
dd|
 }td||f |Y n& tk
r   td|f |Y nX q$trHt | tj rHtj || j!d}|S )Nz'mismatching levels: expected %r, got %rrL   r+   Fr   Tr.   r+   ra   c                 S   s   g | ]}t |qS r   repr.0levelr   r   r   
<listcomp>_  s     z&categorical_to_int.<locals>.<listcomp>c                 S   s   g | ]}t |qS r   ru   rw   r   r   r   rz   a  s   r/   z...c                 S   s   g | ]}t |qS r   ru   rw   r   r   r   rz   d  s   z[%s]z, zxError converting data to categorical: observation with value %r does not match any of the expected levels (expected: %s)zEError converting data to categorical: encountered unhashable value %rindex)"r"   rJ   r%   r   r   r   r   r   r   r   r>   dictziprangelenrQ   r:   r   r+   r)   rM   Zastypeint_emptyint	enumeraterN   KeyErrorappendjoinr
   r3   r5   r|   )r   r   rE   rF   Zdata_levels_tupleZlevel_to_intoutirR   ZSHOW_LEVELSZ
level_strsZ	level_strr   r   r   r   4  sz    



c               	   C   s:  dd l } ddlm} trtjdddgdddgd	}t|d
| }t|dddgksXt	t|j
dddgksrt	| tttd|id
|  trltg}trdd }|| |D ]}|dddgd}t|d| }t|dddgkst	|dddgd}	t|d|dgd}
t|
dddgks.t	| tt|ddgdd|  | tt|ddgdd|  q| fdd}|dddgddddg |tdddgddddg |tjdddgtdddddg |dddgddddg |tdddgddddg |tjdddgtdddddg |tjdddgtdddddg |dddgddddg |dddgddddg | ttdddgd|  |tdddgddddg |tdddgddddg |tdddgddgdddddg | tttdddgddgdd|  |dddg |dddg |d d!dg | tttddgddggd|  | ttddgddi f|  | ttddi gd|  |dd tjdgdddddg|dd"gd |dd tjdgd#ddddg|dd"gd |dd tjdgd#ddddg|d"gd | ttddd$gd%|  d S )&Nr   rT   r1   r2   r7   rd   rf   rg   r{   )r1   r2   r7   r.   r/   c                 S   s   t | |}t|S r   )r   r3   r5   )codes
categoriesr7   r   r   r   Series_from_codes  s    
z2test_categorical_to_int.<locals>.Series_from_codesrt   r_   )r1   rb   rb   rV   )r1   r7   r`   c                 S   s    t | ||}t||std S r   )r   r)   Zarray_equalr%   )r   r   expectedrE   gotr   r   r   rZ     s    z"test_categorical_to_int.<locals>.trs   )r.   r/   r   )r1   dzr2   r0   rj   r2   r   )rj   r   r$   Tre   rc   )r1   r2   Nq)r1   r2   r7   r   efgh)rm   rl   rU   r
   r3   r5   r   r)   allr%   r|   rn   r   Z	DataFramer   r   r   r   r*   objectfloatr   r6   )rm   rU   sZc_pandasconstructorsr   concatconvZcat2Zconv2rZ   r   r   r   test_categorical_to_ints  s      

"""  $      
r   )NN)N) __all__Znumpyr)   r;   Zpatsyr   Z
patsy.utilr   r   r	   r
   r   r   r   r   r   r   r   r   r   r3   r   r   r   r(   r   r8   r>   r   rr   r   r   r   r   r   r   <module>   s$   <
(
9V
?