U
    Kvf.                     @   sv   d Z ddlmZ ddlZddlmZ G dd dZG dd dZG d	d
 d
Z	G dd dZ
dd ZG dd dZdS )a!  
Utilities for cross validation.

taken from scikits.learn

# Author: Alexandre Gramfort <alexandre.gramfort@inria.fr>,
#         Gael Varoquaux    <gael.varoquaux@normalesup.org>
# License: BSD Style.
# $Id$

changes to code by josef-pktd:
 - docstring formatting: underlines of headers

    )lrangeN)combinationsc                   @   s(   e Zd ZdZdd Zdd Zdd ZdS )	LeaveOneOutzs
    Leave-One-Out cross validation iterator:
    Provides train/test indexes to split data in train test sets
    c                 C   s
   || _ dS )a9  
        Leave-One-Out cross validation iterator:
        Provides train/test indexes to split data in train test sets

        Parameters
        ----------
        n: int
            Total number of elements

        Examples
        --------
        >>> from scikits.learn import cross_val
        >>> X = [[1, 2], [3, 4]]
        >>> y = [1, 2]
        >>> loo = cross_val.LeaveOneOut(2)
        >>> for train_index, test_index in loo:
        ...    print "TRAIN:", train_index, "TEST:", test_index
        ...    X_train, X_test, y_train, y_test = cross_val.split(train_index, test_index, X, y)
        ...    print X_train, X_test, y_train, y_test
        TRAIN: [False  True] TEST: [ True False]
        [[3 4]] [[1 2]] [2] [1]
        TRAIN: [ True False] TEST: [False  True]
        [[1 2]] [[3 4]] [1] [2]
        N)n)selfr    r   G/tmp/pip-unpacked-wheel-2v6byqio/statsmodels/sandbox/tools/cross_val.py__init__   s    zLeaveOneOut.__init__c                 c   sB   | j }t|D ].}tj|td}d||< t|}||fV  qd S NZdtypeT)r   rangenpzerosboollogical_not)r   r   i
test_indextrain_indexr   r   r   __iter__8   s    
zLeaveOneOut.__iter__c                 C   s   d| j j| j j| jf S Nz%s.%s(n=%i)	__class__
__module____name__r   r   r   r   r   __repr__A   s    zLeaveOneOut.__repr__Nr   r   __qualname____doc__r	   r   r   r   r   r   r   r      s   	r   c                   @   s(   e Zd ZdZdd Zdd Zdd ZdS )		LeavePOutzq
    Leave-P-Out cross validation iterator:
    Provides train/test indexes to split data in train test sets
    c                 C   s   || _ || _dS )aV  
        Leave-P-Out cross validation iterator:
        Provides train/test indexes to split data in train test sets

        Parameters
        ----------
        n: int
            Total number of elements
        p: int
            Size test sets

        Examples
        --------
        >>> from scikits.learn import cross_val
        >>> X = [[1, 2], [3, 4], [5, 6], [7, 8]]
        >>> y = [1, 2, 3, 4]
        >>> lpo = cross_val.LeavePOut(4, 2)
        >>> for train_index, test_index in lpo:
        ...    print "TRAIN:", train_index, "TEST:", test_index
        ...    X_train, X_test, y_train, y_test = cross_val.split(train_index, test_index, X, y)
        TRAIN: [False False  True  True] TEST: [ True  True False False]
        TRAIN: [False  True False  True] TEST: [ True False  True False]
        TRAIN: [False  True  True False] TEST: [ True False False  True]
        TRAIN: [ True False False  True] TEST: [False  True  True False]
        TRAIN: [ True False  True False] TEST: [False  True False  True]
        TRAIN: [ True  True False False] TEST: [False False  True  True]
        N)r   p)r   r   r    r   r   r   r	   P   s    zLeavePOut.__init__c                 c   sX   | j }| j}tt||}|D ]4}tj|td}d|t|< t|}||fV  qd S r
   )	r   r    r   r   r   r   r   arrayr   )r   r   r    combidxr   r   r   r   r   r   p   s    
zLeavePOut.__iter__c                 C   s   d| j j| j j| j| jf S )Nz%s.%s(n=%i, p=%i))r   r   r   r   r    r   r   r   r   r   {   s    zLeavePOut.__repr__Nr   r   r   r   r   r   J   s    r   c                   @   s(   e Zd ZdZdd Zdd Zdd ZdS )	KFoldzm
    K-Folds cross validation iterator:
    Provides train/test indexes to split data in train test sets
    c                 C   s@   |dkst td||k s0t td||f || _|| _dS )a  
        K-Folds cross validation iterator:
        Provides train/test indexes to split data in train test sets

        Parameters
        ----------
        n: int
            Total number of elements
        k: int
            number of folds

        Examples
        --------
        >>> from scikits.learn import cross_val
        >>> X = [[1, 2], [3, 4], [1, 2], [3, 4]]
        >>> y = [1, 2, 3, 4]
        >>> kf = cross_val.KFold(4, k=2)
        >>> for train_index, test_index in kf:
        ...    print "TRAIN:", train_index, "TEST:", test_index
        ...    X_train, X_test, y_train, y_test = cross_val.split(train_index, test_index, X, y)
        TRAIN: [False False  True  True] TEST: [ True  True False False]
        TRAIN: [ True  True False False] TEST: [False False  True  True]

        Notes
        -----
        All the folds have size trunc(n/k), the last one has the complementary
        r   zcannot have k below 1z cannot have k=%d greater than %dN)AssertionError
ValueErrorr   k)r   r   r'   r   r   r   r	      s    zKFold.__init__c                 c   s   | j }| j}tt|| }t|D ]\}tj|td}||d k r^d||| |d | < nd||| d < t|}||fV  q&d S )Nr      T)	r   r'   intr   ceilr   r   r   r   )r   r   r'   jr   r   r   r   r   r   r      s    
zKFold.__iter__c                 C   s   d| j j| j j| j| jf S )Nz%s.%s(n=%i, k=%i))r   r   r   r   r'   r   r   r   r   r      s    zKFold.__repr__Nr   r   r   r   r   r$      s   "r$   c                   @   s(   e Zd ZdZdd Zdd Zdd ZdS )	LeaveOneLabelOutzy
    Leave-One-Label_Out cross-validation iterator:
    Provides train/test indexes to split data in train test sets
    c                 C   s
   || _ dS )a  
        Leave-One-Label_Out cross validation:
        Provides train/test indexes to split data in train test sets

        Parameters
        ----------
        labels : list
                List of labels

        Examples
        --------
        >>> from scikits.learn import cross_val
        >>> X = [[1, 2], [3, 4], [5, 6], [7, 8]]
        >>> y = [1, 2, 1, 2]
        >>> labels = [1, 1, 2, 2]
        >>> lol = cross_val.LeaveOneLabelOut(labels)
        >>> for train_index, test_index in lol:
        ...    print "TRAIN:", train_index, "TEST:", test_index
        ...    X_train, X_test, y_train, y_test = cross_val.split(train_index,             test_index, X, y)
        ...    print X_train, X_test, y_train, y_test
        TRAIN: [False False  True  True] TEST: [ True  True False False]
        [[5 6]
        [7 8]] [[1 2]
        [3 4]] [1 2] [1 2]
        TRAIN: [ True  True False False] TEST: [False False  True  True]
        [[1 2]
        [3 4]] [[5 6]
        [7 8]] [1 2] [1 2]
        N)labels)r   r-   r   r   r   r	      s    zLeaveOneLabelOut.__init__c                 c   sV   t j| jdd}t |D ]6}t jt|td}d|||k< t |}||fV  qd S )NT)copyr   )r   r!   r-   uniquer   lenr   r   )r   r-   r   r   r   r   r   r   r      s    
zLeaveOneLabelOut.__iter__c                 C   s   d| j j| j j| jf S )Nz%s.%s(labels=%s))r   r   r   r-   r   r   r   r   r      s
    zLeaveOneLabelOut.__repr__Nr   r   r   r   r   r,      s   "
r,   c                 G   s@   g }|D ]2}t |}||  }|| }|| || q|S )zx
    For each arg return a train and test subsets defined by indexes provided
    in train_indexes and test_indexes
    )r   Z
asanyarrayappend)Ztrain_indexesZtest_indexesargsretargZ	arg_trainZarg_testr   r   r   split   s    

r5   c                   @   s*   e Zd ZdZdddZdd Zd	d
 ZdS )
KStepAheadzn
    KStepAhead cross validation iterator:
    Provides fit/test indexes to split data in sequential sets
    r(   NTc                 C   s<   || _ || _|dkr&tt|d }|| _|| _|| _dS )a=  
        KStepAhead cross validation iterator:
        Provides train/test indexes to split data in train test sets

        Parameters
        ----------
        n: int
            Total number of elements
        k : int
            number of steps ahead
        start : int
            initial size of data for fitting
        kall : bool
            if true. all values for up to k-step ahead are included in the test index.
            If false, then only the k-th step ahead value is returnd


        Notes
        -----
        I do not think this is really useful, because it can be done with
        a very simple loop instead.
        Useful as a plugin, but it could return slices instead for faster array access.

        Examples
        --------
        >>> from scikits.learn import cross_val
        >>> X = [[1, 2], [3, 4]]
        >>> y = [1, 2]
        >>> loo = cross_val.LeaveOneOut(2)
        >>> for train_index, test_index in loo:
        ...    print "TRAIN:", train_index, "TEST:", test_index
        ...    X_train, X_test, y_train, y_test = cross_val.split(train_index, test_index, X, y)
        ...    print X_train, X_test, y_train, y_test
        TRAIN: [False  True] TEST: [ True False]
        [[3 4]] [[1 2]] [2] [1]
        TRAIN: [ True False] TEST: [False  True]
        [[1 2]] [[3 4]] [1] [2]
        Ng      ?)r   r'   r)   r   truncstartkallreturn_slice)r   r   r'   r8   r9   r:   r   r   r   r	      s    'zKStepAhead.__init__c           	      c   s   | j }| j}| j}| jrpt||| D ]F}td |d }| jrLt||| }nt|| d || }||fV  q&nvt||| D ]f}tj|t	d}d|d |< tj|t	d}| jrd|||| < nd||| d || < ||fV  q~d S )Nr(   r   T)
r   r'   r8   r:   r   slicer9   r   r   r   )	r   r   r'   r8   r   Ztrain_sliceZ
test_slicer   r   r   r   r   r   P  s$    zKStepAhead.__iter__c                 C   s   d| j j| j j| jf S r   r   r   r   r   r   r   k  s    zKStepAhead.__repr__)r(   NTTr   r   r   r   r   r6     s   
0r6   )r   Zstatsmodels.compat.pythonr   Znumpyr   	itertoolsr   r   r   r$   r,   r5   r6   r   r   r   r   <module>   s   4;A: