U
    Kvf('                     @   sz   d Z ddlmZmZmZmZmZ ddlZddlZ	ddl
ZddlmZ ddlmZ ddlmZ ddlmZ G dd	 d	ZdS )
a  
Author: Kishan Manani
License: BSD-3 Clause

An implementation of MSTL [1], an algorithm for time series decomposition when
there are multiple seasonal components.

This implementation has the following differences with the original algorithm:
- Missing data must be handled outside of this class.
- The algorithm proposed in the paper handles a case when there is no
seasonality. This implementation assumes that there is at least one seasonal
component.

[1] K. Bandura, R.J. Hyndman, and C. Bergmeir (2021)
MSTL: A Seasonal-Trend Decomposition Algorithm for Time Series with Multiple
Seasonal Patterns
https://arxiv.org/pdf/2107.13462.pdf
    )DictOptionalSequenceTupleUnionN)boxcox)ArrayLike1D)STL)freq_to_periodc                   @   st  e Zd ZdZddddddeeeeee f  eeeee f  eee	e
f  eeee
eeedf f  dddZdd	 Zd
d Zeeee df eeee df eee ee f dddZeeee df ee dddZeeee df eee dddZedddZeeee ee f dddZeeedddZeeee dddZed d! ZdS )"MSTLa
  
    MSTL(endog, periods=None, windows=None, lmbda=None, iterate=2,
         stl_kwargs=None)

    Season-Trend decomposition using LOESS for multiple seasonalities.

    .. versionadded:: 0.14.0

    Parameters
    ----------
    endog : array_like
        Data to be decomposed. Must be squeezable to 1-d.
    periods : {int, array_like, None}, optional
        Periodicity of the seasonal components. If None and endog is a pandas
        Series or DataFrame, attempts to determine from endog. If endog is a
        ndarray, periods must be provided.
    windows : {int, array_like, None}, optional
        Length of the seasonal smoothers for each corresponding period.
        Must be an odd integer, and should normally be >= 7 (default). If None
        then default values determined using 7 + 4 * np.arange(1, n + 1, 1)
        where n is number of seasonal components.
    lmbda : {float, str, None}, optional
        The lambda parameter for the Box-Cox transform to be applied to `endog`
        prior to decomposition. If None, no transform is applied. If "auto", a
        value will be estimated that maximizes the log-likelihood function.
    iterate : int, optional
        Number of iterations to use to refine the seasonal component.
    stl_kwargs: dict, optional
        Arguments to pass to STL.

    See Also
    --------
    statsmodels.tsa.seasonal.STL

    References
    ----------
    .. [1] K. Bandura, R.J. Hyndman, and C. Bergmeir (2021)
        MSTL: A Seasonal-Trend Decomposition Algorithm for Time Series with
        Multiple Seasonal Patterns. arXiv preprint arXiv:2107.13462.

    Examples
    --------
    Start by creating a toy dataset with hourly frequency and multiple seasonal
    components.

    >>> import numpy as np
    >>> import matplotlib.pyplot as plt
    >>> import pandas as pd
    >>> pd.plotting.register_matplotlib_converters()
    >>> np.random.seed(0)
    >>> t = np.arange(1, 1000)
    >>> trend = 0.0001 * t ** 2 + 100
    >>> daily_seasonality = 5 * np.sin(2 * np.pi * t / 24)
    >>> weekly_seasonality = 10 * np.sin(2 * np.pi * t / (24 * 7))
    >>> noise = np.random.randn(len(t))
    >>> y = trend + daily_seasonality + weekly_seasonality + noise
    >>> index = pd.date_range(start='2000-01-01', periods=len(t), freq='H')
    >>> data = pd.DataFrame(data=y, index=index)

    Use MSTL to decompose the time series into two seasonal components
    with periods 24 (daily seasonality) and 24*7 (weekly seasonality).

    >>> from statsmodels.tsa.seasonal import MSTL
    >>> res = MSTL(data, periods=(24, 24*7)).fit()
    >>> res.plot()
    >>> plt.tight_layout()
    >>> plt.show()

    .. plot:: plots/mstl_plot.py
    N   )periodswindowslmbdaiterate
stl_kwargs)endogr   r   r   r   r   c                C   sX   || _ | || _| jjd | _|| _| ||\| _| _|| _	| 
|rL|ni | _d S )Nr   )r   _to_1d_array_yshapenobsr   _process_periods_and_windowsr   r   r   _remove_overloaded_stl_kwargs_stl_kwargs)selfr   r   r   r   r   r    r   </tmp/pip-unpacked-wheel-2v6byqio/statsmodels/tsa/stl/mstl.py__init__g   s    
 
zMSTL.__init__c                 C   s  t | j}|dkrdn| j}| jdkr@t| jdd\}}|| _n| jrXt| j| jd}n| j}| jdd}| jdd}t	j
|| jfd}|}t|D ]b}	t|D ]T}
|||
  }tf || j|
 | j|
 d| jj||d	}|j||
< |||
  }qqt	|j}|j}|j}|| }t| jtjtjfr| jj}tj||d
d}tj||dd}tj||dd}tj||dd}dd | jD }|jdkrtj||dd}ntj|||d}ddlm} ||||||S )z
        Estimate a trend component, multiple seasonal components, and a
        residual component.

        Returns
        -------
        DecomposeResult
            Estimation results.
           autoN)r   
inner_iter
outer_iter)r   )r   periodseasonal)r    r!   Zobserved)indexnametrendresidZrobust_weightc                 S   s   g | ]}d | qS )Z	seasonal_r   .0r"   r   r   r   
<listcomp>   s     zMSTL.fit.<locals>.<listcomp>r#   )r$   columnsr   )DecomposeResult)lenr   r   r   r   r   Z	est_lmbdar   popnpzerosr   ranger	   r   fitr#   squeezeTr&   weights
isinstancer   pdSeries	DataFramer$   ndimZstatsmodels.tsa.seasonalr,   )r   num_seasonsr   yr   Zstl_inner_iterZstl_outer_iterr#   Zdeseas_iresr&   rwr'   r$   colsr,   r   r   r   r2   }   sV    


 
zMSTL.fitc              	   C   s&   d| j  d| j d| j d| j d	S )NzMSTL(endog, periods=z
, windows=z, lmbda=z
, iterate=))r   r   r   r   r   r   r   r   __str__   s    $zMSTL.__str__)r   r   returnc                    s     |}|r2 j|t|d} ||\}}n j|t|d}t|}t|t|krdtdt fdd|D rtd t	 fdd|D }|d t| }||fS )N)r;   )Periods and windows must have same lengthc                 3   s   | ]}| j d  kV  qdS r   Nr   r(   rC   r   r   	<genexpr>   s     z4MSTL._process_periods_and_windows.<locals>.<genexpr>zTA period(s) is larger than half the length of time series. Removing these period(s).c                 3   s    | ]}| j d  k r|V  qdS rG   rH   r(   rC   r   r   rI      s     )
_process_periods_process_windowsr-   _sort_periods_and_windowssorted
ValueErroranywarningswarntuple)r   r   r   r   rC   r   r      s"    
z!MSTL._process_periods_and_windows)r   rE   c                 C   s*   |d kr|   f}nt|tr&|f}n |S N)_infer_periodr6   int)r   r   r   r   r   rJ      s
    
zMSTL._process_periods)r   r;   rE   c                 C   s*   |d kr|  |}nt|tr&|f}n |S rS   )_default_seasonal_windowsr6   rU   )r   r   r;   r   r   r   rK      s
    
zMSTL._process_windows)rE   c                 C   sD   d }t | jtjtjfr(t| jjdd }|d kr8tdt|}|S )NZinferred_freqz%Unable to determine period from endog)	r6   r   r7   r8   r9   getattrr$   rN   r
   )r   freqr"   r   r   r   rT      s    zMSTL._infer_periodc                 C   s6   t | t |krtdttt| | \} }| |fS )NrF   )r-   rN   ziprM   )r   r   r   r   r   rL     s    zMSTL._sort_periods_and_windows)r   rE   c                 C   s$   dddg}|D ]}|  |d  q| S )Nr   r"   r#   )r.   )r   argsargr   r   r   r     s    
z"MSTL._remove_overloaded_stl_kwargs)nrE   c                 C   s   t dd td| d D S )Nc                 s   s   | ]}d d|  V  qdS )      Nr   )r)   r>   r   r   r   rI     s     z1MSTL._default_seasonal_windows.<locals>.<genexpr>r   )rR   r1   )r\   r   r   r   rV     s    zMSTL._default_seasonal_windowsc                 C   s2   t jt t | t jd}|jdkr.td|S )N)Zdtyper   zy must be a 1d array)r/   Zascontiguousarrayr3   Zasarraydoubler:   rN   )xr<   r   r   r   r     s    
zMSTL._to_1d_array)__name__
__module____qualname____doc__r   r   r   rU   r   floatstrr   boolr   r2   rD   r   r   rJ   rK   rT   staticmethodrL   r   rV   r   r   r   r   r   r      sJ   K@	r   )rd   typingr   r   r   r   r   rP   Znumpyr/   Zpandasr7   Zscipy.statsr   Zstatsmodels.tools.typingr   Zstatsmodels.tsa.stl._stlr	   Zstatsmodels.tsa.tsatoolsr
   r   r   r   r   r   <module>   s   