Source code for etna.models.deadline_ma

import warnings
from enum import Enum
from typing import Dict
from typing import Optional
from typing import Tuple

import numpy as np
import pandas as pd
from typing_extensions import assert_never

from etna.datasets import TSDataset
from etna.distributions import BaseDistribution
from etna.distributions import IntDistribution
from etna.models.base import NonPredictionIntervalContextRequiredAbstractModel


class SeasonalityMode(str, Enum):
    """Enum for seasonality mode for DeadlineMovingAverageModel.

    Members subclass ``str``, so a member compares equal to its plain string value
    (``SeasonalityMode.month == "month"``).
    """

    month = "month"
    year = "year"

    @classmethod
    def _missing_(cls, value):
        """Handle a lookup by a value that matches no member.

        Parameters
        ----------
        value:
            Value passed to the enum constructor that is not a member value.

        Raises
        ------
        NotImplementedError:
            always; the message lists the allowed seasonality values
        """
        allowed_values = ", ".join([repr(member.value) for member in cls])
        raise NotImplementedError(
            f"{value} is not a valid {cls.__name__}. Only {allowed_values} seasonality allowed"
        )
class DeadlineMovingAverageModel(
    NonPredictionIntervalContextRequiredAbstractModel,
):
    """Moving average model that uses exact previous dates to predict.

    Notes
    -----
    This model supports in-sample and out-of-sample prediction decomposition.
    Prediction components are corresponding target seasonal lags (monthly or annual)
    with weights of :math:`1/window`.
    """

    def __init__(self, window: int = 3, seasonality: str = "month"):
        """Initialize deadline moving average model.

        Length of the context is equal to the number of ``window`` months or years, depending on the ``seasonality``.

        Parameters
        ----------
        window:
            Number of values taken for forecast for each point.
        seasonality:
            Only allowed values are "month" and "year".
        """
        self.window = window
        self.seasonality = SeasonalityMode(seasonality)
        self._freqs_available = {"H", "D"}
        # stays None until ``fit`` stores the normalized frequency of the dataset
        self._freq: Optional[str] = None

    def _validate_fitted(self):
        """Check if model is fitted.

        Raises
        ------
        ValueError:
            if ``fit`` has not stored a frequency yet
        """
        if self._freq is None:
            raise ValueError("Model is not fitted! Fit the model before trying the find out context size!")

    @property
    def context_size(self) -> int:
        """Upper bound to context size of the model."""
        self._validate_fitted()

        # longest possible season expressed in days
        if self.seasonality is SeasonalityMode.year:
            cur_value = 366
        elif self.seasonality is SeasonalityMode.month:
            cur_value = 31
        else:
            assert_never(self.seasonality)

        # hourly data has 24 timestamps per day
        if self._freq == "H":
            cur_value *= 24

        cur_value *= self.window
        return cur_value

    def get_model(self) -> "DeadlineMovingAverageModel":
        """Get internal model.

        Returns
        -------
        :
           Itself
        """
        return self

    def fit(self, ts: TSDataset) -> "DeadlineMovingAverageModel":
        """Fit model.

        Parameters
        ----------
        ts:
            Dataset with features

        Returns
        -------
        :
            Model after fit

        Raises
        ------
        ValueError:
            if frequency of the dataset is neither daily nor hourly
        """
        # we make a normalization to treat "1d" like "D"
        freq = pd.tseries.frequencies.to_offset(ts.freq).freqstr
        if freq not in self._freqs_available:
            raise ValueError(f"Freq {freq} is not supported! Use daily or hourly frequency!")
        self._freq = freq

        columns = set(ts.columns.get_level_values("feature"))
        if columns != {"target"}:
            warnings.warn(
                message=f"{type(self).__name__} does not work with any exogenous series or features. "
                f"It uses only target series for predict/\n "
            )
        return self

    @staticmethod
    def _get_seasonal_offset(seasonality: SeasonalityMode, periods: int) -> pd.DateOffset:
        """Build a calendar offset of ``periods`` months or years depending on ``seasonality``."""
        if seasonality is SeasonalityMode.month:
            return pd.DateOffset(months=periods)
        if seasonality is SeasonalityMode.year:
            return pd.DateOffset(years=periods)
        assert_never(seasonality)

    @staticmethod
    def _get_context_beginning(
        df: pd.DataFrame, prediction_size: int, seasonality: SeasonalityMode, window: int
    ) -> pd.Timestamp:
        """Get timestamp where context begins.

        Parameters
        ----------
        df:
            Time series in a wide format.
        prediction_size:
            Number of last timestamps to leave after making prediction.
            Previous timestamps will be used as a context for models that require it.
        seasonality:
            Seasonality.
        window:
            Number of values taken for forecast of each point.

        Returns
        -------
        :
            Timestamp with beginning of the context.

        Raises
        ------
        ValueError:
            if context isn't big enough
        """
        not_enough_context_message = (
            "Given context isn't big enough, try to decrease context_size, prediction_size or increase length of given dataframe!"
        )

        history_timestamps = df.iloc[:-prediction_size].index
        future_timestamps = df.iloc[-prediction_size:].index

        # if we have len(history_timestamps) == 0, then len(df) <= prediction_size
        if len(history_timestamps) == 0:
            raise ValueError(not_enough_context_message)

        # the farthest lag of the first predicted timestamp is where the context has to start
        first_index = future_timestamps[0] - DeadlineMovingAverageModel._get_seasonal_offset(
            seasonality=seasonality, periods=window
        )

        if first_index < history_timestamps[0]:
            raise ValueError(not_enough_context_message)

        return first_index

    def _get_previous_date(self, date: pd.Timestamp, offset: int) -> pd.Timestamp:
        """Get previous date using seasonality offset."""
        return date - self._get_seasonal_offset(seasonality=self.seasonality, periods=offset)

    def _make_prediction_components(
        self, result_template: pd.DataFrame, context: pd.DataFrame, prediction_size: int
    ) -> pd.DataFrame:
        """Estimate prediction components using ``result_template`` as a base and ``context`` as a context.

        Only the index of ``result_template`` is used here; the values of the components
        are read from ``context`` at the seasonal lags of each predicted timestamp.
        """
        index = result_template.index
        end_idx = len(result_template)
        start_idx = end_idx - prediction_size

        components_data = []
        for i in range(start_idx, end_idx):
            obs_components = []
            for w in range(1, self.window + 1):
                prev_date = self._get_previous_date(date=index[i], offset=w)
                obs_components.append(context.loc[prev_date].values)
            components_data.append(obs_components)

        # shape: (prediction_size, window, num_segments)
        raw_components = np.asarray(components_data, dtype=float)

        # shape: (prediction_size, num_segments, window)
        # this is needed to place elements in the right order
        raw_components = np.swapaxes(raw_components, -1, -2)

        # shape: (prediction_size, num_segments * window)
        raw_components = raw_components.reshape(raw_components.shape[0], -1)

        # every lag contributes with the weight 1/window
        raw_components /= self.window

        components_names = [f"target_component_{self.seasonality.name}_lag_{w}" for w in range(1, self.window + 1)]
        segment_names = context.columns.get_level_values("segment")
        column_names = pd.MultiIndex.from_product([segment_names, components_names], names=("segment", "feature"))

        target_components_df = pd.DataFrame(data=raw_components, columns=column_names, index=index[start_idx:end_idx])
        return target_components_df

    def _make_predictions(
        self, result_template: pd.DataFrame, context: pd.DataFrame, prediction_size: int
    ) -> np.ndarray:
        """Make predictions using ``result_template`` as a base and ``context`` as a context.

        ``result_template`` is modified in place: each of its last ``prediction_size`` rows
        accumulates the lagged rows of ``context`` and is then divided by ``window``.
        Rows are filled in chronological order, so when ``context`` is the same object as
        ``result_template`` already computed rows can serve as lags for the later ones.
        """
        index = result_template.index
        end_idx = len(result_template)
        start_idx = end_idx - prediction_size

        for i in range(start_idx, end_idx):
            timestamp = index[i]
            for w in range(1, self.window + 1):
                prev_date = self._get_previous_date(date=timestamp, offset=w)
                result_template.loc[timestamp] += context.loc[prev_date]
            result_template.loc[timestamp] = result_template.loc[timestamp] / self.window

        result_values = result_template.values[-prediction_size:]
        return result_values

    def _forecast(
        self, df: pd.DataFrame, prediction_size: int, return_components: bool = False
    ) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]:
        """Make autoregressive forecasts on a wide dataframe.

        Returns the last ``prediction_size`` rows of ``df`` with target filled by predictions
        and, if ``return_components`` is set, a dataframe with target components (``None`` otherwise).
        """
        context_beginning = self._get_context_beginning(
            df=df, prediction_size=prediction_size, seasonality=self.seasonality, window=self.window
        )

        history = df.loc[:, pd.IndexSlice[:, "target"]]
        history = history.iloc[:-prediction_size]
        history = history.loc[history.index >= context_beginning]
        if np.any(history.isnull()):
            raise ValueError("There are NaNs in a forecast context, forecast method requires context to be filled!")

        # history followed by zero rows that are going to be filled with predictions
        num_segments = history.shape[1]
        index = pd.date_range(start=context_beginning, end=df.index[-1], freq=self._freq)
        result_template = np.append(history.values, np.zeros((prediction_size, num_segments)), axis=0)
        result_template = pd.DataFrame(result_template, index=index, columns=history.columns)

        # the template is its own context, which makes the forecast autoregressive
        result_values = self._make_predictions(
            result_template=result_template, context=result_template, prediction_size=prediction_size
        )

        # explicit copy: we assign into the slice below and must not write through to the source frame
        df = df.iloc[-prediction_size:].copy()
        y_pred = result_values[-prediction_size:]
        df.loc[:, pd.IndexSlice[:, "target"]] = y_pred

        target_components_df = None
        if return_components:
            target_components_df = self._make_prediction_components(
                result_template=result_template, context=result_template, prediction_size=prediction_size
            )

        return df, target_components_df

    def forecast(self, ts: TSDataset, prediction_size: int, return_components: bool = False) -> TSDataset:
        """Make autoregressive forecasts.

        Parameters
        ----------
        ts:
            Dataset with features
        prediction_size:
            Number of last timestamps to leave after making prediction.
            Previous timestamps will be used as a context.
        return_components:
            If True additionally returns forecast components

        Returns
        -------
        :
            Dataset with predictions

        Raises
        ------
        ValueError:
            if model isn't fitted
        ValueError:
            if context isn't big enough
        ValueError:
            if forecast context contains NaNs
        """
        self._validate_fitted()

        df = ts.to_pandas()
        new_df, target_components_df = self._forecast(
            df=df, prediction_size=prediction_size, return_components=return_components
        )
        # the given dataset is updated in place and returned
        ts.df = new_df

        if return_components:
            ts.add_target_components(target_components_df=target_components_df)
        return ts

    def _predict(
        self, df: pd.DataFrame, prediction_size: int, return_components: bool = False
    ) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]:
        """Make predictions on a wide dataframe using true values as autoregression context.

        Returns the last ``prediction_size`` rows of ``df`` with target filled by predictions
        and, if ``return_components`` is set, a dataframe with target components (``None`` otherwise).
        """
        context_beginning = self._get_context_beginning(
            df=df, prediction_size=prediction_size, seasonality=self.seasonality, window=self.window
        )

        context = df.loc[:, pd.IndexSlice[:, "target"]]
        context = context.loc[context.index >= context_beginning]
        if np.any(context.isnull()):
            raise ValueError("There are NaNs in a target column, predict method requires target to be filled!")

        # zero rows for the predicted timestamps only; lags are read from the true target values
        num_segments = context.shape[1]
        index = pd.date_range(start=df.index[-prediction_size], end=df.index[-1], freq=self._freq)
        result_template = pd.DataFrame(np.zeros((prediction_size, num_segments)), index=index, columns=context.columns)

        result_values = self._make_predictions(
            result_template=result_template, context=context, prediction_size=prediction_size
        )

        # explicit copy: we assign into the slice below and must not write through to the source frame
        df = df.iloc[-prediction_size:].copy()
        y_pred = result_values[-prediction_size:]
        df.loc[:, pd.IndexSlice[:, "target"]] = y_pred

        target_components_df = None
        if return_components:
            target_components_df = self._make_prediction_components(
                result_template=result_template, context=context, prediction_size=prediction_size
            )

        return df, target_components_df

    def predict(self, ts: TSDataset, prediction_size: int, return_components: bool = False) -> TSDataset:
        """Make predictions using true values as autoregression context (teacher forcing).

        Parameters
        ----------
        ts:
            Dataset with features
        prediction_size:
            Number of last timestamps to leave after making prediction.
            Previous timestamps will be used as a context.
        return_components:
            If True additionally returns prediction components

        Returns
        -------
        :
            Dataset with predictions

        Raises
        ------
        ValueError:
            if model isn't fitted
        ValueError:
            if context isn't big enough
        ValueError:
            if target contains NaNs
        """
        self._validate_fitted()

        df = ts.to_pandas()
        new_df, target_components_df = self._predict(
            df=df, prediction_size=prediction_size, return_components=return_components
        )
        # the given dataset is updated in place and returned
        ts.df = new_df

        if return_components:
            ts.add_target_components(target_components_df=target_components_df)
        return ts

    def params_to_tune(self) -> Dict[str, BaseDistribution]:
        """Get default grid for tuning hyperparameters.

        This grid tunes ``window`` parameter. Other parameters are expected to be set by the user.

        Returns
        -------
        :
            Grid to tune.
        """
        return {"window": IntDistribution(low=1, high=10)}
# Names exported by ``from <module> import *``: only the model class is listed here.
__all__ = ["DeadlineMovingAverageModel"]