Source code for pytorch_forecasting.models.tide._tide

"""
Implements the TiDE (Time-series Dense Encoder-decoder) model, which is designed for
long-term time-series forecasting.
"""

from copy import copy
from typing import Optional, Union

import torch
from torch import nn

from pytorch_forecasting.data import TimeSeriesDataSet
from pytorch_forecasting.data.encoders import NaNLabelEncoder
from pytorch_forecasting.metrics import MAE, MAPE, MASE, RMSE, SMAPE
from pytorch_forecasting.models.base import BaseModelWithCovariates
from pytorch_forecasting.models.nn.embeddings import MultiEmbedding
from pytorch_forecasting.models.tide.sub_modules import _TideModule


class TiDEModel(BaseModelWithCovariates):
    """TiDE model for long-term time-series forecasting."""

    @classmethod
    def _pkg(cls):
        """Package for the model."""
        # imported lazily, inside the method, as in the original implementation
        from pytorch_forecasting.models.tide._tide_pkg import TiDEModel_pkg

        return TiDEModel_pkg

    def __init__(
        self,
        output_chunk_length: int,
        input_chunk_length: int,
        num_encoder_layers: int = 2,
        num_decoder_layers: int = 2,
        decoder_output_dim: int = 16,
        hidden_size: int = 128,
        temporal_width_future: int = 4,
        temporal_hidden_size_future: int = 32,
        temporal_decoder_hidden: int = 32,
        use_layer_norm: bool = False,
        dropout: float = 0.1,
        output_size: int | list[int] = 1,
        static_categoricals: list[str] | None = None,
        static_reals: list[str] | None = None,
        time_varying_categoricals_encoder: list[str] | None = None,
        time_varying_categoricals_decoder: list[str] | None = None,
        categorical_groups: dict[str, list[str]] | None = None,
        time_varying_reals_encoder: list[str] | None = None,
        time_varying_reals_decoder: list[str] | None = None,
        embedding_sizes: dict[str, tuple[int, int]] | None = None,
        embedding_paddings: list[str] | None = None,
        embedding_labels: dict[str, list[str]] | None = None,
        x_reals: list[str] | None = None,
        x_categoricals: list[str] | None = None,
        logging_metrics: nn.ModuleList | None = None,
        **kwargs,
    ):
        """An implementation of the TiDE model.

        TiDE shares similarities with Transformers (implemented in
        :class:`TransformerModel`), but aims to deliver better performance with
        reduced computational requirements by utilizing MLP-based encoder-decoder
        architectures without attention mechanisms.

        This model supports future covariates (known for ``output_chunk_length``
        points after the prediction time) and static covariates.

        The encoder and decoder are constructed using residual blocks. The number
        of residual blocks in the encoder and decoder can be specified with
        ``num_encoder_layers`` and ``num_decoder_layers`` respectively. The layer
        width in the residual blocks can be adjusted using ``hidden_size``, while
        the layer width in the temporal decoder can be controlled via
        ``temporal_decoder_hidden``.

        Parameters
        ----------
        output_chunk_length : int
            Number of time steps the internal model predicts simultaneously
            (per chunk). This also determines how many future values from future
            covariates are used as input (if supported by the model).
        input_chunk_length : int
            Number of past time steps to use as input for the model (per chunk).
            This applies to the target series and future covariates (if supported
            by the model).
        num_encoder_layers : int, default=2
            Number of residual blocks in the encoder.
        num_decoder_layers : int, default=2
            Number of residual blocks in the decoder.
        decoder_output_dim : int, default=16
            Dimensionality of the decoder's output.
        hidden_size : int, default=128
            Size of hidden layers in the encoder and decoder. Typically ranges
            from 32 to 128 when no covariates are used.
        temporal_width_future : int, default=4
            Width of the output layer in the residual block for future covariate
            projections. If set to 0, bypasses feature projection and uses raw
            feature data.
        temporal_hidden_size_future : int, default=32
            Width of the hidden layer in the residual block for future covariate
            projections.
        temporal_decoder_hidden : int, default=32
            Width of the layers in the temporal decoder.
        use_layer_norm : bool, default=False
            Whether to apply layer normalization in residual blocks.
        dropout : float, default=0.1
            Dropout probability for fully connected layers.
        output_size : int or list of int, default=1
            Included as it is required by ``deduce_default_output_parameters``
            in :meth:`from_dataset`.
        static_categoricals : list of str, optional
            Names of static categorical variables.
        static_reals : list of str, optional
            Names of static continuous variables.
        time_varying_categoricals_encoder : list of str, optional
            Names of categorical variables for encoder.
        time_varying_categoricals_decoder : list of str, optional
            Names of categorical variables for decoder.
        categorical_groups : dict of str to list of str, optional
            Dictionary of categorical variables that are grouped together and can
            also take multiple values simultaneously (e.g. holiday during
            octoberfest). They should be implemented as bag of embeddings.
        time_varying_reals_encoder : list of str, optional
            Names of continuous variables for encoder.
        time_varying_reals_decoder : list of str, optional
            Names of continuous variables for decoder.
        embedding_sizes : dict of str to tuple of (int, int), optional
            Dictionary mapping categorical variables to tuple of integers where
            the first integer denotes the number of categorical classes and the
            second the embedding size.
        embedding_paddings : list of str, optional
            Names of categorical variables for which label 0 is always mapped to
            an embedding vector filled with zeros.
        embedding_labels : dict of str to list of str, optional
            Dictionary mapping (string) indices to list of categorical labels.
        x_reals : list of str, optional
            Order of continuous variables in tensor passed to forward function.
        x_categoricals : list of str, optional
            Order of categorical variables in tensor passed to forward function.
        logging_metrics : nn.ModuleList of MultiHorizonMetric, optional
            List of metrics that are logged during training. Defaults to
            ``nn.ModuleList([SMAPE(), MAE(), RMSE(), MAPE(), MASE()])``.
        **kwargs
            Allows optional arguments to configure pytorch_lightning.Module,
            pytorch_lightning.Trainer, and pytorch-forecasting's
            :class:`BaseModelWithCovariates`.

        Notes
        -----
        The model supports future covariates and static covariates.
        """  # noqa: E501
        # Replace ``None`` with fresh empty containers. The argument names are
        # re-bound in place (no helper, no renamed locals) so that the values
        # recorded by ``save_hyperparameters`` below are the resolved ones.
        if static_categoricals is None:
            static_categoricals = []
        if static_reals is None:
            static_reals = []
        if time_varying_categoricals_encoder is None:
            time_varying_categoricals_encoder = []
        if time_varying_categoricals_decoder is None:
            time_varying_categoricals_decoder = []
        if categorical_groups is None:
            categorical_groups = {}
        if time_varying_reals_encoder is None:
            time_varying_reals_encoder = []
        if time_varying_reals_decoder is None:
            time_varying_reals_decoder = []
        if embedding_sizes is None:
            embedding_sizes = {}
        if embedding_paddings is None:
            embedding_paddings = []
        if embedding_labels is None:
            embedding_labels = {}
        if x_reals is None:
            x_reals = []
        if x_categoricals is None:
            x_categoricals = []
        if logging_metrics is None:
            logging_metrics = nn.ModuleList([SMAPE(), MAE(), RMSE(), MAPE(), MASE()])

        # loss and logging_metrics are ignored as they are modules
        # and stored before calling save_hyperparameters
        self.save_hyperparameters(ignore=["loss", "logging_metrics"])
        super().__init__(logging_metrics=logging_metrics, **kwargs)

        # one output channel per target variable
        self.output_dim = len(self.target_names)

        self.embeddings = MultiEmbedding(
            embedding_sizes=self.hparams.embedding_sizes,
            categorical_groups=self.hparams.categorical_groups,
            embedding_paddings=self.hparams.embedding_paddings,
            x_categoricals=self.hparams.x_categoricals,
        )

        self.model = _TideModule(
            output_dim=self.output_dim,
            future_cov_dim=self.encoder_covariate_size,
            static_cov_dim=self.static_size,
            output_chunk_length=output_chunk_length,
            input_chunk_length=input_chunk_length,
            num_encoder_layers=num_encoder_layers,
            num_decoder_layers=num_decoder_layers,
            decoder_output_dim=decoder_output_dim,
            hidden_size=hidden_size,
            temporal_decoder_hidden=temporal_decoder_hidden,
            temporal_width_future=temporal_width_future,
            use_layer_norm=use_layer_norm,
            dropout=dropout,
            temporal_hidden_size_future=temporal_hidden_size_future,
        )

    @property
    def decoder_covariate_size(self) -> int:
        """Decoder covariates size.

        Counts the decoder reals that are not targets plus the embedding output
        size of every decoder categorical.

        Returns:
            int: size of time-dependent covariates used by the decoder
        """
        return len(
            set(self.hparams.time_varying_reals_decoder) - set(self.target_names)
        ) + sum(
            self.embeddings.output_size[name]
            for name in self.hparams.time_varying_categoricals_decoder
        )

    @property
    def encoder_covariate_size(self) -> int:
        """Encoder covariate size.

        Counts the encoder reals that are not targets plus the embedding output
        size of every encoder categorical.

        Returns:
            int: size of time-dependent covariates used by the encoder
        """
        return len(
            set(self.hparams.time_varying_reals_encoder) - set(self.target_names)
        ) + sum(
            self.embeddings.output_size[name]
            for name in self.hparams.time_varying_categoricals_encoder
        )

    @property
    def static_size(self) -> int:
        """Static covariate size.

        Counts the static reals plus the embedding output size of every static
        categorical.

        Returns:
            int: size of static covariates
        """
        return len(self.hparams.static_reals) + sum(
            self.embeddings.output_size[name]
            for name in self.hparams.static_categoricals
        )

    @classmethod
    def from_dataset(cls, dataset: TimeSeriesDataSet, **kwargs):
        """
        Convenience function to create network from
        :py:class:`~pytorch_forecasting.data.timeseries.TimeSeriesDataSet`.

        ``output_chunk_length`` and ``input_chunk_length`` are taken from the
        dataset's ``max_prediction_length`` and ``max_encoder_length``.

        Args:
            dataset (TimeSeriesDataSet): dataset where sole predictor is the target.
            **kwargs: additional arguments to be passed to `__init__` method.

        Returns:
            TiDE

        Raises:
            AssertionError: if the target is categorical, the encoder or
                prediction length is not fixed, ``randomize_length`` is set, or
                ``add_relative_time_idx`` is enabled.
        """
        # validate arguments
        assert not isinstance(
            dataset.target_normalizer, NaNLabelEncoder
        ), "only regression tasks are supported - target must not be categorical"

        assert dataset.min_encoder_length == dataset.max_encoder_length, (
            "only fixed encoder length is allowed,"
            " but min_encoder_length != max_encoder_length"
        )

        assert dataset.max_prediction_length == dataset.min_prediction_length, (
            "only fixed prediction length is allowed,"
            " but max_prediction_length != min_prediction_length"
        )

        assert (
            dataset.randomize_length is None
        ), "length has to be fixed, but randomize_length is not None"
        assert (
            not dataset.add_relative_time_idx
        ), "add_relative_time_idx has to be False"

        # shallow copy so the caller's kwargs dict is not mutated
        new_kwargs = copy(kwargs)
        new_kwargs.update(
            {
                "output_chunk_length": dataset.max_prediction_length,
                "input_chunk_length": dataset.max_encoder_length,
            }
        )
        new_kwargs.update(cls.deduce_default_output_parameters(dataset, kwargs, MAE()))

        # initialize class
        return super().from_dataset(dataset, **new_kwargs)

    def forward(self, x: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
        """
        Pass forward of network.

        Args:
            x (Dict[str, torch.Tensor]): input from dataloader generated from
                :py:class:`~pytorch_forecasting.data.timeseries.TimeSeriesDataSet`.
                This method reads ``x["encoder_cont"]`` and ``x["target_scale"]``
                directly; the remaining entries are consumed through
                ``extract_features``.

        Returns:
            Dict[str, torch.Tensor]: output of model
        """
        # target
        encoder_y = x["encoder_cont"][..., self.target_positions]

        # covariates
        # encoder features are always extracted: the static branch below reads
        # them even when there are no time-varying encoder covariates
        encoder_features = self.extract_features(x, self.embeddings, period="encoder")

        if self.encoder_covariate_size > 0:
            encoder_x_t = torch.concat(
                [
                    encoder_features[name]
                    for name in self.encoder_variables
                    if name not in self.target_names
                ],
                dim=2,
            )
            input_vector = torch.concat((encoder_y, encoder_x_t), dim=2)
        else:
            encoder_x_t = None
            input_vector = encoder_y

        if self.decoder_covariate_size > 0:
            decoder_features = self.extract_features(
                x, self.embeddings, period="decoder"
            )
            decoder_x_t = torch.concat(
                [decoder_features[name] for name in self.decoder_variables], dim=2
            )
        else:
            decoder_x_t = None

        # statics
        if self.static_size > 0:
            # static features are taken from the first encoder time step
            x_s = torch.concat(
                [encoder_features[name][:, 0] for name in self.static_variables],
                dim=1,
            )
        else:
            x_s = None

        x_in = (input_vector, decoder_x_t, x_s)
        prediction = self.model(x_in)

        if self.output_dim > 1:  # for multivariate targets
            # adjust prediction dimensions according to the format required by
            # subsequent processing: from (batch size, seq len, output_dim) to
            # (output_dim, batch size, seq len)
            prediction = prediction.permute(2, 0, 1)
            # One tensor per target. ``clone`` gives each target its own
            # (non-view) tensor while staying attached to the autograd graph;
            # detaching here would stop gradients from reaching the model
            # parameters during training.
            prediction = [target_pred.clone() for target_pred in prediction]

        # rescale predictions into target space
        prediction = self.transform_output(prediction, target_scale=x["target_scale"])

        # transform output to format processed by other functions
        return self.to_network_output(prediction=prediction)