Source code for pytorch_forecasting.models.tide._tide

"""
Implements the TiDE (Time-series Dense Encoder-decoder) model, which is designed for
long-term time-series forecasting.
"""

from copy import copy
from typing import Optional, Union

import torch
from torch import nn

from pytorch_forecasting.data import TimeSeriesDataSet
from pytorch_forecasting.data.encoders import NaNLabelEncoder
from pytorch_forecasting.metrics import MAE, MAPE, MASE, RMSE, SMAPE
from pytorch_forecasting.models.base import BaseModelWithCovariates
from pytorch_forecasting.models.nn.embeddings import MultiEmbedding
from pytorch_forecasting.models.tide.sub_modules import _TideModule



[docs]
class TiDEModel(BaseModelWithCovariates):
    """TiDE model for long-term time-series forecasting."""

    @classmethod
    def _pkg(cls):
        """Return the package class that accompanies this model.

        The import happens inside the method, so it is only executed when the
        package class is actually requested.
        """
        from pytorch_forecasting.models.tide._tide_pkg import (
            TiDEModel_pkg as package_cls,
        )

        return package_cls


[docs]
    def __init__(
        self,
        output_chunk_length: int,
        input_chunk_length: int,
        num_encoder_layers: int = 2,
        num_decoder_layers: int = 2,
        decoder_output_dim: int = 16,
        hidden_size: int = 128,
        temporal_width_future: int = 4,
        temporal_hidden_size_future: int = 32,
        temporal_decoder_hidden: int = 32,
        use_layer_norm: bool = False,
        dropout: float = 0.1,
        output_size: int | list[int] = 1,
        static_categoricals: list[str] | None = None,
        static_reals: list[str] | None = None,
        time_varying_categoricals_encoder: list[str] | None = None,
        time_varying_categoricals_decoder: list[str] | None = None,
        categorical_groups: dict[str, list[str]] | None = None,
        time_varying_reals_encoder: list[str] | None = None,
        time_varying_reals_decoder: list[str] | None = None,
        embedding_sizes: dict[str, tuple[int, int]] | None = None,
        embedding_paddings: list[str] | None = None,
        embedding_labels: dict[str, list[str]] | None = None,
        x_reals: list[str] | None = None,
        x_categoricals: list[str] | None = None,
        logging_metrics: nn.ModuleList | None = None,
        **kwargs,
    ):
        """An implementation of the TiDE model.

        TiDE shares similarities with Transformers
        (implemented in :class:`TransformerModel`), but aims to deliver better
        performance with reduced computational requirements by utilizing
        MLP-based encoder-decoder architectures without attention mechanisms.

        This model supports future covariates (known for ``output_chunk_length``
        points after the prediction time) and static covariates.

        The encoder and decoder are constructed using residual blocks. The number
        of residual blocks in the encoder and decoder can be specified with
        ``num_encoder_layers`` and ``num_decoder_layers`` respectively. The layer
        width in the residual blocks can be adjusted using ``hidden_size``, while
        the layer width in the temporal decoder can be controlled via
        ``temporal_decoder_hidden``.

        Parameters
        ----------
        output_chunk_length : int
            Number of time steps the internal model predicts simultaneously
            (per chunk). This also determines how many future values from future
            covariates are used as input (if supported by the model).
        input_chunk_length : int
            Number of past time steps to use as input for the model (per chunk).
            This applies to the target series and future covariates
            (if supported by the model).
        num_encoder_layers : int, default=2
            Number of residual blocks in the encoder.
        num_decoder_layers : int, default=2
            Number of residual blocks in the decoder.
        decoder_output_dim : int, default=16
            Dimensionality of the decoder's output.
        hidden_size : int, default=128
            Size of hidden layers in the encoder and decoder.
            Typically ranges from 32 to 128 when no covariates are used.
        temporal_width_future : int, default=4
            Width of the output layer in the residual block for future covariate
            projections. If set to 0, bypasses feature projection and uses raw
            feature data.
        temporal_hidden_size_future : int, default=32
            Width of the hidden layer in the residual block for future covariate
            projections.
        temporal_decoder_hidden : int, default=32
            Width of the layers in the temporal decoder.
        use_layer_norm : bool, default=False
            Whether to apply layer normalization in residual blocks.
        dropout : float, default=0.1
            Dropout probability for fully connected layers.
        output_size : int or list of int, default=1
            Included as it is required by ``deduce_default_output_parameters``
            in the ``from_dataset`` function.
        static_categoricals : list of str, optional
            Names of static categorical variables.
        static_reals : list of str, optional
            Names of static continuous variables.
        time_varying_categoricals_encoder : list of str, optional
            Names of categorical variables for encoder.
        time_varying_categoricals_decoder : list of str, optional
            Names of categorical variables for decoder.
        categorical_groups : dict of str to list of str, optional
            Dictionary of categorical variables that are grouped together and
            can also take multiple values simultaneously (e.g. holiday during
            octoberfest). They should be implemented as bag of embeddings.
        time_varying_reals_encoder : list of str, optional
            Names of continuous variables for encoder.
        time_varying_reals_decoder : list of str, optional
            Names of continuous variables for decoder.
        embedding_sizes : dict of str to tuple of (int, int), optional
            Dictionary mapping categorical variables to tuple of integers where
            the first integer denotes the number of categorical classes and the
            second the embedding size.
        embedding_paddings : list of str, optional
            Names of categorical variables for which label 0 is always mapped to
            an embedding vector filled with zeros.
        embedding_labels : dict of str to list of str, optional
            Dictionary mapping (string) indices to list of categorical labels.
        x_reals : list of str, optional
            Order of continuous variables in tensor passed to forward function.
        x_categoricals : list of str, optional
            Order of categorical variables in tensor passed to forward function.
        logging_metrics : nn.ModuleList of MultiHorizonMetric, optional
            List of metrics that are logged during training.
            Defaults to ``nn.ModuleList([SMAPE(), MAE(), RMSE(), MAPE(), MASE()])``.
        **kwargs
            Allows optional arguments to configure pytorch_lightning.Module,
            pytorch_lightning.Trainer, and pytorch-forecasting's
            :class:`BaseModelWithCovariates`.

        Notes
        -----
        Every optional list/dict argument left as ``None`` is replaced by an
        empty list or dict before the hyperparameters are saved.
        """  # noqa: E501
        # Resolve the None sentinels to fresh empty containers (avoids sharing
        # mutable defaults between instances).
        if static_categoricals is None:
            static_categoricals = []
        if static_reals is None:
            static_reals = []
        if time_varying_categoricals_encoder is None:
            time_varying_categoricals_encoder = []
        if time_varying_categoricals_decoder is None:
            time_varying_categoricals_decoder = []
        if categorical_groups is None:
            categorical_groups = {}
        if time_varying_reals_encoder is None:
            time_varying_reals_encoder = []
        if time_varying_reals_decoder is None:
            time_varying_reals_decoder = []
        if embedding_sizes is None:
            embedding_sizes = {}
        if embedding_paddings is None:
            embedding_paddings = []
        if embedding_labels is None:
            embedding_labels = {}
        if x_reals is None:
            x_reals = []
        if x_categoricals is None:
            x_categoricals = []
        if logging_metrics is None:
            logging_metrics = nn.ModuleList([SMAPE(), MAE(), RMSE(), MAPE(), MASE()])

        # loss and logging_metrics are ignored as they are modules
        # and stored before calling save_hyperparameters
        # NOTE(review): presumably the defaults above must be resolved before
        # this call so that self.hparams (read below) holds concrete
        # containers - keep the statement order when editing.
        self.save_hyperparameters(ignore=["loss", "logging_metrics"])
        super().__init__(logging_metrics=logging_metrics, **kwargs)
        # one output channel per target
        self.output_dim = len(self.target_names)

        # embeddings for all categorical inputs; the covariate-size properties
        # of this class read their output sizes
        self.embeddings = MultiEmbedding(
            embedding_sizes=self.hparams.embedding_sizes,
            categorical_groups=self.hparams.categorical_groups,
            embedding_paddings=self.hparams.embedding_paddings,
            x_categoricals=self.hparams.x_categoricals,
        )

        # NOTE(review): future_cov_dim is sized from the *encoder* covariates,
        # while forward() passes the *decoder* covariates as the second model
        # input - confirm that _TideModule expects both sizes to agree.
        self.model = _TideModule(
            output_dim=self.output_dim,
            future_cov_dim=self.encoder_covariate_size,
            static_cov_dim=self.static_size,
            output_chunk_length=output_chunk_length,
            input_chunk_length=input_chunk_length,
            num_encoder_layers=num_encoder_layers,
            num_decoder_layers=num_decoder_layers,
            decoder_output_dim=decoder_output_dim,
            hidden_size=hidden_size,
            temporal_decoder_hidden=temporal_decoder_hidden,
            temporal_width_future=temporal_width_future,
            use_layer_norm=use_layer_norm,
            dropout=dropout,
            temporal_hidden_size_future=temporal_hidden_size_future,
        )


    @property
    def decoder_covariate_size(self) -> int:
        """Width of the time-dependent covariates seen by the decoder.

        Counts the distinct decoder reals that are not targets and adds the
        embedding output size of every decoder categorical.

        Returns:
            int: size of time-dependent covariates used by the decoder
        """
        hyperparams = self.hparams
        non_target_reals = set(hyperparams.time_varying_reals_decoder).difference(
            set(self.target_names)
        )
        embedding_width = 0
        for cat_name in hyperparams.time_varying_categoricals_decoder:
            embedding_width += self.embeddings.output_size[cat_name]
        return len(non_target_reals) + embedding_width

    @property
    def encoder_covariate_size(self) -> int:
        """Width of the time-dependent covariates seen by the encoder.

        Counts the distinct encoder reals that are not targets and adds the
        embedding output size of every encoder categorical.

        Returns:
            int: size of time-dependent covariates used by the encoder
        """
        hyperparams = self.hparams
        non_target_reals = set(hyperparams.time_varying_reals_encoder).difference(
            set(self.target_names)
        )
        embedding_width = 0
        for cat_name in hyperparams.time_varying_categoricals_encoder:
            embedding_width += self.embeddings.output_size[cat_name]
        return len(non_target_reals) + embedding_width

    @property
    def static_size(self) -> int:
        """Width of the static covariates.

        Adds the number of static reals to the embedding output size of every
        static categorical.

        Returns:
            int: size of static covariates
        """
        hyperparams = self.hparams
        real_count = len(hyperparams.static_reals)
        embedding_width = 0
        for cat_name in hyperparams.static_categoricals:
            embedding_width += self.embeddings.output_size[cat_name]
        return real_count + embedding_width

    @classmethod
    def from_dataset(cls, dataset: TimeSeriesDataSet, **kwargs):
        """
        Build a network whose chunk lengths and output parameters are taken from a
        :py:class:`~pytorch_forecasting.data.timeseries.TimeSeriesDataSet`.

        The dataset must describe a regression task with fixed encoder and
        prediction lengths, no length randomization and no relative time index;
        otherwise an ``AssertionError`` is raised.

        Args:
            dataset (TimeSeriesDataSet): dataset the network is configured from.
            **kwargs: additional arguments to be passed to `__init__` method.
                ``output_chunk_length`` and ``input_chunk_length`` are always
                overwritten with the dataset's maximum prediction and encoder
                lengths.

        Returns:
            the model created by the parent class' ``from_dataset``
        """
        # --- the dataset has to satisfy the model's fixed-window assumptions ---
        assert not isinstance(dataset.target_normalizer, NaNLabelEncoder), (
            "only regression tasks are supported - target must not be categorical"
        )
        assert dataset.min_encoder_length == dataset.max_encoder_length, (
            "only fixed encoder length is allowed,"
            " but min_encoder_length != max_encoder_length"
        )
        assert dataset.max_prediction_length == dataset.min_prediction_length, (
            "only fixed prediction length is allowed,"
            " but max_prediction_length != min_prediction_length"
        )
        assert dataset.randomize_length is None, (
            "length has to be fixed, but randomize_length is not None"
        )
        assert not dataset.add_relative_time_idx, (
            "add_relative_time_idx has to be False"
        )

        # --- assemble constructor arguments without touching the caller's dict ---
        init_kwargs = dict(kwargs)
        init_kwargs["output_chunk_length"] = dataset.max_prediction_length
        init_kwargs["input_chunk_length"] = dataset.max_encoder_length
        # output parameters are deduced from the original kwargs, MAE as default
        output_defaults = cls.deduce_default_output_parameters(dataset, kwargs, MAE())
        init_kwargs.update(output_defaults)

        return super().from_dataset(dataset, **init_kwargs)

    def forward(self, x: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
        """
        Pass forward of network.

        Args:
            x (Dict[str, torch.Tensor]): input from dataloader generated from
                :py:class:`~pytorch_forecasting.data.timeseries.TimeSeriesDataSet`.
                The entries ``"encoder_cont"`` and ``"target_scale"`` are read
                directly; the remaining inputs are consumed through
                ``extract_features``.

        Returns:
            Dict[str, torch.Tensor]: output of model
        """
        # target history, selected from the continuous encoder inputs
        encoder_y = x["encoder_cont"][..., self.target_positions]
        # encoder features are always extracted because the static covariates
        # below are read from them as well
        encoder_features = self.extract_features(x, self.embeddings, period="encoder")

        # past covariates (everything in the encoder that is not a target)
        if self.encoder_covariate_size > 0:
            encoder_x_t = torch.concat(
                [
                    encoder_features[name]
                    for name in self.encoder_variables
                    if name not in self.target_names
                ],
                dim=2,
            )
            input_vector = torch.concat((encoder_y, encoder_x_t), dim=2)
        else:
            input_vector = encoder_y

        # future covariates
        if self.decoder_covariate_size > 0:
            decoder_features = self.extract_features(
                x, self.embeddings, period="decoder"
            )
            decoder_x_t = torch.concat(
                [decoder_features[name] for name in self.decoder_variables], dim=2
            )
        else:
            decoder_x_t = None

        # statics: take the first time step of each static feature
        if self.static_size > 0:
            x_s = torch.concat(
                [encoder_features[name][:, 0] for name in self.static_variables], dim=1
            )
        else:
            x_s = None

        x_in = (input_vector, decoder_x_t, x_s)
        prediction = self.model(x_in)

        if self.output_dim > 1:  # for multivariate targets
            # adjust prediction dimensions according to the format required
            # by subsequent processing: from (batch size, seq len, output_dim)
            # to a list of output_dim tensors of (batch size, seq len).
            # Each slice is cloned so it is an independent tensor rather than
            # a view of the shared model output, but it is deliberately NOT
            # detached: detaching would cut the predictions out of the
            # autograd graph and no gradient would reach the network.
            prediction = [
                target_prediction.clone()
                for target_prediction in prediction.permute(2, 0, 1)
            ]

        # rescale predictions into target space
        prediction = self.transform_output(prediction, target_scale=x["target_scale"])
        # transform output to format processed by other functions
        return self.to_network_output(prediction=prediction)