{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "b5d44943",
   "metadata": {},
   "source": [
    "# TSLib for v2 - Example notebook for full pipeline"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b7d27b55",
   "metadata": {},
   "source": [
    "## Basic imports for getting started\n",
    "\n",
    "This notebook is a basic vignette for the usage of the `tslib` data module with the `TimeXer` model in v2 of PyTorch Forecasting. This API is experimental and unstable, and may change without prior warning.\n",
    "\n",
    "Feedback and suggestions on this pipeline are welcome in PR [#1836](https://github.com/sktime/pytorch-forecasting/pull/1836)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "550a3fbf",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "import torch\n",
    "\n",
    "from pytorch_forecasting.data.data_module import TslibDataModule\n",
    "from pytorch_forecasting.data.encoders import (\n",
    "    NaNLabelEncoder,\n",
    "    TorchNormalizer,\n",
    ")\n",
    "from pytorch_forecasting.data.timeseries import TimeSeries\n",
    "from pytorch_forecasting.models.timexer._timexer_v2 import TimeXer"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2625ed3d",
   "metadata": {},
   "source": [
    "## Construct a time series dataset\n",
    "\n",
    "This step requires us to build a `TimeSeries` object for creating a time series dataset, which identifies the features from a raw time series dataset. As you can see below, we are initialising a sample time series dataset."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "a0058487",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>series_id</th>\n",
       "      <th>time_idx</th>\n",
       "      <th>x</th>\n",
       "      <th>y</th>\n",
       "      <th>category</th>\n",
       "      <th>future_known_feature</th>\n",
       "      <th>static_feature</th>\n",
       "      <th>static_feature_cat</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.177658</td>\n",
       "      <td>0.181124</td>\n",
       "      <td>0</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.409581</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.181124</td>\n",
       "      <td>0.314081</td>\n",
       "      <td>0</td>\n",
       "      <td>0.995004</td>\n",
       "      <td>0.409581</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0.314081</td>\n",
       "      <td>0.601934</td>\n",
       "      <td>0</td>\n",
       "      <td>0.980067</td>\n",
       "      <td>0.409581</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>0.601934</td>\n",
       "      <td>0.733805</td>\n",
       "      <td>0</td>\n",
       "      <td>0.955336</td>\n",
       "      <td>0.409581</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>0.733805</td>\n",
       "      <td>0.768843</td>\n",
       "      <td>0</td>\n",
       "      <td>0.921061</td>\n",
       "      <td>0.409581</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   series_id  time_idx         x         y  category  future_known_feature  \\\n",
       "0          0         0  0.177658  0.181124         0              1.000000   \n",
       "1          0         1  0.181124  0.314081         0              0.995004   \n",
       "2          0         2  0.314081  0.601934         0              0.980067   \n",
       "3          0         3  0.601934  0.733805         0              0.955336   \n",
       "4          0         4  0.733805  0.768843         0              0.921061   \n",
       "\n",
       "   static_feature  static_feature_cat  \n",
       "0        0.409581                   0  \n",
       "1        0.409581                   0  \n",
       "2        0.409581                   0  \n",
       "3        0.409581                   0  \n",
       "4        0.409581                   0  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Seed the generator so the synthetic data is identical on every Restart and Run All.\n",
    "SEED = 42\n",
    "rng = np.random.default_rng(SEED)\n",
    "\n",
    "num_series = 100\n",
    "seq_length = 50\n",
    "# The time grid is the same for every series, so build it once outside the loop.\n",
    "x = np.arange(seq_length)\n",
    "\n",
    "data_list = []\n",
    "for i in range(num_series):\n",
    "    # Noisy sine wave that feeds both the `x` (current value) and `y` (next value) columns.\n",
    "    y = np.sin(x / 5.0) + rng.normal(scale=0.1, size=seq_length)\n",
    "    category = i % 5\n",
    "    static_value = rng.random()  # one constant per series\n",
    "    # Stop one step early: the target of row t is the next observation y[t + 1].\n",
    "    for t in range(seq_length - 1):\n",
    "        data_list.append(\n",
    "            {\n",
    "                \"series_id\": i,\n",
    "                \"time_idx\": t,\n",
    "                \"x\": y[t],\n",
    "                \"y\": y[t + 1],\n",
    "                \"category\": category,\n",
    "                \"future_known_feature\": np.cos(t / 10),\n",
    "                \"static_feature\": static_value,\n",
    "                \"static_feature_cat\": i % 3,\n",
    "            }\n",
    "        )\n",
    "data_df = pd.DataFrame(data_list)\n",
    "data_df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c7c04ff5",
   "metadata": {},
   "source": [
    "## Feature Categories and Definitions\n",
    "\n",
    "### **`time_idx`**\n",
    "- **Definition**: The temporal index column that orders observations chronologically\n",
    "- **Example**: Sequential time steps (0, 1, 2, ...) or timestamps\n",
    "- **Usage**: Identifies the temporal ordering of data points within each time series\n",
    "\n",
    "### **`target`** \n",
    "- **Definition**: The variable you want to predict/forecast\n",
    "- **Example**: Sales volume, stock price, temperature readings\n",
    "- **Usage**: The dependent variable that the model learns to forecast\n",
    "\n",
    "### **`group`**\n",
    "- **Definition**: Categorical variables that identify different time series entities\n",
    "- **Example**: `series_id`, `store_id`, `product_id`, `customer_id`\n",
    "- **Usage**: Distinguishes between multiple time series in the dataset\n",
    "\n",
    "### **`num`**\n",
    "- **Definition**: Numerical/continuous features used as model inputs\n",
    "- **Example**: Price, quantity, weather data, economic indicators  \n",
    "- **Usage**: Continuous variables that provide numerical context for predictions\n",
    "\n",
    "### **`cat`**\n",
    "- **Definition**: Categorical features that represent discrete classes or labels\n",
    "- **Example**: Product category, day of week, seasonal indicators, region\n",
    "- **Usage**: Discrete variables that provide categorical context for predictions\n",
    "\n",
    "### **`known`**\n",
    "- **Definition**: Future values that are known at prediction time (exogenous variables)\n",
    "- **Example**: Holidays, planned promotions, scheduled events, calendar features\n",
    "- **Usage**: Information available for both historical and future periods\n",
    "\n",
    "### **`unknown`**\n",
    "- **Definition**: Variables only available during training/historical periods\n",
    "- **Example**: Past weather conditions, historical prices, competitor actions\n",
    "- **Usage**: Features that help with training but aren't available for future predictions\n",
    "\n",
    "### **`static`**\n",
    "- **Definition**: Time-invariant features that remain constant for each time series\n",
    "- **Example**: Store size, product attributes, geographic location, customer demographics\n",
    "- **Usage**: Entity-specific characteristics that don't change over time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "89a5adbe",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/aryan/pytorch-forecasting/pytorch_forecasting/data/timeseries/_timeseries_v2.py:105: UserWarning: TimeSeries is part of an experimental rework of the pytorch-forecasting data layer, scheduled for release with v2.0.0. The API is not stable and may change without prior warning. For beta testing, but not for stable production use. Feedback and suggestions are very welcome in pytorch-forecasting issue 1736, https://github.com/sktime/pytorch-forecasting/issues/1736\n",
      "  warn(\n"
     ]
    }
   ],
   "source": [
    "# Declare the role of every column, using the exact column names of `data_df`.\n",
    "dataset = TimeSeries(\n",
    "    data=data_df,\n",
    "    time=\"time_idx\",\n",
    "    target=\"y\",\n",
    "    group=[\"series_id\"],\n",
    "    num=[\"x\", \"future_known_feature\", \"static_feature\"],\n",
    "    cat=[\"category\", \"static_feature_cat\"],\n",
    "    known=[\"future_known_feature\"],\n",
    "    unknown=[\"x\", \"category\"],\n",
    "    static=[\"static_feature\", \"static_feature_cat\"],\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f8753a6a",
   "metadata": {},
   "source": [
    "## Initialise the `TslibDataModule` using the dataset\n",
    "\n",
    "This step initialises a basic data module built specifically for `tslib` models and provides all the metadata required to train and use the `tslib` model of your choice!\n",
    "You can refer to the implementation of `TslibDataModule` for more information."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "5eae9035",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/aryan/pytorch-forecasting/pytorch_forecasting/data/_tslib_data_module.py:275: UserWarning: TslibDataModule is experimental and subject to change. The API is not stable and may change without prior warning.\n",
      "  warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "# One label encoder per categorical column and one standard scaler per continuous column.\n",
    "categorical_encoders = {\n",
    "    name: NaNLabelEncoder(add_nan=True)\n",
    "    for name in (\"category\", \"static_feature_cat\")\n",
    "}\n",
    "scalers = {\n",
    "    name: StandardScaler()\n",
    "    for name in (\"x\", \"future_known_feature\", \"static_feature\")\n",
    "}\n",
    "\n",
    "# Windows of 30 context steps are used to predict a single step ahead.\n",
    "data_module = TslibDataModule(\n",
    "    time_series_dataset=dataset,\n",
    "    context_length=30,\n",
    "    prediction_length=1,\n",
    "    batch_size=32,\n",
    "    add_relative_time_idx=True,\n",
    "    target_normalizer=TorchNormalizer(),\n",
    "    categorical_encoders=categorical_encoders,\n",
    "    scalers=scalers,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "b1843233",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'feature_names': {'categorical': ['category', 'static_feature_cat'],\n",
       "  'continuous': ['x', 'future_known_feature', 'static_feature'],\n",
       "  'static': ['static_feature', 'static_feature_cat'],\n",
       "  'known': ['future_known_feature'],\n",
       "  'unknown': ['x', 'category', 'static_feature', 'static_feature_cat'],\n",
       "  'target': ['y'],\n",
       "  'all': ['x',\n",
       "   'category',\n",
       "   'future_known_feature',\n",
       "   'static_feature',\n",
       "   'static_feature_cat'],\n",
       "  'static_categorical': ['static_feature_cat'],\n",
       "  'static_continuous': ['static_feature']},\n",
       " 'feature_indices': {'categorical': [1, 4],\n",
       "  'continuous': [0, 2, 3],\n",
       "  'static': [],\n",
       "  'known': [2],\n",
       "  'unknown': [0, 1, 3, 4],\n",
       "  'target': [0]},\n",
       " 'n_features': {'categorical': 2,\n",
       "  'continuous': 3,\n",
       "  'static': 2,\n",
       "  'known': 1,\n",
       "  'unknown': 4,\n",
       "  'target': 1,\n",
       "  'all': 5,\n",
       "  'static_categorical': 1,\n",
       "  'static_continuous': 1},\n",
       " 'context_length': 30,\n",
       " 'prediction_length': 1,\n",
       " 'freq': 'h',\n",
       " 'features': 'MS'}"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_module.metadata"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "dd9451ee",
   "metadata": {},
   "source": [
    "## Initialise the model\n",
    "\n",
    "We shall try out two versions of this model, one using `MAE()` and one with `QuantileLoss()`.\n",
    "\n",
    "Let us quickly import the required packages for the next steps."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f6b568a5",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pytorch_forecasting.metrics import MAE, SMAPE, QuantileLoss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "429b5f15",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/aryan/pytorch-forecasting/pytorch_forecasting/models/base/_base_model_v2.py:61: UserWarning: The Model 'TimeXer' is part of an experimental reworkof the pytorch-forecasting model layer, scheduled for release with v2.0.0. The API is not stable and may change without prior warning. This class is intended for beta testing and as a basic skeleton, but not for stable production use. Feedback and suggestions are very welcome in pytorch-forecasting issue 1736, https://github.com/sktime/pytorch-forecasting/issues/1736\n",
      "  warn(\n",
      "/home/aryan/pytorch-forecasting/pytorch_forecasting/models/base/_tslib_base_model_v2.py:60: UserWarning: The Model 'TimeXer' is part of an experimental implementationof the pytorch-forecasting model layer for Time Series Library, scheduledfor release with v2.0.0. The API is not stableand may change without prior warning. This class is intended for betatesting, not for stable production use.\n",
      "  warn(\n",
      "/home/aryan/pytorch-forecasting/pytorch_forecasting/models/timexer/_timexer_v2.py:133: UserWarning: TimeXer is an experimental model implemented on TslibBaseModelV2. It is an unstable version and maybe subject to unannouced changes.Please use with caution. Feedback on the design and implementation iswelcome. On the issue #1833 - https://github.com/sktime/pytorch-forecasting/issues/1833\n",
      "  warn.warn(\n",
      "/home/aryan/pytorch-forecasting/pytorch_forecasting/models/timexer/_timexer_v2.py:179: UserWarning: Context length (30) is not divisible by patch length. This may lead to unexpected behavior, as sometime steps will not be used in the model.\n",
      "  warn.warn(\n"
     ]
    }
   ],
   "source": [
    "# Point-forecast variant: optimise MAE and additionally log MAE and SMAPE.\n",
    "model1_scheduler_params = {\n",
    "    \"mode\": \"min\",\n",
    "    \"factor\": 0.5,\n",
    "    \"patience\": 5,\n",
    "}\n",
    "\n",
    "model1 = TimeXer(\n",
    "    # loss and metrics\n",
    "    loss=MAE(),\n",
    "    logging_metrics=[MAE(), SMAPE()],\n",
    "    # architecture\n",
    "    hidden_size=64,\n",
    "    nhead=4,\n",
    "    e_layers=2,\n",
    "    d_ff=256,\n",
    "    dropout=0.1,\n",
    "    patch_length=4,\n",
    "    # optimisation\n",
    "    optimizer=\"adam\",\n",
    "    optimizer_params={\"lr\": 1e-3},\n",
    "    lr_scheduler=\"reduce_lr_on_plateau\",\n",
    "    lr_scheduler_params=model1_scheduler_params,\n",
    "    # dataset description produced by the data module\n",
    "    metadata=data_module.metadata,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "0aa21f48",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Probabilistic variant: predict the 0.1, 0.5 and 0.9 quantiles.\n",
    "model2_scheduler_params = {\n",
    "    \"mode\": \"min\",\n",
    "    \"factor\": 0.5,\n",
    "    \"patience\": 5,\n",
    "}\n",
    "\n",
    "model2 = TimeXer(\n",
    "    # loss\n",
    "    loss=QuantileLoss(quantiles=[0.1, 0.5, 0.9]),\n",
    "    # architecture\n",
    "    hidden_size=64,\n",
    "    nhead=4,\n",
    "    e_layers=2,\n",
    "    d_ff=256,\n",
    "    dropout=0.1,\n",
    "    patch_length=4,\n",
    "    # optimisation\n",
    "    optimizer=\"adam\",\n",
    "    optimizer_params={\"lr\": 1e-3},\n",
    "    lr_scheduler=\"reduce_lr_on_plateau\",\n",
    "    lr_scheduler_params=model2_scheduler_params,\n",
    "    # dataset description produced by the data module\n",
    "    metadata=data_module.metadata,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "02605f9b",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "GPU available: True (cuda), used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "HPU available: False, using: 0 HPUs\n",
      "GPU available: True (cuda), used: True\n",
      "TPU available: False, using: 0 TPU cores\n",
      "HPU available: False, using: 0 HPUs\n"
     ]
    }
   ],
   "source": [
    "from lightning.pytorch import Trainer\n",
    "\n",
    "# Options shared by both trainers; they differ only in the number of epochs.\n",
    "shared_trainer_kwargs = {\n",
    "    \"accelerator\": \"auto\",\n",
    "    \"devices\": 1,\n",
    "    \"enable_progress_bar\": True,\n",
    "    \"enable_model_summary\": True,\n",
    "}\n",
    "\n",
    "trainer1 = Trainer(max_epochs=5, **shared_trainer_kwargs)  # used for the MAE model\n",
    "trainer2 = Trainer(max_epochs=4, **shared_trainer_kwargs)  # used for the QuantileLoss model"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e22756b2",
   "metadata": {},
   "source": [
    "## Fit the trainer on the model and feed data using the data module"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "6e9117d2",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "You are using a CUDA device ('NVIDIA GeForce RTX 4050 Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision\n",
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n",
      "\n",
      "  | Name         | Type                   | Params | Mode \n",
      "----------------------------------------------------------------\n",
      "0 | loss         | MAE                    | 0      | train\n",
      "1 | en_embedding | EnEmbedding            | 320    | train\n",
      "2 | ex_embedding | DataEmbedding_inverted | 2.0 K  | train\n",
      "3 | encoder      | Encoder                | 133 K  | train\n",
      "4 | head         | FlattenHead            | 513    | train\n",
      "----------------------------------------------------------------\n",
      "136 K     Trainable params\n",
      "0         Non-trainable params\n",
      "136 K     Total params\n",
      "0.546     Total estimated model params size (MB)\n",
      "57        Modules in train mode\n",
      "0         Modules in eval mode\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f26d868819404cb0a48cc030aefef48c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Sanity Checking: |          | 0/? [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/aryan/pytorch-forecasting/.venv/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.\n",
      "/home/aryan/pytorch-forecasting/.venv/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.\n",
      "/home/aryan/pytorch-forecasting/.venv/lib/python3.12/site-packages/lightning/pytorch/loops/fit_loop.py:310: The number of training batches (42) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "db0fdcf01b784ddf8bf0d93fe1cc23e4",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: |          | 0/? [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "26073d89e3014850ba0c4d7f01104047",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Validation: |          | 0/? [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "bf42294210dc4d53a066967bc54b1282",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Validation: |          | 0/? [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "27ed3f4556284bf0a918d22554e8e9c2",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Validation: |          | 0/? [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0e338f265f554e6d96020135a3f64f64",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Validation: |          | 0/? [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "46bbd28e869740449aad2c7c1aaf5da7",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Validation: |          | 0/? [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "`Trainer.fit` stopped: `max_epochs=5` reached.\n"
     ]
    }
   ],
   "source": [
    "trainer1.fit(model1, data_module)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e8fb4f31",
   "metadata": {},
   "source": [
    "Now let us train the model using `QuantileLoss`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "3c67d86f",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n",
      "\n",
      "  | Name         | Type                   | Params | Mode \n",
      "----------------------------------------------------------------\n",
      "0 | loss         | QuantileLoss           | 0      | train\n",
      "1 | en_embedding | EnEmbedding            | 320    | train\n",
      "2 | ex_embedding | DataEmbedding_inverted | 2.0 K  | train\n",
      "3 | encoder      | Encoder                | 133 K  | train\n",
      "4 | head         | FlattenHead            | 1.5 K  | train\n",
      "----------------------------------------------------------------\n",
      "137 K     Trainable params\n",
      "0         Non-trainable params\n",
      "137 K     Total params\n",
      "0.550     Total estimated model params size (MB)\n",
      "57        Modules in train mode\n",
      "0         Modules in eval mode\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "acba85c2e99b4dc391315e10471e1e8c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Sanity Checking: |          | 0/? [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/aryan/pytorch-forecasting/.venv/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.\n",
      "/home/aryan/pytorch-forecasting/.venv/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.\n",
      "/home/aryan/pytorch-forecasting/.venv/lib/python3.12/site-packages/lightning/pytorch/loops/fit_loop.py:310: The number of training batches (42) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "77d8d477ad3040aea894b76ca2df8130",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Training: |          | 0/? [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f1bbc1f476b046e788d827e5dc02d888",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Validation: |          | 0/? [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1233ff4c4fbe4bde9e7717c54e28567d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Validation: |          | 0/? [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "334cb342267b47b1b4e1e2ca1038569f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Validation: |          | 0/? [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "26260a641c604444811ec1bf00c80edb",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Validation: |          | 0/? [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "`Trainer.fit` stopped: `max_epochs=4` reached.\n"
     ]
    }
   ],
   "source": [
    "trainer2.fit(model2, data_module)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "16e2d445",
   "metadata": {},
   "source": [
    "## Test the model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "dbf1ace6",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n",
      "/home/aryan/pytorch-forecasting/.venv/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d8e51a5bb1564edab4e0d32743a16299",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Testing: |          | 0/? [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────\n",
      "       Test metric             DataLoader 0\n",
      "────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────\n",
      "        test_MAE            0.13894660770893097\n",
      "       test_SMAPE           0.40041154623031616\n",
      "        test_loss           0.13894660770893097\n",
      "────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────\n"
     ]
    }
   ],
   "source": [
    "test_metrics = trainer1.test(model1, data_module)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "250b128a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "TimeXer(\n",
       "  (loss): MAE()\n",
       "  (en_embedding): EnEmbedding(\n",
       "    (value_embedding): Linear(in_features=4, out_features=64, bias=False)\n",
       "    (position_embedding): PositionalEmbedding()\n",
       "    (dropout): Dropout(p=0.1, inplace=False)\n",
       "  )\n",
       "  (ex_embedding): DataEmbedding_inverted(\n",
       "    (value_embedding): Linear(in_features=30, out_features=64, bias=True)\n",
       "    (dropout): Dropout(p=0.1, inplace=False)\n",
       "  )\n",
       "  (encoder): Encoder(\n",
       "    (layers): ModuleList(\n",
       "      (0-1): 2 x EncoderLayer(\n",
       "        (self_attention): AttentionLayer(\n",
       "          (inner_attention): FullAttention(\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (query_projection): Linear(in_features=64, out_features=64, bias=True)\n",
       "          (key_projection): Linear(in_features=64, out_features=64, bias=True)\n",
       "          (value_projection): Linear(in_features=64, out_features=64, bias=True)\n",
       "          (out_projection): Linear(in_features=64, out_features=64, bias=True)\n",
       "        )\n",
       "        (cross_attention): AttentionLayer(\n",
       "          (inner_attention): FullAttention(\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (query_projection): Linear(in_features=64, out_features=64, bias=True)\n",
       "          (key_projection): Linear(in_features=64, out_features=64, bias=True)\n",
       "          (value_projection): Linear(in_features=64, out_features=64, bias=True)\n",
       "          (out_projection): Linear(in_features=64, out_features=64, bias=True)\n",
       "        )\n",
       "        (conv1): Conv1d(64, 256, kernel_size=(1,), stride=(1,))\n",
       "        (conv2): Conv1d(256, 64, kernel_size=(1,), stride=(1,))\n",
       "        (norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)\n",
       "        (norm2): LayerNorm((64,), eps=1e-05, elementwise_affine=True)\n",
       "        (norm3): LayerNorm((64,), eps=1e-05, elementwise_affine=True)\n",
       "        (dropout): Dropout(p=0.1, inplace=False)\n",
       "      )\n",
       "    )\n",
       "    (norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)\n",
       "  )\n",
       "  (head): FlattenHead(\n",
       "    (flatten): Flatten(start_dim=-2, end_dim=-1)\n",
       "    (linear): Linear(in_features=512, out_features=1, bias=True)\n",
       "    (dropout): Dropout(p=0.1, inplace=False)\n",
       "  )\n",
       ")"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model1.eval()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "f730b49a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Prediction: tensor([[[ 0.1253]],\n",
      "\n",
      "        [[ 0.2623]],\n",
      "\n",
      "        [[ 0.4591]],\n",
      "\n",
      "        [[ 0.6304]],\n",
      "\n",
      "        [[ 0.7916]],\n",
      "\n",
      "        [[ 0.9132]],\n",
      "\n",
      "        [[ 1.0252]],\n",
      "\n",
      "        [[ 1.1069]],\n",
      "\n",
      "        [[ 1.1370]],\n",
      "\n",
      "        [[ 1.1317]],\n",
      "\n",
      "        [[ 1.0659]],\n",
      "\n",
      "        [[ 0.9617]],\n",
      "\n",
      "        [[ 0.8297]],\n",
      "\n",
      "        [[ 0.6622]],\n",
      "\n",
      "        [[ 0.5254]],\n",
      "\n",
      "        [[ 0.3310]],\n",
      "\n",
      "        [[ 0.1579]],\n",
      "\n",
      "        [[-0.0506]],\n",
      "\n",
      "        [[-0.1999]],\n",
      "\n",
      "        [[ 0.0740]],\n",
      "\n",
      "        [[ 0.2787]],\n",
      "\n",
      "        [[ 0.4506]],\n",
      "\n",
      "        [[ 0.6381]],\n",
      "\n",
      "        [[ 0.7867]],\n",
      "\n",
      "        [[ 0.9343]],\n",
      "\n",
      "        [[ 1.0370]],\n",
      "\n",
      "        [[ 1.1286]],\n",
      "\n",
      "        [[ 1.1737]],\n",
      "\n",
      "        [[ 1.1367]],\n",
      "\n",
      "        [[ 1.0765]],\n",
      "\n",
      "        [[ 0.9569]],\n",
      "\n",
      "        [[ 0.8583]]])\n"
     ]
    }
   ],
   "source": [
    "with torch.no_grad():\n",
    "    test_batch = next(iter(data_module.test_dataloader()))\n",
    "    x_test, y_test = test_batch\n",
    "    y_pred = model1(x_test)\n",
    "\n",
    "    print(\"Prediction:\", y_pred[\"prediction\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "e316c047",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "torch.Size([32, 1, 1])"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_pred[\"prediction\"].shape"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a01927d4",
   "metadata": {},
   "source": [
    "Let us do the same for `QuantileLoss` predictions."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "22bd191f",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n",
      "/home/aryan/pytorch-forecasting/.venv/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f29e92e8bae645c0bc6692d18c8bdd1f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Testing: |          | 0/? [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────\n",
      "       Test metric             DataLoader 0\n",
      "────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────\n",
      "        test_loss           0.07047828286886215\n",
      "────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────\n"
     ]
    }
   ],
   "source": [
    "test_metrics = trainer2.test(model2, data_module)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "a1d857db",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "TimeXer(\n",
       "  (loss): QuantileLoss(quantiles=[0.1, 0.5, 0.9])\n",
       "  (en_embedding): EnEmbedding(\n",
       "    (value_embedding): Linear(in_features=4, out_features=64, bias=False)\n",
       "    (position_embedding): PositionalEmbedding()\n",
       "    (dropout): Dropout(p=0.1, inplace=False)\n",
       "  )\n",
       "  (ex_embedding): DataEmbedding_inverted(\n",
       "    (value_embedding): Linear(in_features=30, out_features=64, bias=True)\n",
       "    (dropout): Dropout(p=0.1, inplace=False)\n",
       "  )\n",
       "  (encoder): Encoder(\n",
       "    (layers): ModuleList(\n",
       "      (0-1): 2 x EncoderLayer(\n",
       "        (self_attention): AttentionLayer(\n",
       "          (inner_attention): FullAttention(\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (query_projection): Linear(in_features=64, out_features=64, bias=True)\n",
       "          (key_projection): Linear(in_features=64, out_features=64, bias=True)\n",
       "          (value_projection): Linear(in_features=64, out_features=64, bias=True)\n",
       "          (out_projection): Linear(in_features=64, out_features=64, bias=True)\n",
       "        )\n",
       "        (cross_attention): AttentionLayer(\n",
       "          (inner_attention): FullAttention(\n",
       "            (dropout): Dropout(p=0.1, inplace=False)\n",
       "          )\n",
       "          (query_projection): Linear(in_features=64, out_features=64, bias=True)\n",
       "          (key_projection): Linear(in_features=64, out_features=64, bias=True)\n",
       "          (value_projection): Linear(in_features=64, out_features=64, bias=True)\n",
       "          (out_projection): Linear(in_features=64, out_features=64, bias=True)\n",
       "        )\n",
       "        (conv1): Conv1d(64, 256, kernel_size=(1,), stride=(1,))\n",
       "        (conv2): Conv1d(256, 64, kernel_size=(1,), stride=(1,))\n",
       "        (norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)\n",
       "        (norm2): LayerNorm((64,), eps=1e-05, elementwise_affine=True)\n",
       "        (norm3): LayerNorm((64,), eps=1e-05, elementwise_affine=True)\n",
       "        (dropout): Dropout(p=0.1, inplace=False)\n",
       "      )\n",
       "    )\n",
       "    (norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)\n",
       "  )\n",
       "  (head): FlattenHead(\n",
       "    (flatten): Flatten(start_dim=-2, end_dim=-1)\n",
       "    (linear): Linear(in_features=512, out_features=3, bias=True)\n",
       "    (dropout): Dropout(p=0.1, inplace=False)\n",
       "  )\n",
       ")"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model2.eval()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "52e2a36a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Prediction: tensor([[[-0.1025, -0.0489,  0.0900]],\n",
      "\n",
      "        [[ 0.0680,  0.0936,  0.2504]],\n",
      "\n",
      "        [[ 0.2310,  0.2605,  0.4298]],\n",
      "\n",
      "        [[ 0.3604,  0.3968,  0.5679]],\n",
      "\n",
      "        [[ 0.4935,  0.5408,  0.7165]],\n",
      "\n",
      "        [[ 0.6274,  0.6697,  0.8745]],\n",
      "\n",
      "        [[ 0.7192,  0.7940,  0.9812]],\n",
      "\n",
      "        [[ 0.7555,  0.8650,  1.0313]],\n",
      "\n",
      "        [[ 0.7602,  0.8706,  1.0427]],\n",
      "\n",
      "        [[ 0.7532,  0.8524,  1.0308]],\n",
      "\n",
      "        [[ 0.7003,  0.7784,  0.9995]],\n",
      "\n",
      "        [[ 0.5987,  0.6807,  0.9390]],\n",
      "\n",
      "        [[ 0.4757,  0.5814,  0.7966]],\n",
      "\n",
      "        [[ 0.3432,  0.4587,  0.6614]],\n",
      "\n",
      "        [[ 0.1659,  0.2931,  0.5039]],\n",
      "\n",
      "        [[-0.0338,  0.0983,  0.3208]],\n",
      "\n",
      "        [[-0.1989, -0.0829,  0.1821]],\n",
      "\n",
      "        [[-0.3732, -0.2402,  0.0121]],\n",
      "\n",
      "        [[-0.5151, -0.3600, -0.1606]],\n",
      "\n",
      "        [[-0.0789, -0.0406,  0.0908]],\n",
      "\n",
      "        [[ 0.0495,  0.0830,  0.2585]],\n",
      "\n",
      "        [[ 0.2185,  0.2520,  0.4223]],\n",
      "\n",
      "        [[ 0.3870,  0.4209,  0.5818]],\n",
      "\n",
      "        [[ 0.5243,  0.5766,  0.7636]],\n",
      "\n",
      "        [[ 0.6293,  0.6854,  0.8715]],\n",
      "\n",
      "        [[ 0.7055,  0.7854,  0.9698]],\n",
      "\n",
      "        [[ 0.7722,  0.8390,  1.0474]],\n",
      "\n",
      "        [[ 0.8323,  0.9074,  1.0969]],\n",
      "\n",
      "        [[ 0.8132,  0.8968,  1.1051]],\n",
      "\n",
      "        [[ 0.6892,  0.8067,  1.0172]],\n",
      "\n",
      "        [[ 0.5896,  0.7130,  0.9167]],\n",
      "\n",
      "        [[ 0.4989,  0.5976,  0.8067]]])\n"
     ]
    }
   ],
   "source": [
    "with torch.no_grad():\n",
    "    test_batch = next(iter(data_module.test_dataloader()))\n",
    "    x_test, y_test = test_batch\n",
    "    y_pred = model2(x_test)\n",
    "\n",
    "    print(\"Prediction:\", y_pred[\"prediction\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "a4e6e4b1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "torch.Size([32, 1, 3])"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_pred[\"prediction\"].shape"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv (3.12.3)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}