{ "cells": [ { "cell_type": "markdown", "id": "30b18a7c-b40c-4b53-b1cc-90eb0632e6d3", "metadata": {}, "source": [ "# Transformer-Architecture for the prediction of energy consumption data over 48 hours\n", "### 1 - Transformer-Architecture with Energy consumption data and weather data\n", "### 2 - Transformer-Architecture with Energy consumption data and 2 variables 'Lastgang_Moving_Average' and 'Lastgang_First_Difference'" ] }, { "cell_type": "code", "execution_count": 18, "id": "db9e4cca-6b76-41b9-a8c3-645bc7bd97de", "metadata": {}, "outputs": [], "source": [ "#Install all required packages with 'conda install NAME' or with pip install NAME'\n", "# pandas\n", "# numpy\n", "# matplotlib\n", "# scikit-learn\n", "# torch\n", "# gputil\n", "# psutil\n", "# torchsummary" ] }, { "cell_type": "code", "execution_count": 19, "id": "abe044e7-e328-433e-8c33-adf3b8442863", "metadata": {}, "outputs": [], "source": [ "#Import all required libraries\n", "import sys # Provides access to some variables used or maintained by the interpreter\n", "import pandas as pd # Library for data manipulation and analysis, ideal for working with structured data like tables\n", "import numpy as np # For scientific computing, supports large, multi-dimensional arrays and matrices\n", "import matplotlib.pyplot as plt # For creating static, interactive, and animated visualizations\n", "import matplotlib.dates as mdates # Provides classes for manipulating dates in plots\n", "from pathlib import Path # Used for filesystem path manipulation in an object-oriented way\n", "import torch # Scientific computing library for ML and neural networks\n", "import torch.nn as nn # Module in PyTorch providing various layers and parameters for neural networks, facilitates building and training\n", "from sklearn.metrics import mean_squared_error, mean_absolute_error # For calculating key regression metrics\n", "from sklearn.preprocessing import MinMaxScaler # For scaling and normalizing features, often useful in data preprocessing\n", "import os # Provides a way of using operating system dependent functionality\n", "import GPUtil # For monitoring GPU utilization\n", "from datetime import datetime # For handling dates and times\n", "import psutil # For accessing system details and process utilities\n", "from torchsummary import summary # Import summary from torchsummary to display a summary of model layers and parameters" ] }, { "cell_type": "code", "execution_count": 20, "id": "cabbab8f-0d40-44ab-975e-0f18ea6056db", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/home/sarah/anaconda3/envs/BT2024PyTorch/bin/python\n" ] } ], "source": [ "#To display the current environment\n", "print(sys.executable)" ] }, { "cell_type": "code", "execution_count": 21, "id": "944e31d4-e885-4fa7-ab6c-e2a2a3bc14e7", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "First and last rows from dfClimaAll:\n", " dy Globalstrahlung_15Min Sonnenhöhe ExtraterrestrischeStrahlung \\\n", "0 1 0 -65.0 0 \n", "1 1 0 -65.0 0 \n", "2 1 0 -65.4 0 \n", "3 1 0 -65.5 0 \n", "4 1 0 -65.2 0 \n", "\n", " StundenwertStrahlung Diffusstrahlung StrahlungGeneigteFläche \\\n", "0 0 0 0 \n", "1 0 0 0 \n", "2 0 0 0 \n", "3 0 0 0 \n", "4 0 0 0 \n", "\n", " DiffusstrahlungGeneigteFläche Direktnormalstrahlung Lufttemperatur \\\n", "0 0 0 2.5 \n", "1 0 0 2.5 \n", "2 0 0 2.5 \n", "3 0 0 2.5 \n", "4 0 0 2.4 \n", "\n", " Windgeschwindigkeit Schönwetterstrahlung Taupunkttemperatur TheorPVProd \\\n", "0 1.2 0 0.8 0 \n", "1 1.2 0 0.8 0 \n", "2 1.2 0 0.8 0 \n", "3 1.1 0 0.8 0 \n", "4 1.1 0 0.8 0 \n", "\n", " TimestampWeather \n", "0 2021-01-01 00:00:00 \n", "1 2021-01-01 00:15:00 \n", "2 2021-01-01 00:30:00 \n", "3 2021-01-01 00:45:00 \n", "4 2021-01-01 01:00:00 \n", " dy Globalstrahlung_15Min Sonnenhöhe ExtraterrestrischeStrahlung \\\n", "35035 365 0 -56.5 0 \n", "35036 365 0 -58.5 0 \n", "35037 365 0 -60.3 0 \n", "35038 365 0 -61.9 0 \n", "35039 365 0 -63.3 0 \n", "\n", " StundenwertStrahlung Diffusstrahlung StrahlungGeneigteFläche \\\n", "35035 0 0 0 \n", "35036 0 0 0 \n", "35037 0 0 0 \n", "35038 0 0 0 \n", "35039 0 0 0 \n", "\n", " DiffusstrahlungGeneigteFläche Direktnormalstrahlung Lufttemperatur \\\n", "35035 0 0 5.5 \n", "35036 0 0 5.5 \n", "35037 0 0 5.5 \n", "35038 0 0 5.5 \n", "35039 0 0 4.9 \n", "\n", " Windgeschwindigkeit Schönwetterstrahlung Taupunkttemperatur \\\n", "35035 2.4 0 -18.2 \n", "35036 2.4 0 -18.2 \n", "35037 2.4 0 -18.2 \n", "35038 2.4 0 -18.2 \n", "35039 1.8 0 -14.0 \n", "\n", " TheorPVProd TimestampWeather \n", "35035 0 2023-12-31 22:45:00 \n", "35036 0 2023-12-31 23:00:00 \n", "35037 0 2023-12-31 23:15:00 \n", "35038 0 2023-12-31 23:30:00 \n", "35039 0 2023-12-31 23:45:00 \n", "Number of missing values per column:\n", "dy 0\n", "Globalstrahlung_15Min 0\n", "Sonnenhöhe 0\n", "ExtraterrestrischeStrahlung 0\n", "StundenwertStrahlung 0\n", "Diffusstrahlung 0\n", "StrahlungGeneigteFläche 0\n", "DiffusstrahlungGeneigteFläche 0\n", "Direktnormalstrahlung 0\n", "Lufttemperatur 0\n", "Windgeschwindigkeit 0\n", "Schönwetterstrahlung 0\n", "Taupunkttemperatur 0\n", "TheorPVProd 0\n", "TimestampWeather 0\n", "dtype: int64\n", "Number of rows in dfClimaAll: 105120\n" ] } ], "source": [ "# Step 1 - Reading Data\n", "\n", "## dfClimaAll: Data from 2021-2023\n", "## dfEnergyAll: All data from the transformer station from 2021-2023\n", "\n", "# Load data for 2021\n", "dfClima21 = pd.read_excel('/home/sarah/Documents/BT2024/Weather_Data/2021TimeWeather.xlsx')\n", "\n", "# Load data for 2022\n", "dfClima22 = pd.read_excel('/home/sarah/Documents/BT2024/Weather_Data/2022TimeWeather.xlsx')\n", "\n", "# Load data for 2023\n", "dfClima23 = pd.read_excel('/home/sarah/Documents/BT2024/Weather_Data/2023TimeWeather.xlsx')\n", "\n", "# Merge the DataFrames\n", "dfClimaAll = pd.concat([dfClima21, dfClima22, dfClima23])\n", "\n", "# Check the resulting DataFrame\n", "print(\"First and last rows from dfClimaAll:\")\n", "print(dfClimaAll.head())\n", "print(dfClimaAll.tail())\n", "\n", "# Check for missing values in each column\n", "## .isnull(): Returns DF where values with NaN are replaced by True & values without NaN are replaced by False\n", "## sum(): Sums up the number of True values\n", "print(\"Number of missing values per column:\")\n", "print(dfClimaAll.isnull().sum())\n", "\n", "# Convert 'Timestamp' into a Datetime object and set it as index, if not already set\n", "if 'TimestampWeather' in dfClimaAll.columns:\n", " ## pd.to_datetime: Converts values into Datetime objects —> Pandas can then treat these as timestamps\n", " dfClimaAll['TimestampWeather'] = pd.to_datetime(dfClimaAll['TimestampWeather'])\n", " ## set_index: Sets index for the Timestamp column to facilitate access to data based on time points\n", " dfClimaAll = dfClimaAll.set_index('TimestampWeather')\n", "\n", "# Number of rows in dfClimaAll\n", "print(\"Number of rows in dfClimaAll:\", dfClimaAll.shape[0])" ] }, { "cell_type": "code", "execution_count": 22, "id": "dd18e843-0297-4813-a58b-4a448f63c446", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "First and last rows from dfEnergyAll:\n", " Lastgang\n", "Timestamp \n", "2021-01-01 00:00:00 472.88\n", "2021-01-01 00:15:00 498.83\n", "2021-01-01 00:30:00 480.48\n", "2021-01-01 00:45:00 446.74\n", "2021-01-01 01:00:00 459.55\n", " Lastgang\n", "Timestamp \n", "2023-12-31 22:45:00 833.17\n", "2023-12-31 23:00:00 767.89\n", "2023-12-31 23:15:00 770.04\n", "2023-12-31 23:30:00 730.45\n", "2023-12-31 23:45:00 744.21\n", "Number of rows in dfEnergyAll: 105108\n" ] } ], "source": [ "# Step 1 - Reading Data\n", "\n", "## dfClimaAll: Data from 2021-2023\n", "## dfEnergyAll: All data from the transformer station from 2021-2023\n", "\n", "# Path to the directory containing the Excel files\n", "directory_path = Path('/home/sarah/Documents/BT2024/All')\n", "\n", "# Create a list of all Excel files in the directory\n", "file_paths = list(directory_path.glob('*.xlsx'))\n", "\n", "# List to store the individual DataFrames\n", "dfs = []\n", "\n", "# Loop over all file paths\n", "for file_path in file_paths:\n", " # Read the Excel file\n", " df = pd.read_excel(file_path)\n", " \n", " # Convert the 'Timestamp' column to a datetime\n", " df['Timestamp'] = pd.to_datetime(df['Timestamp'])\n", " \n", " # Convert the 'Lastgang' column to a numeric type, treat errors as NaN\n", " df['Lastgang'] = pd.to_numeric(df['Lastgang'], errors='coerce')\n", " \n", " # Sort the DataFrame by 'Timestamp'\n", " df = df.sort_values(by='Timestamp')\n", " \n", " # Perform linear interpolation for 'Lastgang' on the individual DataFrame\n", " df['Lastgang'] = df['Lastgang'].interpolate(method='linear')\n", " \n", " # Add the DataFrame to the list\n", " dfs.append(df)\n", "\n", "# Merge all DataFrames in the list\n", "dfEnergyAll = pd.concat(dfs).set_index('Timestamp')\n", "\n", "# Sum the 'Lastgang' values for identical timestamps\n", "dfEnergyAll = dfEnergyAll.groupby('Timestamp').sum()\n", "\n", "# Check the resulting DataFrame\n", "print(\"First and last rows from dfEnergyAll:\")\n", "print(dfEnergyAll.head())\n", "print(dfEnergyAll.tail())\n", "\n", "# Display the number of rows in dfEnergyAll\n", "print(\"Number of rows in dfEnergyAll:\", dfEnergyAll.shape[0])" ] }, { "cell_type": "code", "execution_count": 23, "id": "f57c59a7-8588-400b-a65e-7798892b3bff", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Missing timestamps in dfEnergyAll:\n", "DatetimeIndex(['2021-03-28 02:00:00', '2021-03-28 02:15:00',\n", " '2021-03-28 02:30:00', '2021-03-28 02:45:00',\n", " '2022-03-27 02:00:00', '2022-03-27 02:15:00',\n", " '2022-03-27 02:30:00', '2022-03-27 02:45:00',\n", " '2023-03-26 02:00:00', '2023-03-26 02:15:00',\n", " '2023-03-26 02:30:00', '2023-03-26 02:45:00'],\n", " dtype='datetime64[ns]', name='Timestamp', freq=None)\n", "Check after adding the missing timestamps:\n", " Lastgang\n", "2021-03-28 02:00:00 372.28\n", "2021-03-28 02:15:00 372.28\n", "2021-03-28 02:30:00 372.28\n", "2021-03-28 02:45:00 372.28\n", "2022-03-27 02:00:00 554.20\n", "2022-03-27 02:15:00 554.20\n", "2022-03-27 02:30:00 554.20\n", "2022-03-27 02:45:00 554.20\n", "2023-03-26 02:00:00 520.34\n", "2023-03-26 02:15:00 520.34\n", "2023-03-26 02:30:00 520.34\n", "2023-03-26 02:45:00 520.34\n", "Number of rows in dfClima21: 105120\n", "Number of rows in dfEnergyAll: 105120\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_1514653/1871761932.py:32: FutureWarning: DataFrame.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.\n", " dfEnergyAll = dfEnergyAll.sort_index().fillna(method='ffill')\n" ] } ], "source": [ "# Step 2 - Preparing the Data\n", "\n", "# Create a complete timestamp index for the years 2021 - 2023 in 15-minute intervals\n", "all_timestamps = pd.date_range(start='2021-01-01 00:00:00', end='2023-12-31 23:45:00', freq='15T')\n", "\n", "# Convert this into a DataFrame\n", "df_all_timestamps = pd.DataFrame(all_timestamps, columns=['Timestamp'])\n", "df_all_timestamps = df_all_timestamps.set_index('Timestamp')\n", "\n", "# Compare the complete timestamp index with dfEnergyAll\n", "missing_timestamps = df_all_timestamps.index.difference(dfEnergyAll.index)\n", "\n", "print(\"Missing timestamps in dfEnergyAll:\")\n", "print(missing_timestamps)\n", "\n", "\n", "# Missing timestamps\n", "missing_timestamps = pd.DatetimeIndex(['2021-03-28 02:00:00', '2021-03-28 02:15:00',\n", " '2021-03-28 02:30:00', '2021-03-28 02:45:00',\n", " '2022-03-27 02:00:00', '2022-03-27 02:15:00',\n", " '2022-03-27 02:30:00', '2022-03-27 02:45:00',\n", " '2023-03-26 02:00:00', '2023-03-26 02:15:00',\n", " '2023-03-26 02:30:00', '2023-03-26 02:45:00'])\n", "\n", "# Create a DataFrame with the missing timestamps\n", "df_missing = pd.DataFrame(index=missing_timestamps)\n", "\n", "# Merge this DataFrame with the original DataFrame\n", "dfEnergyAll = dfEnergyAll.combine_first(df_missing)\n", "\n", "# Fill the missing values. Use 'ffill' for forward fill.\n", "dfEnergyAll = dfEnergyAll.sort_index().fillna(method='ffill')\n", "\n", "print(\"Check after adding the missing timestamps:\")\n", "print(dfEnergyAll.loc[missing_timestamps])\n", "\n", "# Number of rows in dfClimaAll\n", "# Ensure dfClimaAll is already defined before executing this line.\n", "print(\"Number of rows in dfClimaAll:\", dfClimaAll.shape[0])\n", "\n", "# Number of rows in dfEnergyAll\n", "print(\"Number of rows in dfEnergyAll:\", dfEnergyAll.shape[0])" ] }, { "cell_type": "markdown", "id": "aff293e9-0b20-45bf-981e-5c952ddb4e52", "metadata": {}, "source": [ "## 1 - Transformer-Architecture with Energy consumption data and weather data" ] }, { "cell_type": "code", "execution_count": 24, "id": "5939df7d-254c-4aba-a087-fccc490a0ee5", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of missing values per column:\n", "Lastgang 0\n", "StundenwertStrahlung 0\n", "Globalstrahlung_15Min 0\n", "StrahlungGeneigteFläche 0\n", "TheorPVProd 0\n", "Direktnormalstrahlung 0\n", "Schönwetterstrahlung 0\n", "Lufttemperatur 0\n", "dtype: int64\n", "First and last rows from dfEnergyAll-Edited:\n", " Lastgang StundenwertStrahlung Globalstrahlung_15Min \\\n", "2021-01-01 00:00:00 472.88 0 0 \n", "2021-01-01 00:15:00 498.83 0 0 \n", "2021-01-01 00:30:00 480.48 0 0 \n", "2021-01-01 00:45:00 446.74 0 0 \n", "2021-01-01 01:00:00 459.55 0 0 \n", "\n", " StrahlungGeneigteFläche TheorPVProd \\\n", "2021-01-01 00:00:00 0 0 \n", "2021-01-01 00:15:00 0 0 \n", "2021-01-01 00:30:00 0 0 \n", "2021-01-01 00:45:00 0 0 \n", "2021-01-01 01:00:00 0 0 \n", "\n", " Direktnormalstrahlung Schönwetterstrahlung \\\n", "2021-01-01 00:00:00 0 0 \n", "2021-01-01 00:15:00 0 0 \n", "2021-01-01 00:30:00 0 0 \n", "2021-01-01 00:45:00 0 0 \n", "2021-01-01 01:00:00 0 0 \n", "\n", " Lufttemperatur \n", "2021-01-01 00:00:00 2.5 \n", "2021-01-01 00:15:00 2.5 \n", "2021-01-01 00:30:00 2.5 \n", "2021-01-01 00:45:00 2.5 \n", "2021-01-01 01:00:00 2.4 \n", " Lastgang StundenwertStrahlung Globalstrahlung_15Min \\\n", "2023-12-31 22:45:00 833.17 0 0 \n", "2023-12-31 23:00:00 767.89 0 0 \n", "2023-12-31 23:15:00 770.04 0 0 \n", "2023-12-31 23:30:00 730.45 0 0 \n", "2023-12-31 23:45:00 744.21 0 0 \n", "\n", " StrahlungGeneigteFläche TheorPVProd \\\n", "2023-12-31 22:45:00 0 0 \n", "2023-12-31 23:00:00 0 0 \n", "2023-12-31 23:15:00 0 0 \n", "2023-12-31 23:30:00 0 0 \n", "2023-12-31 23:45:00 0 0 \n", "\n", " Direktnormalstrahlung Schönwetterstrahlung \\\n", "2023-12-31 22:45:00 0 0 \n", "2023-12-31 23:00:00 0 0 \n", "2023-12-31 23:15:00 0 0 \n", "2023-12-31 23:30:00 0 0 \n", "2023-12-31 23:45:00 0 0 \n", "\n", " Lufttemperatur \n", "2023-12-31 22:45:00 5.5 \n", "2023-12-31 23:00:00 5.5 \n", "2023-12-31 23:15:00 5.5 \n", "2023-12-31 23:30:00 5.5 \n", "2023-12-31 23:45:00 4.9 \n" ] } ], "source": [ "# Step 2 - Preparing the Data\n", "\n", "# To check if the indexes in both DataFrames are set equally\n", "dfClimaAll.index.equals(dfEnergyAll.index)\n", "\n", "# Adding the columns from dfClima21 to dfEnergyAll\n", "dfEnergyAll = dfEnergyAll.join(dfClimaAll[['StundenwertStrahlung', 'Globalstrahlung_15Min', 'StrahlungGeneigteFläche', 'TheorPVProd', 'Direktnormalstrahlung', 'Schönwetterstrahlung', 'Lufttemperatur']])\n", "\n", "# Checking for missing values in each column\n", "print(\"Number of missing values per column:\")\n", "print(dfEnergyAll.isnull().sum())\n", "\n", "# Printing the first and last rows from the dataframe\n", "print(\"First and last rows from dfEnergyAll-Edited:\")\n", "print(dfEnergyAll.head())\n", "print(dfEnergyAll.tail())" ] }, { "cell_type": "code", "execution_count": 25, "id": "d74e0399-9d4c-4aad-aeb1-b615170f8c5c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Lastgang StundenwertStrahlung Globalstrahlung_15Min \\\n", "2021-01-01 00:00:00 472.88 0 0 \n", "2021-01-01 00:15:00 498.83 0 0 \n", "2021-01-01 00:30:00 480.48 0 0 \n", "2021-01-01 00:45:00 446.74 0 0 \n", "2021-01-01 01:00:00 459.55 0 0 \n", "\n", " StrahlungGeneigteFläche TheorPVProd \\\n", "2021-01-01 00:00:00 0 0 \n", "2021-01-01 00:15:00 0 0 \n", "2021-01-01 00:30:00 0 0 \n", "2021-01-01 00:45:00 0 0 \n", "2021-01-01 01:00:00 0 0 \n", "\n", " Direktnormalstrahlung Schönwetterstrahlung \\\n", "2021-01-01 00:00:00 0 0 \n", "2021-01-01 00:15:00 0 0 \n", "2021-01-01 00:30:00 0 0 \n", "2021-01-01 00:45:00 0 0 \n", "2021-01-01 01:00:00 0 0 \n", "\n", " Lufttemperatur Lastgang_Moving_Average \\\n", "2021-01-01 00:00:00 2.5 549.123854 \n", "2021-01-01 00:15:00 2.5 549.123854 \n", "2021-01-01 00:30:00 2.5 549.123854 \n", "2021-01-01 00:45:00 2.5 549.123854 \n", "2021-01-01 01:00:00 2.4 549.123854 \n", "\n", " Lastgang_First_Difference \n", "2021-01-01 00:00:00 25.95 \n", "2021-01-01 00:15:00 25.95 \n", "2021-01-01 00:30:00 -18.35 \n", "2021-01-01 00:45:00 -33.74 \n", "2021-01-01 01:00:00 12.81 \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_1514653/1800522370.py:11: FutureWarning: DataFrame.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.\n", " dfEnergyAll = dfEnergyAll.fillna(method='bfill')\n" ] } ], "source": [ "# Step 2 - Preparing the Data\n", "\n", "# Calculating the moving average for 'Lastgang'\n", "window_size = 96 # This corresponds to 24 hours at 15-minute intervals\n", "dfEnergyAll['Lastgang_Moving_Average'] = dfEnergyAll['Lastgang'].rolling(window=window_size).mean()\n", "\n", "# Calculating the first differences for 'Lastgang'\n", "dfEnergyAll['Lastgang_First_Difference'] = dfEnergyAll['Lastgang'].diff()\n", "\n", "# Since the first `window_size - 1` values of the moving average will be NaN and the first value of the first difference will be NaN, fill these values.\n", "dfEnergyAll = dfEnergyAll.fillna(method='bfill')\n", "\n", "# To check the first few rows to ensure the new features look as expected.\n", "print(dfEnergyAll.head())" ] }, { "cell_type": "code", "execution_count": 26, "id": "c513ae39-9901-490c-939b-c2061b85d5d9", "metadata": {}, "outputs": [], "source": [ "# Step 3 - Data Scaling\n", "\n", "# Initialize the scalers\n", "lastgang_scaler = MinMaxScaler(feature_range=(0, 1))\n", "features_scaler = MinMaxScaler(feature_range=(0, 1))\n", "\n", "# Apply the scaler to the corresponding columns\n", "dfEnergyAll['Lastgang'] = lastgang_scaler.fit_transform(dfEnergyAll['Lastgang'].values.reshape(-1, 1))\n", "dfEnergyAll[['StundenwertStrahlung', 'Lufttemperatur', 'Globalstrahlung_15Min', 'StrahlungGeneigteFläche', 'Lastgang_Moving_Average', 'Lastgang_First_Difference']] = features_scaler.fit_transform(dfEnergyAll[['StundenwertStrahlung', 'Lufttemperatur', 'Globalstrahlung_15Min', 'StrahlungGeneigteFläche', 'Lastgang_Moving_Average', 'Lastgang_First_Difference']])" ] }, { "cell_type": "code", "execution_count": 27, "id": "f98d7e5d-6358-4ddf-a1e0-907e2c133c40", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Lastgang StundenwertStrahlung Globalstrahlung_15Min \\\n", "count 105120.000000 105120.000000 105120.000000 \n", "mean 0.425947 0.561652 0.125328 \n", "std 0.149590 0.111328 0.197261 \n", "min 0.000000 0.000000 0.000000 \n", "25% 0.312299 0.490186 0.000000 \n", "50% 0.397657 0.492149 0.000861 \n", "75% 0.527101 0.593719 0.182444 \n", "max 1.000000 1.000000 1.000000 \n", "\n", " StrahlungGeneigteFläche TheorPVProd Direktnormalstrahlung \\\n", "count 105120.000000 105120.000000 105120.000000 \n", "mean 0.125328 116.568056 144.916924 \n", "std 0.197261 177.407501 266.941484 \n", "min 0.000000 0.000000 0.000000 \n", "25% 0.000000 0.000000 0.000000 \n", "50% 0.000861 1.000000 0.000000 \n", "75% 0.182444 179.000000 132.000000 \n", "max 1.000000 799.000000 1010.000000 \n", "\n", " Schönwetterstrahlung Lufttemperatur Lastgang_Moving_Average \\\n", "count 105120.000000 105120.000000 105120.000000 \n", "mean 220.075295 0.487743 0.468536 \n", "std 296.754964 0.169107 0.234477 \n", "min 0.000000 0.000000 0.000000 \n", "25% 0.000000 0.359408 0.282593 \n", "50% 6.000000 0.482030 0.433883 \n", "75% 403.000000 0.606765 0.641900 \n", "max 1020.000000 1.000000 1.000000 \n", "\n", " Lastgang_First_Difference \n", "count 105120.000000 \n", "mean 0.504792 \n", "std 0.046065 \n", "min 0.000000 \n", "25% 0.476896 \n", "50% 0.504275 \n", "75% 0.532252 \n", "max 1.000000 \n", "Skalierparameter für Lastgang: [0.00068067] [-0.03440131]\n" ] } ], "source": [ "# Step 3 - Check the scaled values\n", "\n", "print(dfEnergyAll.describe())\n", "\n", "print(\"Skalierparameter für Lastgang:\", lastgang_scaler.scale_, lastgang_scaler.min_)" ] }, { "cell_type": "code", "execution_count": 28, "id": "5a0dcfe5-3d9a-40e1-834e-468d8c47864e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "GPU before model training: 2024-05-03 17:23:54\n", "GPU: NVIDIA A100 80GB PCIe, GPU RAM Free: 15712.0MB, Used: 65326.0MB, Utilization: 4.0%\n", "GPU: NVIDIA A100 80GB PCIe, GPU RAM Free: 236.0MB, Used: 80803.0MB, Utilization: 100.0%\n", "CPU before model training:\n", "CPU Utilization: 82.1%\n", "RAM before model training:\n", "Total memory: 146.88 GB\n", "Available memory: 110.48 GB\n", "Used memory: 34.61 GB\n", "Memory usage: 24.8%\n" ] } ], "source": [ "# Step 3 - Show GPU Utilization and Memory Usage before Model Training\n", "\n", "def print_gpu_utilization():\n", " # Retrieve and print GPU utilization and memory stats\n", " GPUs = GPUtil.getGPUs()\n", " for GPU in GPUs:\n", " print(f\"GPU: {GPU.name}, GPU RAM Free: {GPU.memoryFree}MB, Used: {GPU.memoryUsed}MB, Utilization: {GPU.load*100}%\")\n", "\n", "print(\"GPU before model training:\", datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\"))\n", "print_gpu_utilization()\n", "\n", "# Function to print CPU utilization\n", "def print_cpu_utilization():\n", " # Retrieve and print the percentage of CPU utilization\n", " print(f\"CPU Utilization: {psutil.cpu_percent(interval=1)}%\")\n", "\n", "print(\"CPU before model training:\")\n", "print_cpu_utilization()\n", "\n", "def print_memory_usage():\n", " # Retrieve and print memory statistics\n", " memory = psutil.virtual_memory()\n", " print(f\"Total memory: {memory.total / (1024**3):.2f} GB\")\n", " print(f\"Available memory: {memory.available / (1024**3):.2f} GB\")\n", " print(f\"Used memory: {memory.used / (1024**3):.2f} GB\")\n", " print(f\"Memory usage: {memory.percent}%\")\n", "\n", "print(\"RAM before model training:\")\n", "print_memory_usage()\n" ] }, { "cell_type": "code", "execution_count": 29, "id": "0d90e033-70b7-4d3d-b038-315db7f388a9", "metadata": {}, "outputs": [], "source": [ "# Step 4 - Creating the sequences, positional encoding and the LSTM model\n", "\n", "def create_sequences(data, seq_length):\n", " xs = []\n", " ys = []\n", " for i in range(len(data) - seq_length):\n", " x = data.iloc[i:(i + seq_length)].to_numpy() # Extract sequence from data\n", " y = data.iloc[i + seq_length, 0] # Extract target value for the sequence\n", " xs.append(x)\n", " ys.append(y)\n", " return np.array(xs), np.array(ys) # Convert lists to numpy arrays\n", "\n", "seq_length = 192 # Length of each sequence\n", "X, y = create_sequences(dfEnergyAll, seq_length) # Create sequences from data\n", "\n", "# Use the last 192 sequences for testing to maintain a comparable distribution as the TBATS model\n", "X_train, X_test = X[:-192], X[-192:] # Split data into training and testing sets\n", "y_train, y_test = y[:-192], y[-192:] # Split targets into training and testing sets\n", "\n", "\n", "# Function for positional encoding\n", "def positional_encoding(seq_len, d_model):\n", " encoding = np.array([\n", " [pos / np.power(10000, 2 * (j // 2) / d_model) for j in range(d_model)]\n", " if pos != 0 else np.zeros(d_model)\n", " for pos in range(seq_len)\n", " ])\n", " encoding[1:, 0::2] = np.sin(encoding[1:, 0::2]) # Apply sin to even indices\n", " encoding[1:, 1::2] = np.cos(encoding[1:, 1::2]) # Apply cos to odd indices\n", " return encoding\n", "\n", "# Apply positional encodings\n", "d_model = X_train.shape[2] # Number of features in the data\n", "pos_enc = positional_encoding(seq_length, d_model) # Generate positional encoding\n", "\n", "# Add positional encodings to the sequences\n", "X_train_enc = np.array([x + pos_enc for x in X_train]) # Add encoding to training data\n", "X_test_enc = np.array([x + pos_enc for x in X_test]) # Add encoding to testing data" ] }, { "cell_type": "code", "execution_count": 30, "id": "e0e131ce-7193-42fb-94dc-f2bd0876aa2b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "X_train Shape: (104736, 192, 10)\n", "X_test Shape: (192, 192, 10)\n", "X_train_enc Shape: (104736, 192, 10)\n", "X_test_enc Shape: (192, 192, 10)\n" ] } ], "source": [ "# Step 4 - Creating the sequences and the LSTM model\n", "\n", "# Print the shapes of the training and testing datasets\n", "print(f\"X_train Shape: {X_train.shape}\")\n", "print(f\"X_test Shape: {X_test.shape}\")\n", "\n", "# Print the shapes of the training and testing datasets with positional encodings applied\n", "print(f\"X_train_enc Shape: {X_train_enc.shape}\")\n", "print(f\"X_test_enc Shape: {X_test_enc.shape}\")" ] }, { "cell_type": "code", "execution_count": 31, "id": "69131796-20f7-4e93-a091-5fa9dfed0237", "metadata": {}, "outputs": [], "source": [ "# Step 4 - Creating the sequences and the LSTM model\n", "\n", "# Check Hardware Availability for the PyTorch code\n", "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')" ] }, { "cell_type": "code", "execution_count": 32, "id": "514b9884-3534-4263-b95f-e830396947aa", "metadata": {}, "outputs": [], "source": [ "# Step 4 - Creating the sequences and the LSTM model\n", "\n", "X_train_tensors = torch.Tensor(X_train_enc).to(device) # Convert training data to tensors and move to the designated device (GPU or CPU)\n", "y_train_tensors = torch.Tensor(y_train).view(-1, 1).to(device) # Convert training labels to tensors, reshape them and move to the device\n", "X_test_tensors = torch.Tensor(X_test_enc).to(device) # Convert testing data to tensors and move to the designated device\n", "y_test_tensors = torch.Tensor(y_test).view(-1, 1).to(device) # Convert testing labels to tensors, reshape them and move to the device" ] }, { "cell_type": "code", "execution_count": 33, "id": "e84d8190-9f4f-4e97-94e5-6587d3efa9a1", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/sarah/anaconda3/envs/BT2024PyTorch/lib/python3.10/site-packages/torch/nn/modules/transformer.py:286: UserWarning: enable_nested_tensor is True, but self.use_nested_tensor is False because encoder_layer.self_attn.batch_first was not True(use batch_first for better inference performance)\n", " warnings.warn(f\"enable_nested_tensor is True, but self.use_nested_tensor is False because {why_not_sparsity_fast_path}\")\n" ] } ], "source": [ "# Step 4 - Creating the sequences and the LSTM model\n", "\n", "#Create Transformer Model\n", "import torch.nn as nn\n", "\n", "class TransformerModel(nn.Module):\n", " def __init__(self, input_size, hidden_layer_size, output_size, num_layers, nhead):\n", " super(TransformerModel, self).__init__()\n", " self.input_size = input_size # Number of input features\n", " self.hidden_layer_size = hidden_layer_size # Size of the hidden layer\n", " self.output_size = output_size # Size of the output layer\n", "\n", " # Embedding layer that maps input features to the hidden layer size\n", " self.embedding = nn.Linear(input_size, hidden_layer_size)\n", "\n", " # Transformer Encoder\n", " encoder_layers = nn.TransformerEncoderLayer(d_model=hidden_layer_size, nhead=nhead)\n", " self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers=num_layers)\n", "\n", " # Linear layer for output\n", " self.linear = nn.Linear(hidden_layer_size, output_size)\n", "\n", " def forward(self, src):\n", " # Embedding\n", " src = self.embedding(src) # Apply embedding layer to source data\n", "\n", " # Transformer Encoder\n", " src = src.permute(1, 0, 2) # Permute the dimensions of src for the Transformer (expects [seq_len, batch, features])\n", " output = self.transformer_encoder(src) # Pass the embedded src through the transformer encoder\n", "\n", " # Only take the output from the last time step for making predictions\n", " output = output[-1]\n", "\n", " # Linear layer\n", " output = self.linear(output) # Apply linear layer to the output of the transformer\n", " return output\n", "\n", "# Parameters\n", "input_size = 10 # Number of features\n", "hidden_layer_size = 100 # Size of the hidden layer\n", "output_size = 1 # Output size\n", "num_layers = 2 # Number of layers in the transformer\n", "nhead = 2 # Number of attention heads\n", "\n", "# Instantiate the model\n", "model = TransformerModel(input_size, hidden_layer_size, output_size, num_layers, nhead).to(device) # Create the model and move it to the designated device" ] }, { "cell_type": "markdown", "id": "d936195f-89d1-48e4-9acb-4d63cd825590", "metadata": {}, "source": [ "from torchsummary import summary\n", "\n", "# Angabe der Eingabedimension für torchsummary\n", "seq_length = 192 # Sequenzlänge, wie im Modell verwendet\n", "summary(model, input_size=(seq_length, input_size))" ] }, { "cell_type": "code", "execution_count": 34, "id": "5ba77573-4e7b-45d1-8a98-d0c25deed352", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/sarah/anaconda3/envs/BT2024PyTorch/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "New best model saved at epoch 1 with test loss 0.03984663731534965.\n", "Epoch 1, Train Loss: 0.013731314026245361, Test Loss: 0.03984663731534965\n", "New best model saved at epoch 2 with test loss 0.016820339420519304.\n", "Epoch 2, Train Loss: 0.005286569327589577, Test Loss: 0.016820339420519304\n", "Epoch 3, Train Loss: 0.006792317569947825, Test Loss: 0.017148404789622873\n", "Epoch 4, Train Loss: 0.004567338423792936, Test Loss: 0.02858270751312375\n", "Epoch 5, Train Loss: 0.004809053334021856, Test Loss: 0.017867928676423617\n", "Epoch 6, Train Loss: 0.004574279051601939, Test Loss: 0.04403836881829193\n", "Epoch 7, Train Loss: 0.0048564385962972505, Test Loss: 0.024106490673148073\n", "Early Stopping after 7 epochs!\n" ] } ], "source": [ "# Step 5 - Model Training\n", "\n", "# Define the number of epochs and batch size\n", "loss_function = nn.MSELoss() # Loss function for measuring the mean squared error loss\n", "optimizer = torch.optim.Adam(model.parameters(), lr=0.001) # Optimizer with a learning rate of 0.001\n", "epochs = 70 # Total number of epochs to train the model\n", "batch_size = 12 # Number of samples per batch\n", "\n", "# For storing the loss values\n", "train_losses = [] # List to store training loss values for each epoch\n", "test_losses = [] # List to store validation loss values for each epoch\n", "\n", "# Initialize variables for Early Stopping\n", "best_test_loss = float('inf') # Best validation loss seen so far, initialized to infinity\n", "best_model_path = 'best_model.pth' # Path to save the best performing model\n", "early_stopping_patience = 5 # Number of epochs to continue without improvement before stopping\n", "epochs_without_improvement = 0 # Counter to track epochs without improvement\n", "\n", "for epoch in range(epochs): # Loop over each epoch\n", " model.train() # Set the model to training mode\n", " train_loss = 0.0 # Reset training loss for this epoch\n", " for i in range(0, len(X_train_tensors), batch_size): # Iterate over the training data in batches\n", " X_batch = X_train_tensors[i:i + batch_size] # Get the current batch of input data\n", " y_batch = y_train_tensors[i:i + batch_size] # Get the current batch of target data\n", "\n", " # Dynamic adjustment of the batch size for the last batch of the epoch\n", " actual_batch_size = X_batch.size(0)\n", "\n", " optimizer.zero_grad() # Reset the gradients to zero\n", "\n", " # No need to manage hidden states for the Transformer model\n", " y_pred = model(X_batch) # Forward pass through the model\n", "\n", " loss = loss_function(y_pred, y_batch) # Calculate loss\n", " loss.backward() # Backpropagation\n", " optimizer.step() # Update model weights\n", " \n", " train_loss += loss.item() * actual_batch_size # Accumulate the loss\n", "\n", " # Calculate the average training loss for this epoch\n", " train_loss /= len(X_train_tensors)\n", " train_losses.append(train_loss)\n", "\n", " # Validation phase\n", " model.eval() # Set the model to evaluation mode\n", " test_loss = 0.0\n", " with torch.no_grad(): # No gradient calculations\n", " for i in range(0, len(X_test_tensors), batch_size):\n", " X_batch = X_test_tensors[i:i + batch_size]\n", " y_batch = y_test_tensors[i:i + batch_size]\n", "\n", " y_pred = model(X_batch) # Forward pass through the model for validation\n", " loss = loss_function(y_pred, y_batch) # Calculate loss\n", " test_loss += loss.item() * actual_batch_size # Accumulate the loss\n", " \n", " # Calculate the average validation loss for this epoch\n", " test_loss /= len(X_test_tensors)\n", " test_losses.append(test_loss)\n", "\n", " if test_loss < best_test_loss:\n", " best_test_loss = test_loss # Update the best test loss\n", " torch.save(model.state_dict(), best_model_path) # Save the best model\n", " epochs_without_improvement = 0 # Reset the improvement counter\n", " print(f'New best model saved at epoch {epoch+1} with test loss {test_loss}.')\n", " else:\n", " epochs_without_improvement += 1 # Increment the no-improvement counter\n", "\n", " print(f'Epoch {epoch + 1}, Train Loss: {train_loss}, Test Loss: {test_loss}')\n", "\n", " if epochs_without_improvement >= early_stopping_patience:\n", " print(f'Early Stopping after {epoch+1} epochs!') # Stop training if no improvement\n", " break" ] }, { "cell_type": "code", "execution_count": 35, "id": "2eb9a478-fe5e-4e77-9b67-4cd32b584b00", "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Step 5 - Model Training\n", "\n", "#Printing the learning curve\n", "plt.plot(train_losses, label='Train Loss')\n", "plt.plot(test_losses, label='Test Loss')\n", "plt.xlabel('Epochs')\n", "plt.ylabel('Loss')\n", "plt.title('Learning Curve')\n", "plt.legend()\n", "plt.show()" ] }, { "cell_type": "markdown", "id": "55e3a217-8042-4676-a203-55e4441ee2cf", "metadata": {}, "source": [ "Learning Curve Chart: Die Lernkurve zeigt den Verlust von Trainings- und Testdaten über die Anzahl der Epochen. Der kontinuierliche Abwärtstrend beider Kurven deutet darauf hin, dass das Modell im Laufe der Zeit besser wird und keine Anzeichen von Überanpassung zeigt, da der Testverlust (orange Linie) dem Trainingsverlust (blaue Linie) folgt. Das Konvergieren beider Kurven ist ein gutes Zeichen dafür, dass das Modell generalisiert und nicht nur die Trainingsdaten auswendig lernt." ] }, { "cell_type": "code", "execution_count": 36, "id": "a22fa477-6ab6-4e77-827e-8bb770345c44", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Mean Squared Error (MSE): 36304.058488096474\n", "Mean Absolute Error (MAE): 162.9043857530535\n", "Root Mean Square Error (RMSE): 190.53623930396148\n", "Mean Absolute Percentage Error (MAPE): 22.51108406593072%\n" ] }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Step 6 - Rescaling, error metrics and visualization\n", "\n", "# Load the best model weights\n", "best_model_path = 'best_model.pth'\n", "model.load_state_dict(torch.load(best_model_path, map_location=device))\n", "\n", "# Evaluate the model\n", "model.eval()\n", "test_predictions = []\n", "with torch.no_grad(): # No gradients needed for inference\n", " for i in range(len(X_test_tensors)):\n", " X_batch = X_test_tensors[i].unsqueeze(0) # Add batch dimension\n", " y_pred = model(X_batch) # No hidden state needed for Transformer\n", " test_predictions.append(y_pred.squeeze().item()) # Remove batch dimension and convert to item\n", "\n", "# Denormalization of predictions and actual values\n", "test_predictions_denorm = lastgang_scaler.inverse_transform(np.array(test_predictions).reshape(-1, 1))\n", "y_test_denorm = lastgang_scaler.inverse_transform(y_test.reshape(-1, 1))\n", "\n", "# Calculation of error metrics\n", "mse = mean_squared_error(y_test_denorm, test_predictions_denorm) # Mean Squared Error\n", "mae = mean_absolute_error(y_test_denorm, test_predictions_denorm) # Mean Absolute Error\n", "rmse = np.sqrt(mse) # Root Mean Square Error\n", "mape = np.mean(np.abs((y_test_denorm - test_predictions_denorm) / y_test_denorm)) * 100 # Mean Absolute Percentage Error\n", "\n", "print(f\"Mean Squared Error (MSE): {mse}\")\n", "print(f\"Mean Absolute Error (MAE): {mae}\")\n", "print(f\"Root Mean Square Error (RMSE): {rmse}\")\n", "print(f\"Mean Absolute Percentage Error (MAPE): {mape}%\")\n", "\n", "# Visualization of actual vs predicted values\n", "plt.figure(figsize=(12, 6))\n", "test_dates = pd.date_range(start=dfEnergyAll.index[-len(X_test):][0], periods=len(y_test), freq='15T')\n", "plt.plot(test_dates, y_test_denorm, label='Actual Values', color='#3E7A6F')\n", "plt.plot(test_dates, test_predictions_denorm, label='Predicted Values', color='#7DFFE7')\n", "plt.legend()\n", "plt.title('Comparison of Actual and Predicted Values: Transformer over 48 Hours')\n", "plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M')) # Format x-axis for dates\n", "plt.gca().xaxis.set_major_locator(mdates.HourLocator(interval=3)) # Set major tick intervals\n", "plt.gcf().autofmt_xdate() # Auto-format date labels for better readability\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 37, "id": "a467f613-a988-4cff-8b07-6c24a18061ce", "metadata": {}, "outputs": [], "source": [ "# Step 6 - Rescaling, error metrics and visualization\n", "\n", "# Saving the model for later\n", "torch.save(model.state_dict(), 'transformer_model.pth')" ] }, { "cell_type": "code", "execution_count": 38, "id": "ec2ae0f8-ea84-46c0-90ce-4961b21cb9b3", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "GPU after model training: 2024-05-03 17:30:49\n", "GPU: NVIDIA A100 80GB PCIe, GPU RAM Free: 14824.0MB, Used: 66214.0MB, Utilization: 24.0%\n", "GPU: NVIDIA A100 80GB PCIe, GPU RAM Free: 236.0MB, Used: 80803.0MB, Utilization: 55.00000000000001%\n", "CPU after model training:\n", "CPU Utilization: 81.2%\n", "RAM after model training:\n", "Total memory: 146.88 GB\n", "Available memory: 110.39 GB\n", "Used memory: 34.70 GB\n", "Memory usage: 24.8%\n" ] } ], "source": [ "# Step 7 - Show GPU Utilization and Memory Usage after Model Training\n", "\n", "print(\"GPU after model training:\", datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")) # Print GPU utilization after model training\n", "print_gpu_utilization() # Call function to print GPU utilization\n", "\n", "print(\"CPU after model training:\") # Print CPU utilization after model training\n", "print_cpu_utilization() # Call function to print CPU utilization\n", "\n", "print(\"RAM after model training:\") # Print RAM usage after model training\n", "print_memory_usage() # Call function to print RAM usage" ] }, { "cell_type": "markdown", "id": "dcd59d0a-ef66-4e51-886a-ddfb2f42025f", "metadata": {}, "source": [ "## 2 - Transformer-Architecture with Energy consumption data and 2 variables 'Lastgang_Moving_Average' and 'Lastgang_First_Difference'" ] }, { "cell_type": "code", "execution_count": null, "id": "04646c37-be07-45ad-98ad-9b2851d89658", "metadata": {}, "outputs": [], "source": [ "# Step 3 - Data Scaling\n", "\n", "# Initialize the scalers\n", "lastgang_scaler = MinMaxScaler(feature_range=(0, 1)) # Scaler for the target variable\n", "features_scaler = MinMaxScaler(feature_range=(0, 1)) # Scaler for the feature variables\n", "\n", "# Apply the scaler to the appropriate columns\n", "dfEnergyAll['Lastgang'] = lastgang_scaler.fit_transform(dfEnergyAll['Lastgang'].values.reshape(-1, 1)) # Scale the main target variable\n", "dfEnergyAll[['Lastgang_Moving_Average', 'Lastgang_First_Difference']] = features_scaler.fit_transform(dfEnergyAll[['Lastgang_Moving_Average', 'Lastgang_First_Difference']]) # Scale selected features\n" ] }, { "cell_type": "code", "execution_count": null, "id": "86d884b0-1b5e-4041-bcf6-c0cbe4252f0e", "metadata": {}, "outputs": [], "source": [ "# Step 4 - Creating the sequences, positional encoding and the LSTM model\n", "\n", "# Step 2 - Data Preparation: Create sequences and add positional encoding\n", "def create_sequences(data, seq_length):\n", " xs = [] # list to store sequences\n", " ys = [] # list to store target values corresponding to each sequence\n", " for i in range(len(data) - seq_length):\n", " x = data.iloc[i:(i + seq_length)].to_numpy() # extract sequence of length seq_length from data\n", " y = data.iloc[i + seq_length, 0] # target value at the end of the sequence\n", " xs.append(x)\n", " ys.append(y)\n", " return np.array(xs), np.array(ys) # convert lists to numpy arrays for machine learning processing\n", "\n", "seq_length = 192 # define the length of sequences\n", "X, y = create_sequences(dfEnergyAll, seq_length) # create sequences and corresponding targets from the dataset\n", "\n", "# Use the last 192 sequences for testing to maintain a comparable distribution as the TBATS model\n", "X_train, X_test = X[:-192], X[-192:] # split data into training and testing sets\n", "y_train, y_test = y[:-192], y[-192:] # split targets into training and testing sets\n", "\n", "# Function for positional encoding\n", "def positional_encoding(seq_len, d_model):\n", " encoding = np.array([\n", " [pos / np.power(10000, 2 * (j // 2) / d_model) for j in range(d_model)]\n", " if pos != 0 else np.zeros(d_model) \n", " for pos in range(seq_len)\n", " ])\n", " encoding[1:, 0::2] = np.sin(encoding[1:, 0::2]) # apply sine to even indices\n", " encoding[1:, 1::2] = np.cos(encoding[1:, 1::2]) # apply cosine to odd indices\n", " return encoding\n", "\n", "# Apply positional encodings\n", "d_model = X_train.shape[2] # number of features in the data\n", "pos_enc = positional_encoding(seq_length, d_model) # generate positional encoding based on sequence length and number of features\n", "\n", "# Add positional encodings to the sequences\n", "X_train_enc = np.array([x + pos_enc for x in X_train]) # add positional encoding to training data\n", "X_test_enc = np.array([x + pos_enc for x in X_test]) # add positional encoding to testing data" ] }, { "cell_type": "code", "execution_count": null, "id": "d48f7903-acde-4110-a95d-7291ef66848f", "metadata": {}, "outputs": [], "source": [ "# Step 4 - Creating the sequences, positional encoding and the LSTM model\n", "\n", "# Print the shapes of the training and testing datasets\n", "print(f\"X_train Shape: {X_train.shape}\")\n", "print(f\"X_test Shape: {X_test.shape}\")\n", "\n", "# Print the shapes of the training and testing datasets with positional encodings applied\n", "print(f\"X_train_enc Shape: {X_train_enc.shape}\")\n", "print(f\"X_test_enc Shape: {X_test_enc.shape}\")" ] }, { "cell_type": "code", "execution_count": null, "id": "9522a46a-5d2b-462d-8841-57163be8dff2", "metadata": {}, "outputs": [], "source": [ "# Step 4 - Creating the sequences and the LSTM model\n", "\n", "# Check Hardware Availability for the PyTorch code\n", "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')" ] }, { "cell_type": "code", "execution_count": null, "id": "edc7888b-ee3d-40b0-9106-30113640a475", "metadata": {}, "outputs": [], "source": [ "# Step 4 - Creating the sequences and the LSTM model\n", "\n", "X_train_tensors = torch.Tensor(X_train_enc).to(device) # Convert training data to tensors and move to the designated device (GPU or CPU)\n", "y_train_tensors = torch.Tensor(y_train).view(-1, 1).to(device) # Convert training labels to tensors, reshape them and move to the device\n", "X_test_tensors = torch.Tensor(X_test_enc).to(device) # Convert testing data to tensors and move to the designated device\n", "y_test_tensors = torch.Tensor(y_test).view(-1, 1).to(device) # Convert testing labels to tensors, reshape them and move to the device" ] }, { "cell_type": "code", "execution_count": null, "id": "d8941331-bb15-43d3-aa83-938e47728b9d", "metadata": {}, "outputs": [], "source": [ "# Step 4 - Creating the sequences and the LSTM model\n", "\n", "#Create Transformer Model\n", "class TransformerModel(nn.Module):\n", " def __init__(self, input_size, hidden_layer_size, output_size, num_layers, nhead):\n", " super(TransformerModel, self).__init__()\n", " self.input_size = input_size # Number of input features\n", " self.hidden_layer_size = hidden_layer_size # Size of the hidden layer\n", " self.output_size = output_size # Size of the output layer\n", "\n", " # Embedding layer that maps input features to the hidden layer size\n", " self.embedding = nn.Linear(input_size, hidden_layer_size)\n", "\n", " # Transformer Encoder\n", " encoder_layers = nn.TransformerEncoderLayer(d_model=hidden_layer_size, nhead=nhead)\n", " self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers=num_layers)\n", "\n", " # Linear layer for output\n", " self.linear = nn.Linear(hidden_layer_size, output_size)\n", "\n", " def forward(self, src):\n", " # Apply embedding to the source data\n", " src = self.embedding(src)\n", "\n", " # Transformer Encoder processes the data\n", " src = src.permute(1, 0, 2) # Transformer expects [seq_len, batch, features]\n", " output = self.transformer_encoder(src)\n", "\n", " # Take only the output of the last time step for prediction\n", " output = output[-1]\n", "\n", " # Output through linear layer\n", " output = self.linear(output)\n", " return output\n", "\n", "# Parameters\n", "input_size = 3 # Number of input features\n", "hidden_layer_size = 100 # Size of the hidden layer\n", "output_size = 1 # Output size\n", "num_layers = 2 # Number of layers in the transformer\n", "nhead = 2 # Number of attention heads\n", "\n", "# Instantiate the model\n", "model = TransformerModel(input_size, hidden_layer_size, output_size, num_layers, nhead).to(device) # Create the model and move it to the designated device" ] }, { "cell_type": "code", "execution_count": null, "id": "b331b915-ca73-4a0d-93cd-4c59bf6cc766", "metadata": {}, "outputs": [], "source": [ "# Step 5 - Model Training\n", "\n", "# Define the number of epochs and batch size\n", "loss_function = nn.MSELoss() # Loss function for measuring the mean squared error loss\n", "optimizer = torch.optim.Adam(model.parameters(), lr=0.001) # Optimizer with a learning rate of 0.001\n", "epochs = 70 # Total number of epochs to train the model\n", "batch_size = 12 # Number of samples per batch\n", "\n", "# For storing the loss values\n", "train_losses = [] # List to store training loss values for each epoch\n", "test_losses = [] # List to store validation loss values for each epoch\n", "\n", "# Initialize variables for Early Stopping\n", "best_test_loss = float('inf') # Best validation loss seen so far, initialized to infinity\n", "best_model_path = 'best_model.pth' # Path to save the best performing model\n", "early_stopping_patience = 5 # Number of epochs to continue without improvement before stopping\n", "epochs_without_improvement = 0 # Counter to track epochs without improvement\n", "\n", "for epoch in range(epochs): # Loop over each epoch\n", " model.train() # Set the model to training mode\n", " train_loss = 0.0 # Reset training loss for this epoch\n", " for i in range(0, len(X_train_tensors), batch_size): # Iterate over the training data in batches\n", " X_batch = X_train_tensors[i:i + batch_size] # Get the current batch of input data\n", " y_batch = y_train_tensors[i:i + batch_size] # Get the current batch of target data\n", "\n", " # Dynamic adjustment of the batch size for the last batch of the epoch\n", " actual_batch_size = X_batch.size(0)\n", "\n", " optimizer.zero_grad() # Reset the gradients to zero\n", "\n", " # No need to manage hidden states for the Transformer model\n", " y_pred = model(X_batch) # Forward pass through the model\n", "\n", " loss = loss_function(y_pred, y_batch) # Calculate loss\n", " loss.backward() # Backpropagation\n", " optimizer.step() # Update model weights\n", " \n", " train_loss += loss.item() * actual_batch_size # Accumulate the loss\n", "\n", " # Calculate the average training loss for this epoch\n", " train_loss /= len(X_train_tensors)\n", " train_losses.append(train_loss)\n", "\n", " # Validation phase\n", " model.eval() # Set the model to evaluation mode\n", " test_loss = 0.0\n", " with torch.no_grad(): # No gradient calculations\n", " for i in range(0, len(X_test_tensors), batch_size):\n", " X_batch = X_test_tensors[i:i + batch_size]\n", " y_batch = y_test_tensors[i:i + batch_size]\n", "\n", " y_pred = model(X_batch) # Forward pass through the model for validation\n", " loss = loss_function(y_pred, y_batch) # Calculate loss\n", " test_loss += loss.item() * actual_batch_size # Accumulate the loss\n", " \n", " # Calculate the average validation loss for this epoch\n", " test_loss /= len(X_test_tensors)\n", " test_losses.append(test_loss)\n", "\n", " if test_loss < best_test_loss:\n", " best_test_loss = test_loss # Update the best test loss\n", " torch.save(model.state_dict(), best_model_path) # Save the best model\n", " epochs_without_improvement = 0 # Reset the improvement counter\n", " print(f'New best model saved at epoch {epoch+1} with test loss {test_loss}.')\n", " else:\n", " epochs_without_improvement += 1 # Increment the no-improvement counter\n", "\n", " print(f'Epoch {epoch + 1}, Train Loss: {train_loss}, Test Loss: {test_loss}')\n", "\n", " if epochs_without_improvement >= early_stopping_patience:\n", " print(f'Early Stopping after {epoch+1} epochs!') # Stop training if no improvement\n", " break" ] }, { "cell_type": "code", "execution_count": null, "id": "df8b91d4-cd5e-4359-9700-8f071cf4f5b4", "metadata": {}, "outputs": [], "source": [ "# Step 5 - Model Training\n", "\n", "#Printing the learning curve\n", "plt.plot(train_losses, label='Train Loss')\n", "plt.plot(test_losses, label='Test Loss')\n", "plt.xlabel('Epochs')\n", "plt.ylabel('Loss')\n", "plt.title('Learning Curve')\n", "plt.legend()\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "id": "ddd6b5d3-6cff-4d55-8272-c56b43074fe1", "metadata": {}, "outputs": [], "source": [ "# Step 6 - Rescaling, error metrics and visualization\n", "\n", "# Load the best model weights\n", "best_model_path = 'best_model.pth'\n", "model.load_state_dict(torch.load(best_model_path, map_location=device))\n", "\n", "# Evaluate the model\n", "model.eval()\n", "test_predictions = []\n", "with torch.no_grad(): # No gradients needed for inference\n", " for i in range(len(X_test_tensors)):\n", " X_batch = X_test_tensors[i].unsqueeze(0) # Add batch dimension\n", " y_pred = model(X_batch) # No hidden state needed for Transformer\n", " test_predictions.append(y_pred.squeeze().item()) # Remove batch dimension and convert to item\n", "\n", "# Denormalization of predictions and actual values\n", "test_predictions_denorm = lastgang_scaler.inverse_transform(np.array(test_predictions).reshape(-1, 1))\n", "y_test_denorm = lastgang_scaler.inverse_transform(y_test.reshape(-1, 1))\n", "\n", "# Calculation of error metrics\n", "mse = mean_squared_error(y_test_denorm, test_predictions_denorm) # Mean Squared Error\n", "mae = mean_absolute_error(y_test_denorm, test_predictions_denorm) # Mean Absolute Error\n", "rmse = np.sqrt(mse) # Root Mean Square Error\n", "mape = np.mean(np.abs((y_test_denorm - test_predictions_denorm) / y_test_denorm)) * 100 # Mean Absolute Percentage Error\n", "\n", "print(f\"Mean Squared Error (MSE): {mse}\")\n", "print(f\"Mean Absolute Error (MAE): {mae}\")\n", "print(f\"Root Mean Square Error (RMSE): {rmse}\")\n", "print(f\"Mean Absolute Percentage Error (MAPE): {mape}%\")\n", "\n", "# Visualization of actual vs predicted values\n", "plt.figure(figsize=(12, 6))\n", "test_dates = pd.date_range(start=dfEnergyAll.index[-len(X_test):][0], periods=len(y_test), freq='15T')\n", "plt.plot(test_dates, y_test_denorm, label='Actual Values', color='#3E7A6F')\n", "plt.plot(test_dates, test_predictions_denorm, label='Predicted Values', color='#7DFFE7')\n", "plt.legend()\n", "plt.title('Comparison of Actual and Predicted Values: Transformer over 48 Hours')\n", "plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M')) # Format x-axis for dates\n", "plt.gca().xaxis.set_major_locator(mdates.HourLocator(interval=3)) # Set major tick intervals\n", "plt.gcf().autofmt_xdate() # Auto-format date labels for better readability\n", "plt.show()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.8" } }, "nbformat": 4, "nbformat_minor": 5 }