{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Crude Oil Price Forecasting" ] }, { "cell_type": "code", "execution_count": 194, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import scipy\n", "import torch\n", "import os\n", "import scipy\n", "import datetime\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Data Preparation" ] }, { "cell_type": "code", "execution_count": 206, "metadata": {}, "outputs": [], "source": [ "# Dated brent data\n", "dated_brent_data = pd.read_csv(os.path.join('data', \n", " 'dated_brent_allbate.csv')\n", " ).rename({'assessDate':'Date'}, axis=1\n", " ).drop(columns=['Unnamed: 0', \n", " 'isCorrected', \n", " 'modDate', \n", " 'symbol'])\n", "\n", "dated_c = dated_brent_data[dated_brent_data['bate'] == 'c'].drop(columns=['bate']).rename({'value':'Closing'}, axis=1)\n", "dated_h = dated_brent_data[dated_brent_data['bate'] == 'h'].drop(columns=['bate']).rename({'value':'High'}, axis=1)\n", "dated_l = dated_brent_data[dated_brent_data['bate'] == 'l'].drop(columns=['bate']).rename({'value':'Low'}, axis=1)\n", "\n", "dated_brent_data = dated_c.merge(dated_h, on=\"Date\").merge(dated_l, on=\"Date\")\n", "dated_brent_data['Date'] = pd.to_datetime(dated_brent_data['Date'])\n", "\n", "# Date conversion\n", "\n", "def convert_date(date):\n", " format = '%m/%d/%Y'\n", " date_converted = datetime.datetime.strptime(date, format).date()\n", "\n", " return date_converted\n", "\n", "# # Crude oil volatility data\n", "# crude_volatility_data = pd.read_csv(os.path.join('data', 'cboe_ovx_futures.csv')\n", "# ).drop(columns=['Unnamed: 0', 'Volume', 'Open'])\n", "# crude_volatility_data['Date'] = pd.to_datetime(crude_volatility_data['Date'].apply(convert_date))\n", "\n", "# News data\n", "\n", "# --- #\n", "\n", "# Brent futures data\n", "brent_futures_data = pd.read_csv(os.path.join('data', 'brent_nmx.csv')\n", " ).drop(columns=['Open', 'Volume']).rename({'Close/Last':'Close'},axis=1)\n", "brent_futures_data['Date'] = pd.to_datetime(brent_futures_data['Date'].apply(convert_date))\n", "\n", "# Gasoline futures data\n", "gasoline_data = pd.read_csv(os.path.join('data', 'gasoline_nmx.csv')\n", " ).drop(columns=['Open', 'Volume']).rename({'Close/Last':'Close'},axis=1)\n", "gasoline_data['Date'] = pd.to_datetime(gasoline_data['Date'].apply(convert_date))\n", "\n", "# Dollar Index data\n", "dollar_data = pd.read_csv(os.path.join('data', 'dollar_index.csv')\n", " ).drop(columns=['Adj Close', 'Volume', 'Open']).dropna().drop_duplicates()\n", "dollar_data['Date'] = pd.to_datetime(dollar_data['Date'])\n", "\n", "# U.S. Crude Oil Production data\n", "production_data = pd.read_csv(os.path.join('data', 'U.S._Crude_Production_ThousandPerDay.csv'), delimiter = ',')\n", "\n", "# GPR \n", "gpr_data = pd.read_excel(os.path.join('data', 'data_gpr_daily_recent.xlsx')).drop(columns=['event', 'var_name', 'var_label', 'N10D', 'DAY']).rename({'date':'Date'}, axis=1)\n", "\n", "# Gold Price\n", "gold_price_data = pd.read_csv(os.path.join('data', 'GoldPrice.csv')\n", " ).drop(columns=['Open', 'Volume']).rename({'Close/Last':'Close'},axis=1)\n", "gold_price_data['Date'] = pd.to_datetime(gold_price_data['Date'].apply(convert_date))\n", "\n", "# Silver Price\n", "silver_price_data = pd.read_csv(os.path.join('data', 'Silver(CMX).csv')\n", " ).drop(columns=['Open', 'Volume']).rename({'Close/Last':'Close'},axis=1)\n", "silver_price_data['Date'] = pd.to_datetime(silver_price_data['Date'].apply(convert_date))\n", "\n", "# Platinum Price\n", "platinum_price_data = pd.read_csv(os.path.join('data', 'Platinum(NMX).csv')\n", " ).drop(columns=['Open', 'Volume']).rename({'Close/Last':'Close'},axis=1)\n", "platinum_price_data['Date'] = pd.to_datetime(platinum_price_data['Date'].apply(convert_date))\n", "\n", "# Platinum Price\n", "palladium_price_data = pd.read_csv(os.path.join('data', 'Palladium(NMX).csv')\n", " ).drop(columns=['Open', 'Volume']).rename({'Close/Last':'Close'},axis=1)\n", "palladium_price_data['Date'] = pd.to_datetime(palladium_price_data['Date'].apply(convert_date))\n", "\n", "\n", "# US Bond Rate\n", "us_bond_data = pd.read_csv(os.path.join('data', 'USBondRate.csv')\n", " ).drop(columns=['Open', 'Volume']).rename({'Close/Last':'Close'},axis=1)\n", "us_bond_data['Date'] = pd.to_datetime(us_bond_data['Date'].apply(convert_date))\n", "\n", "# S&P500\n", "sp500_data = pd.read_csv(os.path.join('data', 'S&P500 (SPX).csv')\n", " ).drop(columns=['Open']).rename({'Close/Last':'Close'},axis=1)\n", "sp500_data['Date'] = pd.to_datetime(sp500_data['Date'].apply(convert_date))\n", "\n", "# EUR to USD\n", "eur_usd_data = pd.read_csv(os.path.join('data', 'EURUSD.csv')\n", " ).drop(columns=['Open', 'Volume']).rename({'Close/Last':'Close'},axis=1)\n", "eur_usd_data['Date'] = pd.to_datetime(eur_usd_data['Date'].apply(convert_date))\n", "\n", "# Gold miners\n", "gold_miners_data = pd.read_csv(os.path.join('data', 'GoldMiners(GDX).csv')\n", " ).drop(columns=['Open', 'Volume']).rename({'Close/Last':'Close'},axis=1)\n", "gold_miners_data['Date'] = pd.to_datetime(gold_miners_data['Date'].apply(convert_date))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Predictor Variables:\n", "1. Silver Price\n", "2. Platinum Price\n", "3. Palladium Price\n", "4. Gold Price\n", "5. Gold Miners Price\n", "6. S&P500 Price\n", "7. Dollar Index (Closing), Percent Change -> NASDAQ\n", "8. Gasoline (Closing), Percent Change -> NASDAQ\n", "9. ? U.S. Crude Oil Production, MioBarrels/Day -> U.S. Energy Information Administration (EIA)\n", "10. GPR Data, Percent Change -> Economic Policy Uncertainty (EPU)\n", "11. US Bond Rate\n", "12. ? Euro to USD\n", "\n", "Target Variable:\n", "Brent Futures (Closing), $ (MA30) -> NASDAQ\n", "Dated Brent Price -> S&P Global Commodity Insights" ] }, { "cell_type": "code", "execution_count": 207, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dateGasDXYGold(CMX)Silver(CMX)Platinum(NMX)Palladium(NMX)USBondS&P500GoldMinersGPRDBrFu
02024-08-052.3336102.6900022444.427.207915.5826.10125.40625186.3335.34210.20671172.94
12024-08-022.3176103.2099992469.828.392967.6882.50125.06255346.5636.48207.79943873.52
22024-08-012.3980104.4199982480.828.477970.5895.10122.53125446.6837.27139.87809876.93
32024-07-312.4425104.0999982473.028.938986.4925.20120.78125522.3037.93135.20684877.91
42024-07-302.3443104.5500032451.928.525971.0881.70120.18755436.4436.9595.69639674.73
.......................................
24962014-08-292.622982.7500001287.419.4921424.7909.55140.09382003.3726.69157.33633495.96
24972014-08-282.590882.4800031290.419.6091425.2898.10141.71881996.7426.4697.53470694.55
24982014-08-272.590582.4300001283.419.4751419.9894.70141.15622000.1226.11143.08210893.88
24992014-08-262.600182.6500021285.219.4591419.6890.15140.53122000.0226.19118.14370793.86
25002014-08-252.595482.5500031278.919.4311418.4891.50140.71881997.9225.62167.60015993.35
\n", "

2501 rows × 12 columns

\n", "
" ], "text/plain": [ " date Gas DXY Gold(CMX) Silver(CMX) Platinum(NMX) \\\n", "0 2024-08-05 2.3336 102.690002 2444.4 27.207 915.5 \n", "1 2024-08-02 2.3176 103.209999 2469.8 28.392 967.6 \n", "2 2024-08-01 2.3980 104.419998 2480.8 28.477 970.5 \n", "3 2024-07-31 2.4425 104.099998 2473.0 28.938 986.4 \n", "4 2024-07-30 2.3443 104.550003 2451.9 28.525 971.0 \n", "... ... ... ... ... ... ... \n", "2496 2014-08-29 2.6229 82.750000 1287.4 19.492 1424.7 \n", "2497 2014-08-28 2.5908 82.480003 1290.4 19.609 1425.2 \n", "2498 2014-08-27 2.5905 82.430000 1283.4 19.475 1419.9 \n", "2499 2014-08-26 2.6001 82.650002 1285.2 19.459 1419.6 \n", "2500 2014-08-25 2.5954 82.550003 1278.9 19.431 1418.4 \n", "\n", " Palladium(NMX) USBond S&P500 GoldMiners GPRD BrFu \n", "0 826.10 125.4062 5186.33 35.34 210.206711 72.94 \n", "1 882.50 125.0625 5346.56 36.48 207.799438 73.52 \n", "2 895.10 122.5312 5446.68 37.27 139.878098 76.93 \n", "3 925.20 120.7812 5522.30 37.93 135.206848 77.91 \n", "4 881.70 120.1875 5436.44 36.95 95.696396 74.73 \n", "... ... ... ... ... ... ... \n", "2496 909.55 140.0938 2003.37 26.69 157.336334 95.96 \n", "2497 898.10 141.7188 1996.74 26.46 97.534706 94.55 \n", "2498 894.70 141.1562 2000.12 26.11 143.082108 93.88 \n", "2499 890.15 140.5312 2000.02 26.19 118.143707 93.86 \n", "2500 891.50 140.7188 1997.92 25.62 167.600159 93.35 \n", "\n", "[2501 rows x 12 columns]" ] }, "execution_count": 207, "metadata": {}, "output_type": "execute_result" } ], "source": [ "predictor = gasoline_data.drop(columns=['High', 'Low']).rename({'Close' : 'Gas'}, axis=1).merge(\n", " dollar_data.drop(columns=['High', 'Low']).rename({'Close' : 'DXY'}, axis=1), on='Date').merge(\n", " gold_price_data.drop(columns=['High', 'Low']).rename({'Close' : 'Gold(CMX)'}, axis=1), on='Date').merge(\n", " silver_price_data.drop(columns=['High', 'Low']).rename({'Close' : 'Silver(CMX)'}, axis=1), on='Date').merge(\n", " platinum_price_data.drop(columns=['High', 'Low']).rename({'Close' : 'Platinum(NMX)'}, axis=1), on='Date').merge(\n", " palladium_price_data.drop(columns=['High', 'Low']).rename({'Close' : 'Palladium(NMX)'}, axis=1), on='Date').merge(\n", " us_bond_data.drop(columns=['High', 'Low']).rename({'Close' : 'USBond'}, axis=1), on='Date').merge(\n", " sp500_data.drop(columns=['High', 'Low']).rename({'Close' : 'S&P500'}, axis=1), on='Date').merge(\n", " gold_miners_data.drop(columns=['High', 'Low']).rename({'Close' : 'GoldMiners'}, axis=1), on='Date').merge(\n", " gpr_data[['Date', 'GPRD']])\n", "\n", "target = brent_futures_data[['Date','Close']].rename({'Close' : 'BrFu'}, axis=1)\n", "\n", "df = predictor.merge(target, on ='Date').rename({'Date' : 'date'}, axis=1)\n", "df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Modelling" ] }, { "cell_type": "code", "execution_count": 208, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "from neuralforecast.core import NeuralForecast\n", "from neuralforecast.models import TSMixer, TSMixerx, NHITS, MLPMultivariate, NBEATSx\n", "from neuralforecast.losses.pytorch import MSE, MAE, MAPE\n", "from sklearn.preprocessing import StandardScaler\n", "import numpy as np\n", "import os" ] }, { "cell_type": "code", "execution_count": 209, "metadata": {}, "outputs": [], "source": [ "# Functions\n", "\n", "def createLag(data, amt=10):\n", " \"\"\"\n", " Create a lag inside dataframe, in business days\n", "\n", " Input:\n", " data -> Pandas dataframe \n", " amt -> int\n", "\n", " Output:\n", " Copy of pandas Dataframe\n", " \"\"\"\n", " if 'ds' in data:\n", " # Ensure 'ds' is a datetime column\n", " data['ds'] = pd.to_datetime(data['ds'], errors='coerce')\n", " \n", " # Check for any null values after conversion\n", " if data['ds'].isnull().any():\n", " print(\"Warning: Some dates couldn't be converted to datetime.\")\n", "\n", " copy = data.copy()\n", " # Apply the business day offset\n", " copy['ds'] = copy['ds'] + pd.tseries.offsets.BusinessDay(amt)\n", " return copy\n", " else:\n", " print(\"No 'ds' column found inside dataframe\")\n", " return data\n", "\n", "def trainTestValSplit(data, test_size, val_size):\n", " \"\"\"\n", " Splits data into train-test-validation sets\n", "\n", " Input:\n", " data -> Pandas dataframe\n", " test_size -> Proportion of data for test set\n", " val_size -> Proportiion of data fro validation set\n", "\n", " Output:\n", " This is not needed yet, actually\n", " \"\"\"\n", " pass\n", "\n", "def scaleStandard(df_col):\n", " \"\"\"\n", " Fits and returns a standard scaled version of a dataframe column\n", " \"\"\"\n", " scaler = StandardScaler()\n", " df_col = scaler.fit_transform(df_col)\n", " df_col = pd.DataFrame(df_col)\n", " return df_col, scaler\n", "\n", "def logReturn(data, df_col):\n", " \"\"\"\n", " Perform log return for a dataframe column\n", " \"\"\"\n", " new_col = np.log1p(data[df_col].pct_change())\n", " return new_col\n", "\n", "def transformData(data, log_return=[], standard_scale=[]):\n", " \"\"\"\n", " Perform essential transformations towards the dataframe.\n", " \n", " Args:\n", " data (pd.DataFrame): DataFrame with data to be transformed.\n", " log_return (list): List of columns for which log returns should be computed.\n", " standard_scale (list): List of columns to be standard scaled.\n", " \n", " Returns:\n", " data (pd.DataFrame): Transformed DataFrame.\n", " Optional: yScaler if 'y' is in standard_scale.\n", " \"\"\"\n", " y_log_ret = False\n", " y_std_scale = False\n", "\n", " data.sort_values(by='ds', inplace=True)\n", "\n", " # Apply log return transformation\n", " if len(log_return) > 0:\n", " for col1 in log_return:\n", " try:\n", " data[col1] = logReturn(data, col1)\n", " except Exception as e:\n", " print(f\"Error processing log return for column '{col1}': {e}\")\n", " pass\n", " \n", " if 'y' in log_return:\n", " y_log_ret = True\n", "\n", " # Apply standard scaling\n", " yScaler = None # Initialize to None\n", " if len(standard_scale) > 0:\n", " for col2 in standard_scale:\n", " try:\n", " data[col2], _ = scaleStandard(data[[col2]]) # Assuming scaleStandard handles 1D arrays\n", " except Exception as e:\n", " print(f\"Error processing standard scaling for column '{col2}': {e}\")\n", " pass\n", " \n", " if 'y' in standard_scale:\n", " try:\n", " data['y'], yScaler = scaleStandard(data['y']) # Scale 'y' and get scaler\n", " y_std_scale = True\n", " except Exception as e:\n", " print(f\"Error processing standard scaling for 'y': {e}\")\n", " pass\n", "\n", " # If 'yScaler' exists, return it along with the transformed data\n", " if yScaler:\n", " return data, yScaler\n", "\n", " return data" ] }, { "cell_type": "code", "execution_count": 210, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Total length is 2501, with validation and test size of 250 for each\n" ] } ], "source": [ "# Exogenous \n", "\n", "Y_df = df.rename({'date' : 'ds', 'BrFu' : 'y'}, axis=1\n", " )\n", "Y_df['unique_id'] = 'Dated'\n", "Y_df['ds'] = pd.to_datetime(Y_df['ds'])\n", "\n", "# We make validation and test splits\n", "n_time = len(Y_df.ds.unique())\n", "val_size = int(.1 * n_time)\n", "test_size = int(.1 * n_time)\n", "\n", "print(f'Total length is {n_time}, with validation and test size of {val_size} for each')" ] }, { "cell_type": "code", "execution_count": 211, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dsyGasDXYGold(CMX)Silver(CMX)Platinum(NMX)Palladium(NMX)USBondS&P500GoldMinersGPRDBrFuunique_id
02024-08-0572.942.3336102.6900022444.427.207915.5826.10125.40625186.3335.34210.20671172.94Dated
12024-08-0273.522.3176103.2099992469.828.392967.6882.50125.06255346.5636.48207.79943873.52Dated
22024-08-0176.932.3980104.4199982480.828.477970.5895.10122.53125446.6837.27139.87809876.93Dated
32024-07-3177.912.4425104.0999982473.028.938986.4925.20120.78125522.3037.93135.20684877.91Dated
42024-07-3074.732.3443104.5500032451.928.525971.0881.70120.18755436.4436.9595.69639674.73Dated
.............................................
24962014-08-2995.962.622982.7500001287.419.4921424.7909.55140.09382003.3726.69157.33633495.96Dated
24972014-08-2894.552.590882.4800031290.419.6091425.2898.10141.71881996.7426.4697.53470694.55Dated
24982014-08-2793.882.590582.4300001283.419.4751419.9894.70141.15622000.1226.11143.08210893.88Dated
24992014-08-2693.862.600182.6500021285.219.4591419.6890.15140.53122000.0226.19118.14370793.86Dated
25002014-08-2593.352.595482.5500031278.919.4311418.4891.50140.71881997.9225.62167.60015993.35Dated
\n", "

2501 rows × 14 columns

\n", "
" ], "text/plain": [ " ds y Gas DXY Gold(CMX) Silver(CMX) \\\n", "0 2024-08-05 72.94 2.3336 102.690002 2444.4 27.207 \n", "1 2024-08-02 73.52 2.3176 103.209999 2469.8 28.392 \n", "2 2024-08-01 76.93 2.3980 104.419998 2480.8 28.477 \n", "3 2024-07-31 77.91 2.4425 104.099998 2473.0 28.938 \n", "4 2024-07-30 74.73 2.3443 104.550003 2451.9 28.525 \n", "... ... ... ... ... ... ... \n", "2496 2014-08-29 95.96 2.6229 82.750000 1287.4 19.492 \n", "2497 2014-08-28 94.55 2.5908 82.480003 1290.4 19.609 \n", "2498 2014-08-27 93.88 2.5905 82.430000 1283.4 19.475 \n", "2499 2014-08-26 93.86 2.6001 82.650002 1285.2 19.459 \n", "2500 2014-08-25 93.35 2.5954 82.550003 1278.9 19.431 \n", "\n", " Platinum(NMX) Palladium(NMX) USBond S&P500 GoldMiners \\\n", "0 915.5 826.10 125.4062 5186.33 35.34 \n", "1 967.6 882.50 125.0625 5346.56 36.48 \n", "2 970.5 895.10 122.5312 5446.68 37.27 \n", "3 986.4 925.20 120.7812 5522.30 37.93 \n", "4 971.0 881.70 120.1875 5436.44 36.95 \n", "... ... ... ... ... ... \n", "2496 1424.7 909.55 140.0938 2003.37 26.69 \n", "2497 1425.2 898.10 141.7188 1996.74 26.46 \n", "2498 1419.9 894.70 141.1562 2000.12 26.11 \n", "2499 1419.6 890.15 140.5312 2000.02 26.19 \n", "2500 1418.4 891.50 140.7188 1997.92 25.62 \n", "\n", " GPRD BrFu unique_id \n", "0 210.206711 72.94 Dated \n", "1 207.799438 73.52 Dated \n", "2 139.878098 76.93 Dated \n", "3 135.206848 77.91 Dated \n", "4 95.696396 74.73 Dated \n", "... ... ... ... \n", "2496 157.336334 95.96 Dated \n", "2497 97.534706 94.55 Dated \n", "2498 143.082108 93.88 Dated \n", "2499 118.143707 93.86 Dated \n", "2500 167.600159 93.35 Dated \n", "\n", "[2501 rows x 14 columns]" ] }, "execution_count": 211, "metadata": {}, "output_type": "execute_result" } ], "source": [ "Y_df_test = Y_df.copy().rename({'y' : 'BrFu'}, axis=1)\n", "\n", "# Y_df_test = createLag(Y_df_test, amt=30)\n", "last_df = Y_df[['ds', 'y']].merge(Y_df_test, on = 'ds')\n", "last_df" ] }, { "cell_type": "code", "execution_count": 212, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Error processing standard scaling for 'y': Expected 2D array, got 1D array instead:\n", "array=[0.57819032 0.6095309 0.79379192 ... 1.70969345 1.70861274 1.68105464].\n", "Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dsyGasDXYGold(CMX)Silver(CMX)Platinum(NMX)Palladium(NMX)USBondS&P500GoldMinersGPRDBrFuunique_id
25002014-08-250.5781900.6961931.0144142.6010051.767282-0.488608-0.829241-1.4410771.9963841.3172661.74720493.35Dated
24992014-08-260.6095310.6685361.1143622.6750762.043063-0.048414-0.742670-1.4615732.1581391.4887411.70218093.86Dated
24982014-08-270.7937920.8075121.3469332.7071542.062844-0.023911-0.723329-1.6125232.2592121.6075700.43182993.88Dated
24972014-08-280.8467470.8844321.2854272.6844082.1701310.110428-0.677127-1.7168812.3355511.7068450.34446294.55Dated
24962014-08-290.6749140.7146881.3719212.6228772.074015-0.019687-0.743898-1.7522852.2488741.559436-0.39451395.96Dated
.............................................
42024-07-301.8220871.196264-2.818213-0.773021-0.0282043.813639-0.701149-0.565209-1.2168680.0161630.75835574.73Dated
32024-07-311.7458971.140777-2.870109-0.764272-0.0009753.817864-0.718724-0.468305-1.223561-0.018432-0.36013077.91Dated
22024-08-011.7096931.140259-2.879720-0.784686-0.0321613.773084-0.723943-0.501855-1.220149-0.0710780.49175576.93Dated
12024-08-021.7086131.156853-2.837434-0.779436-0.0358843.770549-0.730927-0.539126-1.220250-0.0590450.02532573.52Dated
02024-08-051.6810551.148729-2.856654-0.797808-0.0424013.760410-0.728855-0.527938-1.222370-0.1447820.95032272.94Dated
\n", "

2501 rows × 14 columns

\n", "
" ], "text/plain": [ " ds y Gas DXY Gold(CMX) Silver(CMX) \\\n", "2500 2014-08-25 0.578190 0.696193 1.014414 2.601005 1.767282 \n", "2499 2014-08-26 0.609531 0.668536 1.114362 2.675076 2.043063 \n", "2498 2014-08-27 0.793792 0.807512 1.346933 2.707154 2.062844 \n", "2497 2014-08-28 0.846747 0.884432 1.285427 2.684408 2.170131 \n", "2496 2014-08-29 0.674914 0.714688 1.371921 2.622877 2.074015 \n", "... ... ... ... ... ... ... \n", "4 2024-07-30 1.822087 1.196264 -2.818213 -0.773021 -0.028204 \n", "3 2024-07-31 1.745897 1.140777 -2.870109 -0.764272 -0.000975 \n", "2 2024-08-01 1.709693 1.140259 -2.879720 -0.784686 -0.032161 \n", "1 2024-08-02 1.708613 1.156853 -2.837434 -0.779436 -0.035884 \n", "0 2024-08-05 1.681055 1.148729 -2.856654 -0.797808 -0.042401 \n", "\n", " Platinum(NMX) Palladium(NMX) USBond S&P500 GoldMiners GPRD \\\n", "2500 -0.488608 -0.829241 -1.441077 1.996384 1.317266 1.747204 \n", "2499 -0.048414 -0.742670 -1.461573 2.158139 1.488741 1.702180 \n", "2498 -0.023911 -0.723329 -1.612523 2.259212 1.607570 0.431829 \n", "2497 0.110428 -0.677127 -1.716881 2.335551 1.706845 0.344462 \n", "2496 -0.019687 -0.743898 -1.752285 2.248874 1.559436 -0.394513 \n", "... ... ... ... ... ... ... \n", "4 3.813639 -0.701149 -0.565209 -1.216868 0.016163 0.758355 \n", "3 3.817864 -0.718724 -0.468305 -1.223561 -0.018432 -0.360130 \n", "2 3.773084 -0.723943 -0.501855 -1.220149 -0.071078 0.491755 \n", "1 3.770549 -0.730927 -0.539126 -1.220250 -0.059045 0.025325 \n", "0 3.760410 -0.728855 -0.527938 -1.222370 -0.144782 0.950322 \n", "\n", " BrFu unique_id \n", "2500 93.35 Dated \n", "2499 93.86 Dated \n", "2498 93.88 Dated \n", "2497 94.55 Dated \n", "2496 95.96 Dated \n", "... ... ... \n", "4 74.73 Dated \n", "3 77.91 Dated \n", "2 76.93 Dated \n", "1 73.52 Dated \n", "0 72.94 Dated \n", "\n", "[2501 rows x 14 columns]" ] }, "execution_count": 212, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test_data = transformData(last_df,\n", " #log_return = ['y', 'BrFu', 'Gas', 'DXY', 'Gold(CMX)', 'Silver(CMX)', 'Platinum(NMX)', 'Palladium(NMX)', 'USBond', 'S&P500', 'GoldMiners', 'GPRD'],\n", " standard_scale = ['y', 'Gas', 'DXY', 'Gold(CMX)', 'Silver(CMX)', 'Platinum(NMX)', 'Palladium(NMX)', 'USBond', 'S&P500', 'GoldMiners', 'GPRD']\n", " )\n", "test_data.dropna(inplace=True)\n", "test_data" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import pandas as pd\n", "import numpy as np\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "import os\n", "\n", "def plot_correlation_heatmap(df):\n", " \"\"\"\n", " Plot a correlation heatmap for all columns in a pandas DataFrame,\n", " showing only the lower triangle of the matrix.\n", " \n", " Input:\n", " df -> pandas DataFrame\n", " \n", " Output:\n", " Correlation heatmap (lower triangle)\n", " \"\"\"\n", " corr_matrix = df.corr()\n", "\n", " mask = np.triu(np.ones_like(corr_matrix, dtype=bool))\n", "\n", " plt.figure(figsize=(10, 8))\n", "\n", " # Lower triangle correlation heatmap\n", " sns.heatmap(corr_matrix, mask=mask, annot=True, cmap='coolwarm', vmin=-1, vmax=1, \n", " square=True, linewidths=.5, cbar_kws={\"shrink\": .8})\n", "\n", " plt.title(\"Lower Triangle Correlation Heatmap\", fontsize=16)\n", "\n", " plt.show()\n", "\n", "test = pd.read_csv(os.path.join('artifacts', '4b950ff8-8caf-4f33-9a68-345ea4464fc8', 'transformed_dataset.csv'))\n", "# plot_correlation_heatmap(test_data.drop(columns=['ds', 'unique_id']))\n", "plot_correlation_heatmap(test.drop(columns=['Date', 'Unnamed: 0']))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Statsforecast" ] }, { "cell_type": "code", "execution_count": 341, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "e:\\VM\\miniconda3\\envs\\multi-its\\lib\\site-packages\\statsforecast\\core.py:510: FutureWarning: The `df` argument of the StatsForecast constructor as well as reusing stored dfs from other methods is deprecated and will raise an error in a future version. Please provide the `df` argument to the corresponding method instead, e.g. fit/forecast.\n", " warnings.warn(\n", "e:\\VM\\miniconda3\\envs\\multi-its\\lib\\site-packages\\statsforecast\\core.py:510: FutureWarning: The `df` argument of the StatsForecast constructor as well as reusing stored dfs from other methods is deprecated and will raise an error in a future version. Please provide the `df` argument to the corresponding method instead, e.g. fit/forecast.\n", " warnings.warn(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Dated - ARIMA(5,0,2)(0,0,1)[30] with zero mean \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "e:\\VM\\miniconda3\\envs\\multi-its\\lib\\site-packages\\statsforecast\\core.py:528: FutureWarning: In a future version the predictions will have the id as a column. You can set the `NIXTLA_ID_AS_COL` environment variable to adopt the new behavior and to suppress this warning.\n", " warnings.warn(\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dsAutoARIMAAutoARIMA-lo-95AutoARIMA-hi-95
unique_id
Dated2024-08-060.000161-0.0577620.058084
Dated2024-08-070.005220-0.0527050.063145
Dated2024-08-08-0.002106-0.0600390.055827
Dated2024-08-09-0.002936-0.0611250.055254
Dated2024-08-10-0.002981-0.0611850.055223
Dated2024-08-11-0.002235-0.0605560.056086
Dated2024-08-12-0.001244-0.0595780.057090
Dated2024-08-13-0.001598-0.0599810.056785
Dated2024-08-14-0.001097-0.0594800.057286
Dated2024-08-15-0.000821-0.0592390.057597
Dated2024-08-16-0.000784-0.0592020.057635
Dated2024-08-17-0.001267-0.0597130.057179
Dated2024-08-18-0.001201-0.0596480.057245
Dated2024-08-19-0.000658-0.0591210.057806
Dated2024-08-20-0.000674-0.0591380.057790
Dated2024-08-21-0.000226-0.0587010.058248
Dated2024-08-22-0.001502-0.0599780.056973
Dated2024-08-23-0.000578-0.0590610.057904
Dated2024-08-240.000512-0.0579710.058994
Dated2024-08-250.000194-0.0582940.058681
Dated2024-08-260.000129-0.0583590.058617
Dated2024-08-27-0.000743-0.0592340.057748
Dated2024-08-28-0.000714-0.0592050.057778
Dated2024-08-290.000102-0.0583910.058595
Dated2024-08-300.000239-0.0582550.058732
Dated2024-08-310.000167-0.0583280.058662
Dated2024-09-01-0.001718-0.0602140.056777
Dated2024-09-020.000138-0.0583580.058634
Dated2024-09-030.001258-0.0572390.059754
Dated2024-09-040.000057-0.0584400.058554
\n", "
" ], "text/plain": [ " ds AutoARIMA AutoARIMA-lo-95 AutoARIMA-hi-95\n", "unique_id \n", "Dated 2024-08-06 0.000161 -0.057762 0.058084\n", "Dated 2024-08-07 0.005220 -0.052705 0.063145\n", "Dated 2024-08-08 -0.002106 -0.060039 0.055827\n", "Dated 2024-08-09 -0.002936 -0.061125 0.055254\n", "Dated 2024-08-10 -0.002981 -0.061185 0.055223\n", "Dated 2024-08-11 -0.002235 -0.060556 0.056086\n", "Dated 2024-08-12 -0.001244 -0.059578 0.057090\n", "Dated 2024-08-13 -0.001598 -0.059981 0.056785\n", "Dated 2024-08-14 -0.001097 -0.059480 0.057286\n", "Dated 2024-08-15 -0.000821 -0.059239 0.057597\n", "Dated 2024-08-16 -0.000784 -0.059202 0.057635\n", "Dated 2024-08-17 -0.001267 -0.059713 0.057179\n", "Dated 2024-08-18 -0.001201 -0.059648 0.057245\n", "Dated 2024-08-19 -0.000658 -0.059121 0.057806\n", "Dated 2024-08-20 -0.000674 -0.059138 0.057790\n", "Dated 2024-08-21 -0.000226 -0.058701 0.058248\n", "Dated 2024-08-22 -0.001502 -0.059978 0.056973\n", "Dated 2024-08-23 -0.000578 -0.059061 0.057904\n", "Dated 2024-08-24 0.000512 -0.057971 0.058994\n", "Dated 2024-08-25 0.000194 -0.058294 0.058681\n", "Dated 2024-08-26 0.000129 -0.058359 0.058617\n", "Dated 2024-08-27 -0.000743 -0.059234 0.057748\n", "Dated 2024-08-28 -0.000714 -0.059205 0.057778\n", "Dated 2024-08-29 0.000102 -0.058391 0.058595\n", "Dated 2024-08-30 0.000239 -0.058255 0.058732\n", "Dated 2024-08-31 0.000167 -0.058328 0.058662\n", "Dated 2024-09-01 -0.001718 -0.060214 0.056777\n", "Dated 2024-09-02 0.000138 -0.058358 0.058634\n", "Dated 2024-09-03 0.001258 -0.057239 0.059754\n", "Dated 2024-09-04 0.000057 -0.058440 0.058554" ] }, "execution_count": 341, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "from statsforecast.models import AutoARIMA\n", "from statsforecast import StatsForecast\n", "from statsforecast.arima import arima_string\n", "\n", "# Create a list of models and instantiation parameters\n", "models = [\n", " AutoARIMA(season_length=30, max_p=5, max_d=2, max_q=5, seasonal=True),\n", "]\n", "\n", "# Instantiate StatsForecast class as sf\n", "sf = StatsForecast(\n", " df=test_data[['ds','y','unique_id']], \n", " models=models,\n", " freq='D', \n", " n_jobs=2\n", ")\n", "\n", "forecasts_df = sf.fit().predict(h=30, level=[95])\n", "\n", "for model, uid in zip(sf.fitted_, forecasts_df.index.unique()):\n", " print(uid, ' - ', arima_string(model[0].model_))\n", "\n", "forecasts_df\n", "\n", "# Check parameters\n", "# https://stackoverflow.com/questions/77436740/statforecast-autoarima-how-to-run-different-models-for-each-unique-ids" ] }, { "cell_type": "code", "execution_count": 342, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import matplotlib.pyplot as plt\n", "import pandas as pd\n", "\n", "def plot_forecast(test_data, forecasts_df, horizon=30):\n", " \"\"\"\n", " Plots historical data and forecasted values, emphasizing the forecast.\n", " \n", " Args:\n", " test_data (pd.DataFrame): Historical data with columns 'ds' (date) and 'y' (values).\n", " forecasts_df (pd.DataFrame): Forecasted values with index 'ds' and columns for predictions.\n", " horizon (int): Number of forecasted steps.\n", " \"\"\"\n", " # Prepare the forecasted data\n", " forecasted_dates = pd.date_range(test_data['ds'].max(), periods=horizon+1, freq='D')[1:]\n", " forecasted_values = forecasts_df['AutoARIMA'].values[:horizon]\n", "\n", " # Combine forecasted values with their corresponding dates\n", " forecast_df = pd.DataFrame({'ds': forecasted_dates, 'forecast': forecasted_values})\n", "\n", " # Plot the historical and forecasted data\n", " plt.figure(figsize=(10, 6))\n", " \n", " # Plot historical data with lighter color and transparency\n", " plt.plot(test_data['ds'], test_data['y'], label='Historical', color='blue', alpha=0.6, linewidth=2)\n", " \n", " # Plot forecasted data with a thicker, dashed line to stand out\n", " plt.plot(forecast_df['ds'], forecast_df['forecast'], label='Forecast', color='orange' , linewidth=3)\n", " \n", " # Add labels and title\n", " plt.xlabel('Date')\n", " plt.ylabel('Values')\n", " plt.title('Crude Oil Price Forecast vs Historical Data')\n", " plt.legend()\n", " \n", " # Show the plot\n", " plt.show()\n", "\n", "# Example usage:\n", "plot_forecast(last_df, forecasts_df, horizon=30)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Neuralforecast" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Seed set to 12345678\n", "Seed set to 12345678\n", "Seed set to 12345678\n" ] } ], "source": [ "horizon = 15\n", "input_size = 30\n", "models = [\n", " TSMixer(h=horizon,\n", " input_size=input_size,\n", " n_series=1,\n", " max_steps=1000,\n", " val_check_steps=50,\n", " early_stop_patience_steps=5,\n", " scaler_type='identity',\n", " loss=MAE(),\n", " valid_loss=MAE(),\n", " random_seed=12345678,\n", " ),\n", " TSMixerx(h=horizon,\n", " input_size=input_size,\n", " n_series=1,\n", " max_steps=1000,\n", " val_check_steps=50,\n", " early_stop_patience_steps=5,\n", " scaler_type='identity',\n", " dropout=0.7,\n", " loss=MAE(),\n", " valid_loss=MAE(),\n", " random_seed=12345678,\n", " hist_exog_list=['Gas', 'DXY', 'Gold(CMX)', 'Silver(CMX)', 'Platinum(NMX)', 'Palladium(NMX)', 'USBond', 'S&P500', 'GoldMiners', 'GPRD'],\n", " ),\n", " NBEATSx(h=horizon,\n", " input_size=horizon,\n", " max_steps=1000,\n", " val_check_steps=50,\n", " early_stop_patience_steps=5,\n", " scaler_type='identity',\n", " loss=MAE(),\n", " valid_loss=MAE(),\n", " random_seed=12345678,\n", " hist_exog_list=['Gas', 'DXY', 'Gold(CMX)', 'Silver(CMX)', 'Platinum(NMX)', 'Palladium(NMX)', 'USBond', 'S&P500', 'GoldMiners', 'GPRD']\n", " ),\n", "]" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [], "source": [ "nf = NeuralForecast(\n", " models=models,\n", " freq='D')" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "GPU available: True (cuda), used: True\n", "TPU available: False, using: 0 TPU cores\n", "HPU available: False, using: 0 HPUs\n", "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n", "\n", " | Name | Type | Params | Mode \n", "-------------------------------------------------------------------\n", "0 | loss | MAE | 0 | train\n", "1 | valid_loss | MAE | 0 | train\n", "2 | padder | ConstantPad1d | 0 | train\n", "3 | scaler | TemporalNorm | 0 | train\n", "4 | norm | ReversibleInstanceNorm1d | 2 | train\n", "5 | mixing_layers | Sequential | 2.5 K | train\n", "6 | out | Linear | 465 | train\n", "-------------------------------------------------------------------\n", "3.0 K Trainable params\n", "0 Non-trainable params\n", "3.0 K Total params\n", "0.012 Total estimated model params size (MB)\n", "29 Modules in train mode\n", "0 Modules in eval mode\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Epoch 549: 100%|██████████| 1/1 [00:00<00:00, 15.68it/s, v_num=12, train_loss_step=3.290, train_loss_epoch=3.290, valid_loss=4.060]" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Trainer already configured with model summary callbacks: []. Skipping setting a default `ModelSummary` callback.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "GPU available: True (cuda), used: True\n", "TPU available: False, using: 0 TPU cores\n", "HPU available: False, using: 0 HPUs\n", "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 66.03it/s]" ] }, { "name": "stderr", "output_type": "stream", "text": [ "GPU available: True (cuda), used: True\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "TPU available: False, using: 0 TPU cores\n", "HPU available: False, using: 0 HPUs\n", "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n", "\n", " | Name | Type | Params | Mode \n", "-------------------------------------------------------------------------\n", "0 | loss | MAE | 0 | train\n", "1 | valid_loss | MAE | 0 | train\n", "2 | padder | ConstantPad1d | 0 | train\n", "3 | scaler | TemporalNorm | 0 | train\n", "4 | norm | ReversibleInstanceNorm1d | 2 | train\n", "5 | temporal_projection | Linear | 465 | train\n", "6 | feature_mixer_hist | FeatureMixing | 7.6 K | train\n", "7 | first_mixing | MixingLayer | 12.4 K | train\n", "8 | mixing_block | Sequential | 24.8 K | train\n", "9 | out | Linear | 65 | train\n", "-------------------------------------------------------------------------\n", "45.3 K Trainable params\n", "0 Non-trainable params\n", "45.3 K Total params\n", "0.181 Total estimated model params size (MB)\n", "48 Modules in train mode\n", "0 Modules in eval mode\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Epoch 299: 100%|██████████| 1/1 [00:00<00:00, 12.99it/s, v_num=14, train_loss_step=3.880, train_loss_epoch=3.880, valid_loss=6.120]" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Trainer already configured with model summary callbacks: []. Skipping setting a default `ModelSummary` callback.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "GPU available: True (cuda), used: True\n", "TPU available: False, using: 0 TPU cores\n", "HPU available: False, using: 0 HPUs\n", "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 68.88it/s]" ] }, { "name": "stderr", "output_type": "stream", "text": [ "GPU available: True (cuda), used: True\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "TPU available: False, using: 0 TPU cores\n", "HPU available: False, using: 0 HPUs\n", "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n", "\n", " | Name | Type | Params | Mode \n", "-------------------------------------------------------\n", "0 | loss | MAE | 0 | train\n", "1 | valid_loss | MAE | 0 | train\n", "2 | padder_train | ConstantPad1d | 0 | train\n", "3 | scaler | TemporalNorm | 0 | train\n", "4 | blocks | ModuleList | 2.7 M | train\n", "-------------------------------------------------------\n", "2.7 M Trainable params\n", "930 Non-trainable params\n", "2.7 M Total params\n", "10.668 Total estimated model params size (MB)\n", "32 Modules in train mode\n", "0 Modules in eval mode\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Epoch 549: 100%|██████████| 1/1 [00:00<00:00, 14.88it/s, v_num=16, train_loss_step=2.540, train_loss_epoch=2.540, valid_loss=4.240]" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Trainer already configured with model summary callbacks: []. Skipping setting a default `ModelSummary` callback.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "GPU available: True (cuda), used: True\n", "TPU available: False, using: 0 TPU cores\n", "HPU available: False, using: 0 HPUs\n", "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 95.13it/s] \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "e:\\VM\\miniconda3\\envs\\multi-its\\lib\\site-packages\\neuralforecast\\core.py:209: FutureWarning: In a future version the predictions will have the id as a column. You can set the `NIXTLA_ID_AS_COL` environment variable to adopt the new behavior and to suppress this warning.\n", " warnings.warn(\n" ] } ], "source": [ "Y_hat_df = nf.cross_validation(df=last_df,\n", " val_size=val_size,\n", " test_size=test_size,\n", " n_windows=None\n", " )\n", "Y_hat_df = Y_hat_df.reset_index()" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [], "source": [ "# Y_hat_df['y'] = yScaler.inverse_transform(Y_hat_df[['y']])\n", "# Y_hat_df['TSMixer'] = yScaler.inverse_transform(Y_hat_df[['TSMixer']])\n", "# Y_hat_df['TSMixerx'] = yScaler.inverse_transform(Y_hat_df[['TSMixerx']])\n", "# Y_hat_df['NBEATSx'] = yScaler.inverse_transform(Y_hat_df[['NBEATSx']])" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
unique_iddscutoffTSMixerTSMixerxNBEATSxy
0Dated2023-08-082023-08-0782.47447285.80295682.13466682.92
1Dated2023-08-092023-08-0782.75922486.31986282.28601184.40
2Dated2023-08-102023-08-0783.01303985.74777282.53678982.82
3Dated2023-08-112023-08-0783.14147286.65340482.56916083.19
4Dated2023-08-142023-08-0782.93872186.65132982.85130382.51
........................
3535Dated2024-07-302024-07-1583.34606984.19444383.03405874.73
3536Dated2024-07-312024-07-1583.28162485.32398283.13826077.91
3537Dated2024-08-012024-07-1582.87615284.95921382.85265476.93
3538Dated2024-08-022024-07-1583.14497484.94340582.72012373.52
3539Dated2024-08-052024-07-1583.18104684.93431182.85495072.94
\n", "

3540 rows × 7 columns

\n", "
" ], "text/plain": [ " unique_id ds cutoff TSMixer TSMixerx NBEATSx y\n", "0 Dated 2023-08-08 2023-08-07 82.474472 85.802956 82.134666 82.92\n", "1 Dated 2023-08-09 2023-08-07 82.759224 86.319862 82.286011 84.40\n", "2 Dated 2023-08-10 2023-08-07 83.013039 85.747772 82.536789 82.82\n", "3 Dated 2023-08-11 2023-08-07 83.141472 86.653404 82.569160 83.19\n", "4 Dated 2023-08-14 2023-08-07 82.938721 86.651329 82.851303 82.51\n", "... ... ... ... ... ... ... ...\n", "3535 Dated 2024-07-30 2024-07-15 83.346069 84.194443 83.034058 74.73\n", "3536 Dated 2024-07-31 2024-07-15 83.281624 85.323982 83.138260 77.91\n", "3537 Dated 2024-08-01 2024-07-15 82.876152 84.959213 82.852654 76.93\n", "3538 Dated 2024-08-02 2024-07-15 83.144974 84.943405 82.720123 73.52\n", "3539 Dated 2024-08-05 2024-07-15 83.181046 84.934311 82.854950 72.94\n", "\n", "[3540 rows x 7 columns]" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "Y_hat_df" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import matplotlib.pyplot as plt\n", "Y_plot = Y_hat_df[Y_hat_df['unique_id']=='Dated']\n", "cutoffs = Y_hat_df['cutoff'].unique()[::horizon]\n", "Y_plot = Y_plot[Y_hat_df['cutoff'].isin(cutoffs)]\n", "\n", "plt.figure(figsize=(20,5))\n", "plt.plot(Y_plot['ds'], Y_plot['y'], label='True')\n", "for model in models:\n", " plt.plot(Y_plot['ds'], Y_plot[f'{model}'], label=f'{model}')\n", "plt.xlabel('Datestamp')\n", "plt.ylabel('OT')\n", "plt.grid()\n", "plt.legend()" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "TSMixer horizon 2 - MAE: 1.445\n", "TSMixer horizon 2 - MSE: 3.415\n", "TSMixer horizon 2 - MAPE: 0.018\n", "TSMixerx horizon 2 - MAE: 3.171\n", "TSMixerx horizon 2 - MSE: 14.609\n", "TSMixerx horizon 2 - MAPE: 0.040\n", "NBEATSx horizon 2 - MAE: 1.364\n", "NBEATSx horizon 2 - MSE: 3.087\n", "NBEATSx horizon 2 - MAPE: 0.017\n" ] } ], "source": [ "from neuralforecast.losses.numpy import mse, mae, mape\n", "\n", "for model in models:\n", " mae_model = mae(Y_hat_df['y'], Y_hat_df[f'{model}']) \n", " mse_model = mse(Y_hat_df['y'], Y_hat_df[f'{model}'])\n", " mape_model = mape(Y_hat_df['y'], Y_hat_df[f'{model}'])\n", " print(f'{model} horizon {horizon} - MAE: {mae_model:.3f}')\n", " print(f'{model} horizon {horizon} - MSE: {mse_model:.3f}')\n", " print(f'{model} horizon {horizon} - MAPE: {mape_model:.3f}')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Hyperparameter Tuning" ] }, { "cell_type": "code", "execution_count": 107, "metadata": {}, "outputs": [], "source": [ "from neuralforecast.losses.pytorch import MAE\n", "from neuralforecast.auto import AutoNHITS, AutoTSMixer\n", "from neuralforecast import NeuralForecast" ] }, { "cell_type": "code", "execution_count": 96, "metadata": {}, "outputs": [], "source": [ "import optuna\n", "optuna.logging.set_verbosity(optuna.logging.WARNING) # Use this to disable training prints from optuna" ] }, { "cell_type": "code", "execution_count": 122, "metadata": {}, "outputs": [], "source": [ "horizon_len = 12\n", "\n", "def config_nhits(trial):\n", " return {\n", " \"max_steps\": 1000, # Number of SGD steps\n", " \"input_size\" : trial.suggest_categorical(\"input_size\", [horizon_len, horizon_len*2, horizon_len*3]), # Size of input window\n", " \"learning_rate\": trial.suggest_loguniform(\"learning_rate\", 1e-5, 1e-1), # Initial Learning rate\n", " \"n_pool_kernel_size\": trial.suggest_categorical(\"n_pool_kernel_size\", [[2, 2, 2], [16, 8, 1]]), # MaxPool's Kernel size\n", " \"n_freq_downsample\": trial.suggest_categorical(\"n_freq_downsample\", [[168, 24, 1], [24, 12, 1], [1, 1, 1]]), # Interpolation expressivity ratios\n", " \"val_check_steps\": 50, # Compute validation every 50 steps\n", " \"early_stop_patience_steps\": 5, # Stops at 5 steps max if loss doesn't get beter\n", " \"random_seed\": trial.suggest_int(\"random_seed\", 1, 10), # Random seed\n", " }\n", "\n", "def config_tsmixer(trial):\n", " return {\n", " \"max_steps\": 1000,\n", " \"n_series\" : 1,\n", " \"input_size\" : trial.suggest_categorical(\"input_size\", [horizon_len, horizon_len*2, horizon_len*3]),\n", " \"learning_rate\": trial.suggest_loguniform(\"learning_rate\", 1e-5, 1e-1),\n", " \"ff_dim\": trial.suggest_categorical(\"ff_dim\", [32,64,128]),\n", " \"n_block\": trial.suggest_categorical(\"n_block\", [2,4,8]),\n", " \"val_check_steps\": 50,\n", " \"early_stop_patience_steps\": 5,\n", " \"scaler_type\": 'identity',\n", " }" ] }, { "cell_type": "code", "execution_count": 123, "metadata": {}, "outputs": [], "source": [ "model = [AutoNHITS(h=horizon_len,\n", " loss=MAE(),\n", " valid_loss=MAE(),\n", " config=config_nhits,\n", " search_alg=optuna.samplers.TPESampler(),\n", " backend='optuna',\n", " num_samples=10),\n", " AutoTSMixer(h=horizon_len,\n", " n_series=1,\n", " loss=MAE(),\n", " valid_loss=MAE(),\n", " config=config_tsmixer,\n", " search_alg=optuna.samplers.TPESampler(),\n", " backend='optuna',\n", " num_samples=10)]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "nf = NeuralForecast(models=model, freq='D')\n", "nf.fit(df=last_df, val_size=24)" ] }, { "cell_type": "code", "execution_count": 133, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
numbervaluedatetime_startdatetime_completedurationparams_ff_dimparams_input_sizeparams_learning_rateparams_n_blockuser_attrs_METRICSstate
002.9300352024-09-22 11:47:26.2909772024-09-22 11:47:55.2308300 days 00:00:28.939853128120.0000202{'loss': tensor(2.9300), 'train_loss': tensor(...COMPLETE
113.1612882024-09-22 11:47:55.2308302024-09-22 11:48:05.8779610 days 00:00:10.64713164240.0012222{'loss': tensor(3.1613), 'train_loss': tensor(...COMPLETE
222.7004252024-09-22 11:48:05.8789722024-09-22 11:48:21.4981220 days 00:00:15.61915032360.0000258{'loss': tensor(2.7004), 'train_loss': tensor(...COMPLETE
332.9850462024-09-22 11:48:21.4996652024-09-22 11:48:37.4708860 days 00:00:15.971221128360.0705604{'loss': tensor(2.9850), 'train_loss': tensor(...COMPLETE
442.9067832024-09-22 11:48:37.4718952024-09-22 11:49:12.6089070 days 00:00:35.13701232360.0000314{'loss': tensor(2.9068), 'train_loss': tensor(...COMPLETE
552.7799322024-09-22 11:49:12.6104702024-09-22 11:49:29.3367630 days 00:00:16.72629332120.0010102{'loss': tensor(2.7799), 'train_loss': tensor(...COMPLETE
662.7761752024-09-22 11:49:29.3367632024-09-22 11:50:10.0767480 days 00:00:40.739985128120.0009248{'loss': tensor(2.7762), 'train_loss': tensor(...COMPLETE
773.0373412024-09-22 11:50:10.0767482024-09-22 11:50:19.1310970 days 00:00:09.054349128240.0000142{'loss': tensor(3.0373), 'train_loss': tensor(...COMPLETE
883.2022432024-09-22 11:50:19.1310972024-09-22 11:50:32.9492190 days 00:00:13.81812232240.0006312{'loss': tensor(3.2022), 'train_loss': tensor(...COMPLETE
993.2456592024-09-22 11:50:32.9502202024-09-22 11:50:53.9809850 days 00:00:21.03076564240.0081698{'loss': tensor(3.2457), 'train_loss': tensor(...COMPLETE
\n", "
" ], "text/plain": [ " number value datetime_start datetime_complete \\\n", "0 0 2.930035 2024-09-22 11:47:26.290977 2024-09-22 11:47:55.230830 \n", "1 1 3.161288 2024-09-22 11:47:55.230830 2024-09-22 11:48:05.877961 \n", "2 2 2.700425 2024-09-22 11:48:05.878972 2024-09-22 11:48:21.498122 \n", "3 3 2.985046 2024-09-22 11:48:21.499665 2024-09-22 11:48:37.470886 \n", "4 4 2.906783 2024-09-22 11:48:37.471895 2024-09-22 11:49:12.608907 \n", "5 5 2.779932 2024-09-22 11:49:12.610470 2024-09-22 11:49:29.336763 \n", "6 6 2.776175 2024-09-22 11:49:29.336763 2024-09-22 11:50:10.076748 \n", "7 7 3.037341 2024-09-22 11:50:10.076748 2024-09-22 11:50:19.131097 \n", "8 8 3.202243 2024-09-22 11:50:19.131097 2024-09-22 11:50:32.949219 \n", "9 9 3.245659 2024-09-22 11:50:32.950220 2024-09-22 11:50:53.980985 \n", "\n", " duration params_ff_dim params_input_size \\\n", "0 0 days 00:00:28.939853 128 12 \n", "1 0 days 00:00:10.647131 64 24 \n", "2 0 days 00:00:15.619150 32 36 \n", "3 0 days 00:00:15.971221 128 36 \n", "4 0 days 00:00:35.137012 32 36 \n", "5 0 days 00:00:16.726293 32 12 \n", "6 0 days 00:00:40.739985 128 12 \n", "7 0 days 00:00:09.054349 128 24 \n", "8 0 days 00:00:13.818122 32 24 \n", "9 0 days 00:00:21.030765 64 24 \n", "\n", " params_learning_rate params_n_block \\\n", "0 0.000020 2 \n", "1 0.001222 2 \n", "2 0.000025 8 \n", "3 0.070560 4 \n", "4 0.000031 4 \n", "5 0.001010 2 \n", "6 0.000924 8 \n", "7 0.000014 2 \n", "8 0.000631 2 \n", "9 0.008169 8 \n", "\n", " user_attrs_METRICS state \n", "0 {'loss': tensor(2.9300), 'train_loss': tensor(... COMPLETE \n", "1 {'loss': tensor(3.1613), 'train_loss': tensor(... COMPLETE \n", "2 {'loss': tensor(2.7004), 'train_loss': tensor(... COMPLETE \n", "3 {'loss': tensor(2.9850), 'train_loss': tensor(... COMPLETE \n", "4 {'loss': tensor(2.9068), 'train_loss': tensor(... COMPLETE \n", "5 {'loss': tensor(2.7799), 'train_loss': tensor(... COMPLETE \n", "6 {'loss': tensor(2.7762), 'train_loss': tensor(... COMPLETE \n", "7 {'loss': tensor(3.0373), 'train_loss': tensor(... COMPLETE \n", "8 {'loss': tensor(3.2022), 'train_loss': tensor(... COMPLETE \n", "9 {'loss': tensor(3.2457), 'train_loss': tensor(... COMPLETE " ] }, "execution_count": 133, "metadata": {}, "output_type": "execute_result" } ], "source": [ "results = nf.models[1].results.trials_dataframe()\n", "results.drop(columns='user_attrs_ALL_PARAMS')" ] }, { "cell_type": "code", "execution_count": 128, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Trainer already configured with model summary callbacks: []. Skipping setting a default `ModelSummary` callback.\n", "GPU available: True (cuda), used: True\n", "TPU available: False, using: 0 TPU cores\n", "HPU available: False, using: 0 HPUs\n", "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 73.97it/s]" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Trainer already configured with model summary callbacks: []. Skipping setting a default `ModelSummary` callback.\n", "GPU available: True (cuda), used: True\n", "TPU available: False, using: 0 TPU cores\n", "HPU available: False, using: 0 HPUs\n", "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 53.50it/s]\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "e:\\VM\\miniconda3\\envs\\multi-its\\lib\\site-packages\\neuralforecast\\core.py:209: FutureWarning: In a future version the predictions will have the id as a column. You can set the `NIXTLA_ID_AS_COL` environment variable to adopt the new behavior and to suppress this warning.\n", " warnings.warn(\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
unique_iddsAutoNHITSAutoTSMixer
0Dated2024-08-0671.88922980.282181
1Dated2024-08-0771.92971077.726891
2Dated2024-08-0871.96659978.719620
3Dated2024-08-0971.98569579.689644
4Dated2024-08-1071.99101381.262947
5Dated2024-08-1172.06185981.586983
6Dated2024-08-1272.13781780.715485
7Dated2024-08-1372.14060280.055084
8Dated2024-08-1472.21176980.489311
9Dated2024-08-1572.25774481.713593
10Dated2024-08-1672.28853679.390053
11Dated2024-08-1772.29813481.773094
\n", "
" ], "text/plain": [ " unique_id ds AutoNHITS AutoTSMixer\n", "0 Dated 2024-08-06 71.889229 80.282181\n", "1 Dated 2024-08-07 71.929710 77.726891\n", "2 Dated 2024-08-08 71.966599 78.719620\n", "3 Dated 2024-08-09 71.985695 79.689644\n", "4 Dated 2024-08-10 71.991013 81.262947\n", "5 Dated 2024-08-11 72.061859 81.586983\n", "6 Dated 2024-08-12 72.137817 80.715485\n", "7 Dated 2024-08-13 72.140602 80.055084\n", "8 Dated 2024-08-14 72.211769 80.489311\n", "9 Dated 2024-08-15 72.257744 81.713593\n", "10 Dated 2024-08-16 72.288536 79.390053\n", "11 Dated 2024-08-17 72.298134 81.773094" ] }, "execution_count": 128, "metadata": {}, "output_type": "execute_result" } ], "source": [ "Y_hat_df_optuna = nf.predict()\n", "Y_hat_df_optuna = Y_hat_df_optuna.reset_index()\n", "Y_hat_df_optuna" ] }, { "cell_type": "code", "execution_count": 132, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "\n", "fig, ax = plt.subplots(1, 1, figsize = (20, 7))\n", "plot_df = pd.concat([Y_df, Y_hat_df]).reset_index()\n", "\n", "#plt.plot(plot_df['ds'], plot_df['y'], label='y')\n", "plt.plot(Y_hat_df_optuna['ds'], Y_hat_df_optuna['AutoNHITS'], label='AutoNHITS')\n", "plt.plot(Y_hat_df_optuna['ds'], Y_hat_df_optuna['AutoTSMixer'], label='AutoTSMixer')\n", "\n", "ax.set_title('AirPassengers Forecast', fontsize=22)\n", "ax.set_ylabel('Monthly Passengers', fontsize=20)\n", "ax.set_xlabel('Timestamp [t]', fontsize=20)\n", "ax.legend(prop={'size': 15})\n", "ax.grid()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Feature Engineering" ] }, { "cell_type": "code", "execution_count": 184, "metadata": {}, "outputs": [], "source": [ "X_df = gasoline_data.drop(columns=['High', 'Low']).rename({'Close' : 'Gas'}, axis=1).merge(\n", " dollar_data.drop(columns=['High', 'Low']).rename({'Close' : 'DXY'}, axis=1), on='Date').merge(\n", " gold_price_data.drop(columns=['High', 'Low']).rename({'Close' : 'Gold(CMX)'}, axis=1), on='Date').merge(\n", " silver_price_data.drop(columns=['High', 'Low']).rename({'Close' : 'Silver(CMX)'}, axis=1), on='Date').merge(\n", " platinum_price_data.drop(columns=['High', 'Low']).rename({'Close' : 'Platinum(NMX)'}, axis=1), on='Date').merge(\n", " palladium_price_data.drop(columns=['High', 'Low']).rename({'Close' : 'Palladium(NMX)'}, axis=1), on='Date').merge(\n", " us_bond_data.drop(columns=['High', 'Low']).rename({'Close' : 'USBond'}, axis=1), on='Date').merge(\n", " sp500_data.drop(columns=['High', 'Low']).rename({'Close' : 'S&P500'}, axis=1), on='Date').merge(\n", " gold_miners_data.drop(columns=['High', 'Low']).rename({'Close' : 'GoldMiners'}, axis=1), on='Date').merge(\n", " gpr_data[['Date', 'GPRD']])\n", "\n", "y_df = brent_futures_data[['Date','Close']].rename({'Close' : 'BrFu'}, axis=1)\n", "\n", "df = X_df.merge(y_df, on ='Date').rename({'Date' : 'ds'}, axis=1)\n", "df['ds'] = pd.to_datetime(df['ds'])\n", "\n", "y_df = df['BrFu']\n", "\n", "df.drop(columns=['BrFu'], inplace=True)" ] }, { "cell_type": "code", "execution_count": 187, "metadata": {}, "outputs": [], "source": [ "df['unique_id'] = 1\n", "\n", "y_df = y_df.rename({'BrFu' : 'y'})" ] }, { "cell_type": "code", "execution_count": 189, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Feature Extraction: 100%|██████████| 10/10 [00:22<00:00, 2.20s/it]\n" ] } ], "source": [ "from tsfresh import extract_features\n", "extracted_features = extract_features(df, column_id=\"unique_id\", column_sort=\"ds\")" ] }, { "cell_type": "code", "execution_count": 193, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dsGasDXYGold(CMX)Silver(CMX)Platinum(NMX)Palladium(NMX)USBondS&P500GoldMinersGPRDunique_id
02024-08-052.3336102.6900022444.427.207915.5826.10125.40625186.3335.34210.2067111
12024-08-022.3176103.2099992469.828.392967.6882.50125.06255346.5636.48207.7994381
22024-08-012.3980104.4199982480.828.477970.5895.10122.53125446.6837.27139.8780981
32024-07-312.4425104.0999982473.028.938986.4925.20120.78125522.3037.93135.2068481
42024-07-302.3443104.5500032451.928.525971.0881.70120.18755436.4436.9595.6963961
.......................................
24962014-08-292.622982.7500001287.419.4921424.7909.55140.09382003.3726.69157.3363341
24972014-08-282.590882.4800031290.419.6091425.2898.10141.71881996.7426.4697.5347061
24982014-08-272.590582.4300001283.419.4751419.9894.70141.15622000.1226.11143.0821081
24992014-08-262.600182.6500021285.219.4591419.6890.15140.53122000.0226.19118.1437071
25002014-08-252.595482.5500031278.919.4311418.4891.50140.71881997.9225.62167.6001591
\n", "

2501 rows × 12 columns

\n", "
" ], "text/plain": [ " ds Gas DXY Gold(CMX) Silver(CMX) Platinum(NMX) \\\n", "0 2024-08-05 2.3336 102.690002 2444.4 27.207 915.5 \n", "1 2024-08-02 2.3176 103.209999 2469.8 28.392 967.6 \n", "2 2024-08-01 2.3980 104.419998 2480.8 28.477 970.5 \n", "3 2024-07-31 2.4425 104.099998 2473.0 28.938 986.4 \n", "4 2024-07-30 2.3443 104.550003 2451.9 28.525 971.0 \n", "... ... ... ... ... ... ... \n", "2496 2014-08-29 2.6229 82.750000 1287.4 19.492 1424.7 \n", "2497 2014-08-28 2.5908 82.480003 1290.4 19.609 1425.2 \n", "2498 2014-08-27 2.5905 82.430000 1283.4 19.475 1419.9 \n", "2499 2014-08-26 2.6001 82.650002 1285.2 19.459 1419.6 \n", "2500 2014-08-25 2.5954 82.550003 1278.9 19.431 1418.4 \n", "\n", " Palladium(NMX) USBond S&P500 GoldMiners GPRD unique_id \n", "0 826.10 125.4062 5186.33 35.34 210.206711 1 \n", "1 882.50 125.0625 5346.56 36.48 207.799438 1 \n", "2 895.10 122.5312 5446.68 37.27 139.878098 1 \n", "3 925.20 120.7812 5522.30 37.93 135.206848 1 \n", "4 881.70 120.1875 5436.44 36.95 95.696396 1 \n", "... ... ... ... ... ... ... \n", "2496 909.55 140.0938 2003.37 26.69 157.336334 1 \n", "2497 898.10 141.7188 1996.74 26.46 97.534706 1 \n", "2498 894.70 141.1562 2000.12 26.11 143.082108 1 \n", "2499 890.15 140.5312 2000.02 26.19 118.143707 1 \n", "2500 891.50 140.7188 1997.92 25.62 167.600159 1 \n", "\n", "[2501 rows x 12 columns]" ] }, "execution_count": 193, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 190, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "e:\\VM\\miniconda3\\envs\\multi-its\\lib\\site-packages\\tsfresh\\utilities\\dataframe_functions.py:198: RuntimeWarning: The columns ['Gas__query_similarity_count__query_None__threshold_0.0'\n", " 'DXY__query_similarity_count__query_None__threshold_0.0'\n", " 'Platinum(NMX)__query_similarity_count__query_None__threshold_0.0'\n", " 'Palladium(NMX)__query_similarity_count__query_None__threshold_0.0'\n", " 'Silver(CMX)__query_similarity_count__query_None__threshold_0.0'\n", " 'Gold(CMX)__query_similarity_count__query_None__threshold_0.0'\n", " 'USBond__query_similarity_count__query_None__threshold_0.0'\n", " 'S&P500__query_similarity_count__query_None__threshold_0.0'\n", " 'GoldMiners__query_similarity_count__query_None__threshold_0.0'\n", " 'GPRD__query_similarity_count__query_None__threshold_0.0'] did not have any finite values. Filling with zeros.\n", " warnings.warn(\n" ] }, { "ename": "AssertionError", "evalue": "X and y must contain the same number of samples.", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mAssertionError\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[190], line 5\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtsfresh\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutilities\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdataframe_functions\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m impute\n\u001b[0;32m 4\u001b[0m impute(extracted_features)\n\u001b[1;32m----> 5\u001b[0m features_filtered \u001b[38;5;241m=\u001b[39m \u001b[43mselect_features\u001b[49m\u001b[43m(\u001b[49m\u001b[43mextracted_features\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_df\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[1;32me:\\VM\\miniconda3\\envs\\multi-its\\lib\\site-packages\\tsfresh\\feature_selection\\selection.py:154\u001b[0m, in \u001b[0;36mselect_features\u001b[1;34m(X, y, test_for_binary_target_binary_feature, test_for_binary_target_real_feature, test_for_real_target_binary_feature, test_for_real_target_real_feature, fdr_level, hypotheses_independent, n_jobs, show_warnings, chunksize, ml_task, multiclass, n_significant)\u001b[0m\n\u001b[0;32m 150\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(y, (pd\u001b[38;5;241m.\u001b[39mSeries, np\u001b[38;5;241m.\u001b[39mndarray)), (\n\u001b[0;32m 151\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe type of target vector y must be one of: \u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpandas.Series, numpy.ndarray\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 152\u001b[0m )\n\u001b[0;32m 153\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(y) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124my must contain at least two samples.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m--> 154\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(X) \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mlen\u001b[39m(y), \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mX and y must contain the same number of samples.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 155\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m (\n\u001b[0;32m 156\u001b[0m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mset\u001b[39m(y)) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[0;32m 157\u001b[0m ), \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFeature selection is only possible if more than 1 label/class is provided\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 159\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(y, pd\u001b[38;5;241m.\u001b[39mSeries) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mset\u001b[39m(X\u001b[38;5;241m.\u001b[39mindex) \u001b[38;5;241m!=\u001b[39m \u001b[38;5;28mset\u001b[39m(y\u001b[38;5;241m.\u001b[39mindex):\n", "\u001b[1;31mAssertionError\u001b[0m: X and y must contain the same number of samples." ] } ], "source": [ "from tsfresh import select_features\n", "from tsfresh.utilities.dataframe_functions import impute\n", "\n", "impute(extracted_features)\n", "features_filtered = select_features(extracted_features, y_df)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
y__variance_larger_than_standard_deviationy__has_duplicate_maxy__has_duplicate_miny__has_duplicatey__sum_valuesy__abs_energyy__mean_abs_changey__mean_changey__mean_second_derivative_centraly__median...GPRD__fourier_entropy__bins_5GPRD__fourier_entropy__bins_10GPRD__fourier_entropy__bins_100GPRD__permutation_entropy__dimension_3__tau_1GPRD__permutation_entropy__dimension_4__tau_1GPRD__permutation_entropy__dimension_5__tau_1GPRD__permutation_entropy__dimension_6__tau_1GPRD__permutation_entropy__dimension_7__tau_1GPRD__query_similarity_count__query_None__threshold_0.0GPRD__mean_n_absolute_max__number_of_maxima_7
Dated0.00.00.01.04.547474e-132501.00.0625320.000441-0.000012-0.142643...0.1704670.3903381.8948351.7913833.1719784.7585246.4187777.498423NaN6.830437
\n", "

1 rows × 9396 columns

\n", "
" ], "text/plain": [ " y__variance_larger_than_standard_deviation y__has_duplicate_max \\\n", "Dated 0.0 0.0 \n", "\n", " y__has_duplicate_min y__has_duplicate y__sum_values y__abs_energy \\\n", "Dated 0.0 1.0 4.547474e-13 2501.0 \n", "\n", " y__mean_abs_change y__mean_change y__mean_second_derivative_central \\\n", "Dated 0.062532 0.000441 -0.000012 \n", "\n", " y__median ... GPRD__fourier_entropy__bins_5 \\\n", "Dated -0.142643 ... 0.170467 \n", "\n", " GPRD__fourier_entropy__bins_10 GPRD__fourier_entropy__bins_100 \\\n", "Dated 0.390338 1.894835 \n", "\n", " GPRD__permutation_entropy__dimension_3__tau_1 \\\n", "Dated 1.791383 \n", "\n", " GPRD__permutation_entropy__dimension_4__tau_1 \\\n", "Dated 3.171978 \n", "\n", " GPRD__permutation_entropy__dimension_5__tau_1 \\\n", "Dated 4.758524 \n", "\n", " GPRD__permutation_entropy__dimension_6__tau_1 \\\n", "Dated 6.418777 \n", "\n", " GPRD__permutation_entropy__dimension_7__tau_1 \\\n", "Dated 7.498423 \n", "\n", " GPRD__query_similarity_count__query_None__threshold_0.0 \\\n", "Dated NaN \n", "\n", " GPRD__mean_n_absolute_max__number_of_maxima_7 \n", "Dated 6.830437 \n", "\n", "[1 rows x 9396 columns]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "extracted_features" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Save and load models" ] }, { "cell_type": "code", "execution_count": 160, "metadata": {}, "outputs": [], "source": [ "nf.save(path='./checkpoints/test_run/',\n", " model_index=None, \n", " overwrite=True,\n", " save_dataset=True)" ] }, { "cell_type": "code", "execution_count": 158, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "e:\\VM\\miniconda3\\envs\\multi-its\\lib\\site-packages\\neuralforecast\\common\\_base_model.py:444: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n", " content = torch.load(f, **kwargs)\n", "Seed set to 8\n", "e:\\VM\\miniconda3\\envs\\multi-its\\lib\\site-packages\\neuralforecast\\common\\_base_model.py:444: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n", " content = torch.load(f, **kwargs)\n", "Seed set to 1\n", "e:\\VM\\miniconda3\\envs\\multi-its\\lib\\site-packages\\torch\\storage.py:414: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n", " return torch.load(io.BytesIO(b))\n", "GPU available: True (cuda), used: True\n", "TPU available: False, using: 0 TPU cores\n", "HPU available: False, using: 0 HPUs\n", "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 58.57it/s]" ] }, { "name": "stderr", "output_type": "stream", "text": [ "GPU available: True (cuda), used: True\n", "TPU available: False, using: 0 TPU cores\n", "HPU available: False, using: 0 HPUs\n", "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 31.66it/s]\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "e:\\VM\\miniconda3\\envs\\multi-its\\lib\\site-packages\\neuralforecast\\core.py:209: FutureWarning: In a future version the predictions will have the id as a column. You can set the `NIXTLA_ID_AS_COL` environment variable to adopt the new behavior and to suppress this warning.\n", " warnings.warn(\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
unique_iddsAutoNHITSAutoTSMixer
0Dated2024-08-0671.88922980.282181
1Dated2024-08-0771.92971077.726891
2Dated2024-08-0871.96659978.719620
3Dated2024-08-0971.98569579.689644
4Dated2024-08-1071.99101381.262947
\n", "
" ], "text/plain": [ " unique_id ds AutoNHITS AutoTSMixer\n", "0 Dated 2024-08-06 71.889229 80.282181\n", "1 Dated 2024-08-07 71.929710 77.726891\n", "2 Dated 2024-08-08 71.966599 78.719620\n", "3 Dated 2024-08-09 71.985695 79.689644\n", "4 Dated 2024-08-10 71.991013 81.262947" ] }, "execution_count": 158, "metadata": {}, "output_type": "execute_result" } ], "source": [ "nf2 = NeuralForecast.load(path='./checkpoints/test_run/')\n", "Y_hat_df = nf2.predict().reset_index()\n", "Y_hat_df.head()" ] }, { "cell_type": "code", "execution_count": 159, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
unique_iddsAutoNHITSAutoTSMixer
0Dated2024-08-0671.88922980.282181
1Dated2024-08-0771.92971077.726891
2Dated2024-08-0871.96659978.719620
3Dated2024-08-0971.98569579.689644
4Dated2024-08-1071.99101381.262947
5Dated2024-08-1172.06185981.586983
6Dated2024-08-1272.13781780.715485
7Dated2024-08-1372.14060280.055084
8Dated2024-08-1472.21176980.489311
9Dated2024-08-1572.25774481.713593
10Dated2024-08-1672.28853679.390053
11Dated2024-08-1772.29813481.773094
\n", "
" ], "text/plain": [ " unique_id ds AutoNHITS AutoTSMixer\n", "0 Dated 2024-08-06 71.889229 80.282181\n", "1 Dated 2024-08-07 71.929710 77.726891\n", "2 Dated 2024-08-08 71.966599 78.719620\n", "3 Dated 2024-08-09 71.985695 79.689644\n", "4 Dated 2024-08-10 71.991013 81.262947\n", "5 Dated 2024-08-11 72.061859 81.586983\n", "6 Dated 2024-08-12 72.137817 80.715485\n", "7 Dated 2024-08-13 72.140602 80.055084\n", "8 Dated 2024-08-14 72.211769 80.489311\n", "9 Dated 2024-08-15 72.257744 81.713593\n", "10 Dated 2024-08-16 72.288536 79.390053\n", "11 Dated 2024-08-17 72.298134 81.773094" ] }, "execution_count": 159, "metadata": {}, "output_type": "execute_result" } ], "source": [ "Y_hat_df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Test" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "\n", "test = pd.read_csv('artifacts\\cf12e42a-5a61-4bb0-bce2-0c0b9b246e53\\dataset.csv')" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "# Select columns that end with 'Close' except for 'Date'\n", "close_columns_df = pd.concat([test['Date'], test.filter(regex=r'Close$')], axis=1)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 2014-08-25\n", "1 2014-08-26\n", "2 2014-08-27\n", "3 2014-08-28\n", "4 2014-08-29\n", " ... \n", "2495 2024-07-29\n", "2496 2024-07-30\n", "2497 2024-07-31\n", "2498 2024-08-01\n", "2499 2024-08-02\n", "Name: Date, Length: 2500, dtype: datetime64[ns]" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.to_datetime(close_columns_df['Date'])" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "TimeSeriesDataset(n_data=2,500, n_groups=1)" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pickle as pkl\n", "import pandas as pd\n", "\n", "object = 'artifacts/498f27cb-5e23-4503-9323-a4333f22becf/model/dataset.pkl'\n", "\n", "df = pd.read_pickle(object)\n", "df" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "TimeSeriesDataset(n_data=2,500, n_groups=1)" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "object" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "multi-its", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.19" } }, "nbformat": 4, "nbformat_minor": 2 }