Spaces:

EDS-lab
/

DAM-price-forecast

Sleeping

App Files Files Community

DAM-price-forecast / app.py

mmmapms

Update app.py

c01e48a verified 9 months ago

raw

history blame

14 kB

	import base64
	from io import StringIO
	from pathlib import Path

	import numpy as np
	import pandas as pd
	import plotly.graph_objs as go
	import requests
	import streamlit as st

	#@st.cache_data(ttl=86400) # TTL is set for 86400 seconds (24 hours)
	def load_data_predictions(github_token):
	    """Download ``Predictions.csv`` from GitHub and return it in display form.

	    The file is requested through the GitHub contents API with the raw media
	    type, so the response body is the CSV text itself. According to the GitHub
	    REST documentation the default JSON/base64 representation only carries the
	    content of small files (about 1 MB), which a growing predictions file
	    would eventually exceed.

	    Args:
	        github_token: Personal access token sent in the ``Authorization`` header.

	    Returns:
	        A ``(df, df_filtered)`` tuple: every row of the file, and only the rows
	        that have a ``Real Price``. When the download fails, an error is shown
	        in the app and two empty DataFrames are returned.
	    """
	    url = 'https://api.github.com/repos/mmmapms/Forecast_DAM_V2/contents/Predictions.csv'
	    headers = {
	        'Authorization': f'token {github_token}',
	        'Accept': 'application/vnd.github.raw+json',
	    }

	    try:
	        # A timeout keeps a stalled connection from hanging the whole app run.
	        response = requests.get(url, headers=headers, timeout=30)
	    except requests.RequestException:
	        response = None

	    if response is None or response.status_code != 200:
	        st.error("Failed to download data. Please check your GitHub token and repository details.")
	        return pd.DataFrame(), pd.DataFrame()

	    csv_content = StringIO(response.content.decode('utf-8'))
	    return _prepare_predictions(pd.read_csv(csv_content))


	def _prepare_predictions(df):
	    """Rename raw model columns to display names, parse ``Date`` and split off priced rows."""
	    df = df.rename(columns={
	        'Price': 'Real Price',
	        'DNN1': 'Neural Network 1',
	        'DNN2': 'Neural Network 2',
	        'DNN3': 'Neural Network 3',
	        'DNN4': 'Neural Network 4',
	        'DNN_Ensemble': 'Neural Network Ensemble',
	        'LEAR56': 'Regularized Linear Model 1',
	        'LEAR84': 'Regularized Linear Model 2',
	        'LEAR112': 'Regularized Linear Model 3',
	        'LEAR730': 'Regularized Linear Model 4',
	        'LEAR_Ensemble': 'Regularized Linear Model Ensemble',
	        'Persis': 'Persistence Model',
	        'Hybrid_Ensemble': 'Hybrid Ensemble',
	    })
	    df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)
	    # Rows without a real price are forecasts for hours that have not been observed yet.
	    df_filtered = df.dropna(subset=['Real Price'])
	    return df, df_filtered


	# Load the prediction data once per run; everything below needs both frames.
	github_token = st.secrets["GitHub_Token_Margarida"]

	if github_token:
	    df, df_filtered = load_data_predictions(github_token)
	else:
	    st.warning("Please enter your GitHub Personal Access Token to proceed.")
	    # Without a token df/df_filtered are never defined, so halt the run here
	    # instead of failing with a NameError further down.
	    st.stop()

	if df.empty:
	    # load_data_predictions has already shown the download error.
	    st.stop()

	if df_filtered.empty:
	    st.warning("No rows with a real price are available yet, so nothing can be displayed.")
	    st.stop()


	#@st.cache_data
	#def load_data_predictions():
	# df = pd.read_csv('Predictions.csv')
	# df = df.rename(columns={
	# 'Price': 'Real Price',
	# 'DNN1': 'Neural Network 1',
	# 'DNN2': 'Neural Network 2',
	# 'DNN3': 'Neural Network 3',
	# 'DNN4': 'Neural Network 4',
	# 'DNN_Ensemble': 'Neural Network Ensemble',
	# 'LEAR56': 'Regularized Linear Model 1',
	# 'LEAR84': 'Regularized Linear Model 2',
	# 'LEAR112': 'Regularized Linear Model 3',
	# 'LEAR730': 'Regularized Linear Model 4',
	# 'LEAR_Ensemble': 'Regularized Linear Model Ensemble',
	# 'Persis': 'Persistence Model',
	# 'Hybrid_Ensemble': 'Hybrid Ensemble'
	#})
	# df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)
	# df_filtered = df.dropna(subset=['Real Price'])
	# return df, df_filtered

	#df, df_filtered = load_data_predictions()

	# First and last calendar day for which a real price is available.
	min_date_allowed_pred, max_date_allowed_pred = (
	    bound.date() for bound in (df_filtered['Date'].min(), df_filtered['Date'].max())
	)

	# The line chart shows the week leading up to the latest timestamp in the file.
	end_date = df['Date'].max().date()
	start_date = end_date - pd.Timedelta(weeks=1)

	# Columns compared in the correlation heatmap, in display order.
	models_corr_matrix = [
	    'Persistence Model',
	    *(f'Neural Network {number}' for number in range(1, 5)),
	    *(f'Regularized Linear Model {number}' for number in range(1, 5)),
	    'Hybrid Ensemble',
	]

	def conformal_predictions(data, window_days=30, quantile=0.9):
	    """Attach rolling, hour-specific prediction intervals to the Hybrid Ensemble.

	    For every calendar day that starts at least ``window_days`` after the first
	    timestamp, the ``quantile`` of the absolute forecast error is computed per
	    hour of day over the preceding ``window_days`` (up to one hour before the
	    day starts). That value is used as a symmetric half-width around the
	    ``Hybrid Ensemble`` forecast for the matching hours of the day.

	    The half-width is taken from absolute residuals so it can never be
	    negative; a signed residual quantile would invert the interval whenever
	    the model systematically under-predicts.

	    Args:
	        data: DataFrame with a datetime ``Date`` column and numeric
	            ``Hybrid Ensemble`` and ``Real Price`` columns. It is not modified.
	        window_days: Length of the calibration window in days.
	        quantile: Quantile level of the absolute residuals used as half-width.
	            The output column keeps the name ``Quantile_90`` regardless.

	    Returns:
	        A new DataFrame with ``Date`` as a column again plus ``Residuals``
	        (signed, forecast minus real) and ``Hour``. When at least one day has a
	        calibration window, ``Quantile_90``, ``Lower_Interval`` and
	        ``Upper_Interval`` are added as well (NaN where no value is available).
	    """
	    data = data.copy()
	    data['Residuals'] = data['Hybrid Ensemble'] - data['Real Price']
	    data = data.set_index('Date')
	    data['Hour'] = data.index.hour

	    stamps = data.index
	    days = stamps.normalize()  # computed once instead of once per (day, hour)
	    hours = data['Hour'].to_numpy()
	    abs_residuals = data['Residuals'].abs()
	    window = pd.DateOffset(days=window_days)
	    first_eligible_day = stamps.min() + window

	    half_width = np.full(len(data), np.nan)
	    any_day_calibrated = False
	    for day in days.unique():
	        if day < first_eligible_day:
	            continue
	        # Boolean masks instead of label slicing: works on unsorted indexes too.
	        in_window = (stamps >= day - window) & (stamps <= day - pd.DateOffset(hours=1))
	        if not in_window.any():
	            continue
	        hour_quantiles = abs_residuals[in_window].groupby(hours[in_window]).quantile(quantile)
	        today = days == day
	        # Hours without history in the window come back as NaN from reindex.
	        half_width[today] = hour_quantiles.reindex(hours[today]).to_numpy()
	        any_day_calibrated = True

	    if any_day_calibrated:
	        data['Quantile_90'] = half_width
	        data['Lower_Interval'] = data['Hybrid Ensemble'] - half_width
	        data['Upper_Interval'] = data['Hybrid Ensemble'] + half_width

	    return data.reset_index()

	# Main layout of the app: title on the left, partner logos on the right.
	col1, col2 = st.columns([5, 2])  # Adjust the ratio to better fit your layout needs
	with col1:
	    st.title("Belgium: Electricity Price Forecasting")

	with col2:
	    # Placeholder above the logos; it receives the spacer markup below.
	    upper_space = col2.empty()
	    col2_1, col2_2 = st.columns(2)  # Two columns within the right column for side-by-side images

	    # The configured paths point at the author's workstation. Fall back to a
	    # file with the same name next to this script, and skip a logo that
	    # cannot be found rather than aborting the whole page.
	    app_dir = Path(__file__).resolve().parent
	    logo_slots = (
	        (col2_1, "C:/Users/mmascare/Documents/KU_Leuven_logo.png"),
	        (col2_2, "C:/Users/mmascare/Documents/energyville_logo.png"),
	    )
	    for logo_column, configured_path in logo_slots:
	        candidates = (Path(configured_path), app_dir / Path(configured_path).name)
	        logo_path = next((path for path in candidates if path.is_file()), None)
	        with logo_column:
	            if logo_path is not None:
	                st.image(str(logo_path), width=100)

	upper_space.markdown("""


	""", unsafe_allow_html=True)


	# Sidebar for inputs
	with st.sidebar:
	    # Single source of truth for the model names offered in both widgets.
	    model_options = [
	        'Neural Network 1', 'Neural Network 2', 'Neural Network 3', 'Neural Network 4',
	        'Regularized Linear Model 1', 'Regularized Linear Model 2',
	        'Regularized Linear Model 3', 'Regularized Linear Model 4',
	        'Hybrid Ensemble', 'Persistence Model',
	    ]

	    st.write("### Variables Selection for Graph")
	    st.write("Select which variables you'd like to include in the graph. This will affect the displayed charts and available data for download.")
	    selected_variables = st.multiselect(
	        "Select variables to display:",
	        options=['Real Price', *model_options],
	        default=['Real Price', 'Hybrid Ensemble'],
	    )

	    st.write("### Model Selection for Scatter Plot")
	    model_selection = st.selectbox(
	        "Select which model's predictions to display:",
	        options=model_options,
	        # Default to the ensemble by name rather than by a positional index.
	        index=model_options.index('Hybrid Ensemble'),
	    )

	    st.write("### Date Range for Metrics Calculation")
	    st.write("Select the date range to calculate the metrics for the predictions. This will influence the accuracy metrics displayed below. The complete dataset ranges from 10/03/2024 until today.")
	    selected_range = st.date_input("Select Date Range for Metrics Calculation:", [min_date_allowed_pred, max_date_allowed_pred])

	    # In range mode the widget can return fewer than two dates while the user
	    # is still picking the end date; unpacking that directly would raise.
	    if isinstance(selected_range, (list, tuple)) and len(selected_range) == 2:
	        start_date_pred, end_date_pred = selected_range
	    else:
	        start_date_pred = end_date_pred = None
	        st.info("Select both a start and an end date to calculate the metrics.")

	# Main content: line chart of the selected series for the most recent week.
	if selected_variables:
	    st.write("## Belgian Day-Ahead Electricity Prices")

	    # Interval columns are only needed when the ensemble itself is plotted.
	    if 'Hybrid Ensemble' in selected_variables:
	        df = conformal_predictions(df)

	    recent_rows = df['Date'] >= pd.Timestamp(start_date)
	    temp_df = df[recent_rows]

	    fig = go.Figure()
	    intervals_available = 'Quantile_90' in df.columns

	    for variable in selected_variables:
	        series_trace = go.Scatter(x=temp_df['Date'], y=temp_df[variable], mode='lines', name=variable)
	        fig.add_trace(series_trace)

	        if variable != 'Hybrid Ensemble' or not intervals_available:
	            continue

	        # Invisible lower bound first, then the upper bound filled down to it,
	        # which shades the band between the two.
	        lower_bound = go.Scatter(
	            x=temp_df['Date'],
	            y=temp_df['Lower_Interval'],
	            mode='lines',
	            line=dict(width=0),
	            showlegend=False,
	        )
	        upper_bound = go.Scatter(
	            x=temp_df['Date'],
	            y=temp_df['Upper_Interval'],
	            mode='lines',
	            line=dict(width=0),
	            fill='tonexty',
	            fillcolor='rgba(68, 68, 68, 0.3)',
	            name='Conformal Prediction',
	        )
	        fig.add_trace(lower_bound)
	        fig.add_trace(upper_bound)

	    fig.update_layout(xaxis_title="Date", yaxis_title="Price [EUR/MWh]")
	    st.plotly_chart(fig, use_container_width=True)
	    st.write("The graph presented here illustrates the day-ahead electricity price forecasts for Belgium, covering the period from one week ago up to tomorrow. It incorporates predictions from three distinct models: a Neural Network, a Regularized Linear Model, and Persistence, alongside the actual electricity prices up until today.")
	else:
	    st.warning("Please select at least one variable to display.")


	# Scatter plot of real price against the model chosen in the sidebar.
	# The "select at least one variable" warning is already shown by the line
	# chart section, so it is not repeated here.
	if selected_variables:
	    st.write("## Scatter Plot: Real Price vs Model Predictions")

	    # The upper bound is exclusive midnight of the following day so that
	    # every hour of the last day is included, not just its 00:00 row.
	    scatter_start = pd.Timestamp(min_date_allowed_pred)
	    scatter_end = pd.Timestamp(max_date_allowed_pred) + pd.Timedelta(days=1)
	    plot_df = df[(df['Date'] >= scatter_start) & (df['Date'] < scatter_end)]

	    model_column = model_selection

	    # Rows missing either value cannot be drawn and would break the fit.
	    plot_df = plot_df.dropna(subset=['Real Price', model_column])

	    # Create the scatter plot
	    fig = go.Figure()
	    fig.add_trace(go.Scatter(x=plot_df['Real Price'], y=plot_df[model_column], mode='markers', name=f"Real Price vs {model_selection} Predictions"))

	    # A straight-line fit needs at least two points.
	    if len(plot_df) >= 2:
	        m, b = np.polyfit(plot_df['Real Price'], plot_df[model_column], 1)
	        regression_line = m * plot_df['Real Price'] + b

	        # The fitted equation doubles as the legend entry of the line.
	        equation = f"y = {m:.2f}x + {b:.2f}"
	        fig.add_trace(go.Scatter(x=plot_df['Real Price'], y=regression_line, mode='lines', name=equation, line=dict(color='black')))

	    fig.update_layout(
	        title=f"Scatter Plot of Real Price vs {model_selection} Predictions from {min_date_allowed_pred} to {max_date_allowed_pred}",
	        xaxis_title="Real Price [EUR/MWh]",
	        yaxis_title=f"{model_selection} Predictions [EUR/MWh]",
	        xaxis=dict(range=[-160, 160]),  # Setting the x-axis range
	        yaxis=dict(range=[-150, 150]),  # Setting the y-axis range
	    )
	    st.plotly_chart(fig, use_container_width=True)


	# Calculating and displaying metrics for the date range chosen in the sidebar.
	if start_date_pred and end_date_pred:
	    st.header("Accuracy Metrics")
	    st.write(f"The accuracy metrics are calculated from {start_date_pred} to {end_date_pred}. This interval can be changed in the sidebar. Evaluate the forecasting accuracy of our models with key performance indicators. The table summarizes the Mean Absolute Error (MAE), Symmetric Mean Absolute Percentage Error (SMAPE), and Root Mean Square Error (RMSE) for the selected models over your selected date range. Lower values indicate higher precision and reliability of the forecasts.")

	    # Exclusive midnight of the day after the end date keeps all hours of the
	    # end date in the selection instead of only its 00:00 row.
	    metrics_start = pd.Timestamp(start_date_pred)
	    metrics_end = pd.Timestamp(end_date_pred) + pd.Timedelta(days=1)
	    filtered_df = df_filtered[(df_filtered['Date'] >= metrics_start) & (df_filtered['Date'] < metrics_end)]

	    # List of models for convenience
	    models = [
	        'Neural Network 1', 'Neural Network 2', 'Neural Network 3', 'Neural Network 4',
	        'Regularized Linear Model 1', 'Regularized Linear Model 2', 'Regularized Linear Model 3', 'Regularized Linear Model 4',
	        'Persistence Model', 'Hybrid Ensemble'
	    ]

	    if filtered_df.empty:
	        st.warning("No data with real prices is available in the selected date range.")
	    else:
	        results = {'Metric': ['MAE', 'sMAPE', 'RMSE', 'rMAE']}

	        p_real = filtered_df['Real Price']
	        # Reference error for rMAE; identical for every model, so computed once.
	        persistence_mae = np.mean(np.abs(p_real - filtered_df['Persistence Model']))

	        for model in models:
	            p_pred = filtered_df[model]
	            abs_error = np.abs(p_real - p_pred)

	            mae = np.mean(abs_error)
	            smape = 100 * np.mean(abs_error / ((np.abs(p_real) + np.abs(p_pred)) / 2))
	            rmse = np.sqrt(np.mean((p_real - p_pred) ** 2))
	            rmae = mae / persistence_mae

	            results[model] = [f"{mae:.2f}", f"{smape:.2f}%", f"{rmse:.2f}", f"{rmae:.2f}"]

	        # One row per model, one column per metric.
	        metrics_df = pd.DataFrame(results)
	        transposed_metrics_df = metrics_df.set_index('Metric').T
	        col1, col2 = st.columns([3, 2])

	        with col1:
	            st.dataframe(transposed_metrics_df, hide_index=False)

	        with col2:
	            st.markdown("""
	<style>
	.big-font {
	font-size: 20px;
	font-weight: 500;
	}
	</style>
	<div class="big-font">
	Equations
	</div>
	""", unsafe_allow_html=True)

	            # Rendering LaTeX equations
	            st.markdown(r"""
	$\text{MAE} = \frac{1}{n}\sum_{i=1}^{n}\|y_i - \hat{y}_i\|$


	$\text{sMAPE} =100\frac{1}{n} \sum_{i=1}^{n} \frac{\|y_i - \hat{y}_i\|}{\left(\|y_i\| + \|\hat{y}_i\|\right)/2}$


	$\text{RMSE} = \sqrt{\frac{1}{n}\sum_{i=1}^{n}\left(y_i - \hat{y}_i\right)^2}$


	$\text{rMAE} = \frac{\text{MAE}}{MAE_{\text{Persistence Model}}}$
	""")


	st.write("## Correlation Matrix")

	# Pairwise correlation between the model forecasts on rows with a real price.
	models_df = df_filtered[models_corr_matrix]
	corr_matrix = models_df.corr()

	heatmap = go.Heatmap(z=corr_matrix.values, x=corr_matrix.columns, y=corr_matrix.index)
	fig = go.Figure(data=heatmap)
	# Reverse the y-axis so the first model is on the top row.
	fig.update_layout(yaxis=dict(autorange='reversed'))
	st.plotly_chart(fig, use_container_width=True)

	st.write("## Access Predictions")
	st.write("If you are interested in accessing the predictions made by the models, please contact Margarida Mascarenhas (KU Leuven PhD Student) at margarida.mascarenhas@kuleuven.be")