import streamlit as st
import pandas as pd
import numpy as np
import plotly.graph_objs as go
import requests
from io import StringIO
import base64
def load_data_predictions(github_token):
    """
    Fetch Predictions.csv from the GitHub 'Forecast_DAM_V2' repository
    via the blob SHA. This works for files larger than 1 MB.
    """
    owner = "mmmapms"
    repo = "Forecast_DAM_V2"
    file_path = "Predictions.csv"

    # 1. Get file metadata (including SHA) from the "contents" endpoint
    url_contents = f"https://api.github.com/repos/{owner}/{repo}/contents/{file_path}"
    headers_contents = {
        "Authorization": f"token {github_token}",
    }
    response_contents = requests.get(url_contents, headers=headers_contents)
    if response_contents.status_code != 200:
        st.error("Failed to download file metadata. Check token and file path.")
        return pd.DataFrame(), pd.DataFrame()

    json_data = response_contents.json()
    # We expect "sha" to be present for the file
    if "sha" not in json_data:
        st.error("No 'sha' field found in JSON response. File might be missing.")
        return pd.DataFrame(), pd.DataFrame()
    sha = json_data["sha"]

    # 2. Use the "blobs" endpoint to fetch the raw file content
    url_blob = f"https://api.github.com/repos/{owner}/{repo}/git/blobs/{sha}"
    headers_blob = {
        "Authorization": f"token {github_token}",
        "Accept": "application/vnd.github.v3.raw",  # crucial for large files
    }
    response_blob = requests.get(url_blob, headers=headers_blob)
    if response_blob.status_code != 200:
        st.error(f"Failed to fetch raw blob. Status code: {response_blob.status_code}")
        return pd.DataFrame(), pd.DataFrame()

    # The response body is the raw CSV text
    csv_text = response_blob.text
    csv_content = StringIO(csv_text)

    # 3. Read the CSV into a Pandas DataFrame
    df = pd.read_csv(csv_content, encoding='utf-8')

    # 4. Rename columns as needed
    df = df.rename(columns={
        'Price': 'Real Price',
        'DNN1': 'Neural Network 1',
        'DNN2': 'Neural Network 2',
        'DNN3': 'Neural Network 3',
        'DNN4': 'Neural Network 4',
        'LEAR56': 'Regularized Linear Model 1',
        'LEAR84': 'Regularized Linear Model 2',
        'LEAR112': 'Regularized Linear Model 3',
        'LEAR730': 'Regularized Linear Model 4',
        'Persis': 'Persistence Model',
        'Hybrid_Ensemble': 'Hybrid Ensemble',
        'Weighted_Ensemble': 'Weighted Ensemble'
    })

    # 5. Parse dates and filter
    df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)
    df_filtered = df.dropna(subset=['Real Price'])
    return df, df_filtered
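
# For reference: for files under 1 MB, the "contents" endpoint already returns the file
# body base64-encoded, so the blob round-trip above is only needed for larger files.
# The helper below is a minimal, untested sketch of that simpler path (the name and
# structure are ours; it is not called anywhere in this app).
def load_small_file_via_contents(github_token, owner="mmmapms", repo="Forecast_DAM_V2", file_path="Predictions.csv"):
    """Sketch: decode the base64 'content' field returned by the contents endpoint (files < 1 MB)."""
    url = f"https://api.github.com/repos/{owner}/{repo}/contents/{file_path}"
    response = requests.get(url, headers={"Authorization": f"token {github_token}"})
    if response.status_code != 200:
        return pd.DataFrame()
    payload = response.json()
    if payload.get("encoding") != "base64" or not payload.get("content"):
        # Large files come back without inline content; fall back to the blob approach above.
        return pd.DataFrame()
    csv_text = base64.b64decode(payload["content"]).decode("utf-8")
    return pd.read_csv(StringIO(csv_text))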
github_token = st.secrets["GitHub_Token_Margarida"]

if github_token:
    df, df_filtered = load_data_predictions(github_token)
else:
    st.warning("Please enter your GitHub Personal Access Token to proceed.")
    st.stop()  # without a token there is no data to display below

if df_filtered.empty:
    st.stop()  # an error message was already shown inside load_data_predictions

min_date_allowed_pred = df_filtered['Date'].min().date()
max_date_allowed_pred = df_filtered['Date'].max().date()

end_date = df['Date'].max().date()
start_date = end_date - pd.Timedelta(days=7)
models_corr_matrix = ['Real Price', 'Persistence Model', 'Neural Network 1', 'Neural Network 2', 'Neural Network 3',
                      'Neural Network 4', 'Regularized Linear Model 1',
                      'Regularized Linear Model 2', 'Regularized Linear Model 3',
                      'Regularized Linear Model 4', 'Weighted Ensemble']
def conformal_predictions(data):
    """
    Attach a rolling empirical prediction interval to the Weighted Ensemble forecast:
    for each day after the first 30 days, take the 90th percentile of the per-hour
    residuals (forecast minus real price) over the trailing 30 days and use it as a
    symmetric band around the forecast.
    """
    data['Residuals'] = data['Weighted Ensemble'] - data['Real Price']
    data.set_index('Date', inplace=True)
    data['Hour'] = data.index.hour
    min_date = data.index.min()

    for date in data.index.normalize().unique():
        if date >= min_date + pd.DateOffset(days=30):
            start_date = date - pd.DateOffset(days=30)
            end_date = date
            calculation_window = data[start_date:end_date - pd.DateOffset(hours=1)]
            quantiles = calculation_window.groupby('Hour')['Residuals'].quantile(0.9)
            # Use .loc to safely access and modify data
            if date in data.index:
                current_day_data = data.loc[date.strftime('%Y-%m-%d')]
                for hour in current_day_data['Hour'].unique():
                    if hour in quantiles.index:
                        hour_quantile = quantiles[hour]
                        idx = (data.index.normalize() == date) & (data.Hour == hour)
                        data.loc[idx, 'Quantile_90'] = hour_quantile
                        data.loc[idx, 'Lower_Interval'] = data.loc[idx, 'Weighted Ensemble'] - hour_quantile
                        data.loc[idx, 'Upper_Interval'] = data.loc[idx, 'Weighted Ensemble'] + hour_quantile

    data.reset_index(inplace=True)
    return data
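
# Optional diagnostic (a sketch under our own naming; not called anywhere in this app):
# once conformal_predictions has added 'Lower_Interval'/'Upper_Interval', the empirical
# coverage of the band can be checked on hours where the real price is known.
def interval_coverage(data):
    """Share of hours whose Real Price falls inside [Lower_Interval, Upper_Interval]."""
    scored = data.dropna(subset=['Real Price', 'Lower_Interval', 'Upper_Interval'])
    inside = scored['Real Price'].between(scored['Lower_Interval'], scored['Upper_Interval'])
    return inside.mean()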
# Main layout of the app
col1, col2 = st.columns([5, 2])  # Adjust the ratio to better fit your layout needs
with col1:
    st.title("Belgium: Electricity Price Forecasting")
with col2:
    upper_space = col2.empty()
    col2_1, col2_2 = st.columns(2)  # Two columns within the right column for side-by-side logos
    with col2_1:
        st.image("KU_Leuven_logo.png", width=100)  # Adjust the path and width as needed
    with col2_2:
        st.image("energyville_logo.png", width=100)

upper_space.markdown("""


""", unsafe_allow_html=True)
# Sidebar for inputs
with st.sidebar:
    st.write("### Variables Selection for Graph")
    st.write("Select which variables you'd like to include in the graph. This will affect the displayed charts and available data for download.")
    selected_variables = st.multiselect(
        "Select variables to display:",
        options=['Real Price', 'Neural Network 1', 'Neural Network 2', 'Neural Network 3', 'Neural Network 4',
                 'Regularized Linear Model 1', 'Regularized Linear Model 2', 'Regularized Linear Model 3',
                 'Regularized Linear Model 4', 'Weighted Ensemble', 'Persistence Model'],
        default=['Real Price', 'Weighted Ensemble'])

    st.write("### Model Selection for Scatter Plot")
    model_selection = st.selectbox(
        "Select which model's predictions to display:",
        options=['Neural Network 1', 'Neural Network 2', 'Neural Network 3', 'Neural Network 4',
                 'Regularized Linear Model 1', 'Regularized Linear Model 2', 'Regularized Linear Model 3',
                 'Regularized Linear Model 4', 'Weighted Ensemble', 'Persistence Model'],
        index=8)  # index 8 defaults to 'Weighted Ensemble'

    st.write("### Date Range for Metrics Calculation")
    st.write("Select the date range to calculate the metrics for the predictions. This will influence the accuracy metrics displayed below. The complete dataset ranges from 10/03/2024 until today.")
    start_date_pred, end_date_pred = st.date_input("Select Date Range for Metrics Calculation:", [min_date_allowed_pred, max_date_allowed_pred])
# Main content
if not selected_variables:
    st.warning("Please select at least one variable to display.")
else:
    st.write("## Belgian Day-Ahead Electricity Prices")

    # Compute conformal prediction intervals if the Weighted Ensemble is selected
    if 'Weighted Ensemble' in selected_variables:
        df = conformal_predictions(df)

    temp_df = df[df['Date'] >= pd.Timestamp(start_date)]  # last week of data up to the latest forecast

    # Initialize Plotly figure
    fig = go.Figure()
    for variable in selected_variables:
        fig.add_trace(go.Scatter(x=temp_df['Date'], y=temp_df[variable], mode='lines', name=variable))

        # Add the prediction band around the Weighted Ensemble, if available
        if variable == 'Weighted Ensemble' and 'Quantile_90' in df.columns:
            # Lower interval trace (invisible line, used as the fill baseline)
            fig.add_trace(go.Scatter(
                x=temp_df['Date'],
                y=temp_df['Lower_Interval'],
                mode='lines',
                line=dict(width=0),
                showlegend=False
            ))
            # Upper interval trace, filled down to the lower interval
            fig.add_trace(go.Scatter(
                x=temp_df['Date'],
                y=temp_df['Upper_Interval'],
                mode='lines',
                line=dict(width=0),
                fill='tonexty',  # Fill between this trace and the previous one
                fillcolor='rgba(68, 68, 68, 0.3)',
                name='P10/P90 prediction intervals'
            ))

    fig.update_layout(xaxis_title="Date", yaxis_title="Price [EUR/MWh]")
    st.plotly_chart(fig, use_container_width=True)
    st.write("The graph presented here illustrates the day-ahead electricity price forecasts for Belgium, covering the period from one week ago up to tomorrow. The forecasts are made every morning on day D at 08:00 for day D+1.")
if not selected_variables:
    st.warning("Please select at least one variable to display.")
else:
    st.write("## Scatter Plot: Real Price vs Model Predictions")

    # Filter to the date range with known real prices and drop any remaining gaps
    plot_df = df[(df['Date'] >= pd.Timestamp(min_date_allowed_pred)) & (df['Date'] <= pd.Timestamp(max_date_allowed_pred))]
    model_column = model_selection
    plot_df = plot_df.dropna(subset=['Real Price', model_column])  # np.polyfit cannot handle NaNs

    # Create the scatter plot
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=plot_df['Real Price'], y=plot_df[model_column], mode='markers', name=f"Real Price vs {model_selection} Predictions"))

    # Least-squares line of best fit
    m, b = np.polyfit(plot_df['Real Price'], plot_df[model_column], 1)
    regression_line = m * plot_df['Real Price'] + b
    fig.add_trace(go.Scatter(x=plot_df['Real Price'], y=regression_line, mode='lines', line=dict(color='black'), showlegend=False))

    # Update layout with appropriate titles
    fig.update_layout(
        title=f"Scatter Plot of Real Price vs {model_selection} Predictions from {min_date_allowed_pred} to {max_date_allowed_pred}",
        xaxis_title="Real Price [EUR/MWh]",
        yaxis_title=f"{model_selection} Predictions [EUR/MWh]",
        xaxis=dict(range=[-160, 160]),  # Fixed x-axis range
        yaxis=dict(range=[-150, 150]),  # Fixed y-axis range
        showlegend=False
    )
    st.plotly_chart(fig, use_container_width=True)
# Calculating and displaying metrics
if start_date_pred and end_date_pred:
    st.header("Accuracy Metrics")
    st.write(f"The accuracy metrics are calculated from **{start_date_pred}** to **{end_date_pred}**. This interval can be changed in the sidebar.")

    filtered_df = df_filtered[(df_filtered['Date'] >= pd.Timestamp(start_date_pred)) & (df_filtered['Date'] < (pd.Timestamp(end_date_pred) + pd.Timedelta(days=1)))]

    # List of models for convenience
    models = [
        'Neural Network 1', 'Neural Network 2', 'Neural Network 3', 'Neural Network 4',
        'Regularized Linear Model 1', 'Regularized Linear Model 2', 'Regularized Linear Model 3', 'Regularized Linear Model 4',
        'Persistence Model', 'Weighted Ensemble'
    ]

    # Placeholder for results
    results = {'Metric': ['MAE', 'rMAE']}  # 'sMAPE', 'RMSE',
    p_real = filtered_df['Real Price']

    # Iterate through each model to calculate and store metrics
    for model in models:
        # Column names in filtered_df match the model names directly
        p_pred = filtered_df[model]
        mae = np.mean(np.abs(p_real - p_pred))
        # smape = 100 * np.mean(np.abs(p_real - p_pred) / ((np.abs(p_real) + np.abs(p_pred)) / 2))
        # rmse = np.sqrt(np.mean((p_real - p_pred) ** 2))
        rmae = mae / np.mean(np.abs(p_real - filtered_df['Persistence Model']))

        # Store the results
        results[model] = [f"{mae:.2f}", f"{rmae:.2f}"]  # f"{smape:.2f}%", f"{rmse:.2f}",

    # Convert the results to a DataFrame for display
    metrics_df = pd.DataFrame(results)
    transposed_metrics_df = metrics_df.set_index('Metric').T

    col1, col2 = st.columns([3, 2])
    # Display the transposed DataFrame
    with col1:
        st.dataframe(transposed_metrics_df, hide_index=False)
    with col2:
        st.markdown("""
        <style>
        .big-font {
            font-size: 20px;
            font-weight: 500;
        }
        </style>
        <div class="big-font">
        Equations
        </div>
        """, unsafe_allow_html=True)

        # Rendering LaTeX equations
        st.markdown(r"""
        $\text{MAE} = \frac{1}{n}\sum_{i=1}^{n}|y_i - \hat{y}_i|$

        $\text{rMAE} = \frac{\text{MAE}}{\text{MAE}_{\text{Persistence Model}}}$
        """)
st.write("## Correlation Matrix") | |
models_df = df_filtered[models_corr_matrix] | |
corr_matrix = models_df.corr() | |
fig = go.Figure(data=go.Heatmap( | |
z=corr_matrix.values, | |
x=corr_matrix.columns, | |
y=corr_matrix.index)) | |
fig.update_layout( | |
yaxis_autorange='reversed' # Ensure the y-axis starts from the top | |
) | |
st.plotly_chart(fig, use_container_width=True) | |
st.write("## Access Predictions") | |
st.write("All forecasts are provided on an 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. If you are interested in accessing the predictions made by the models, the models themselves or how these can be incorporated into your workflows, please contact Margarida Mascarenhas (PhD Student, KU Leuven) at margarida.mascarenhas@kuleuven.be or Hussain Kazmi (assistant professor, KU Leuven) at hussain.kazmi@kuleuven.be.") | |