Spaces:
Running
Running
File size: 13,610 Bytes
e67fcfa 01f14f6 33fccb4 e67fcfa 01f14f6 7fe8c1e 66f645e 7fe8c1e 66f645e 7896fa0 7fe8c1e deb692e 66f645e 01f14f6 9e2e619 01f14f6 c58f85c 01f14f6 e67fcfa f03d4c1 7097d3e 7896fa0 c01e48a 7896fa0 c01e48a 7896fa0 c01e48a bab8a32 c01e48a 263de36 c01e48a 263de36 c01e48a 7097d3e e67fcfa 7896fa0 2c9db99 7896fa0 2c9db99 e67fcfa c01e48a 7896fa0 c01e48a e67fcfa 2c9db99 e67fcfa c01e48a 7896fa0 c01e48a f03d4c1 c01e48a e67fcfa f03d4c1 3b1c0d9 2c9db99 3b1c0d9 2c9db99 3b1c0d9 f03d4c1 3b1c0d9 2cc779d 3b1c0d9 c01e48a 2cc779d c01e48a 3b1c0d9 e67fcfa 2c9db99 9d2161a f535f34 2c9db99 5c85bea 7896fa0 2c9db99 f03d4c1 2c9db99 f03d4c1 2c9db99 f03d4c1 2c9db99 3b1c0d9 7097d3e 3b1c0d9 9e69184 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 |
import streamlit as st
import pandas as pd
import numpy as np
import plotly.graph_objs as go
import requests
from io import StringIO
import base64
def load_data_predictions(github_token):
    """
    Fetch Predictions.csv from the GitHub 'Forecast_DAM_V2' repository
    via the blob SHA. This works for files larger than 1 MB (the contents
    endpoint truncates bigger files, the blobs endpoint does not).

    Parameters
    ----------
    github_token : str
        GitHub personal access token with read access to the repository.

    Returns
    -------
    tuple[pandas.DataFrame, pandas.DataFrame]
        ``(df, df_filtered)``: the full prediction table with renamed model
        columns and parsed dates, and the same table restricted to rows
        where 'Real Price' is known. On any HTTP/metadata failure an error
        is shown in the Streamlit UI and two empty DataFrames are returned.
    """
    owner = "mmmapms"
    repo = "Forecast_DAM_V2"
    file_path = "Predictions.csv"

    # 1. Get file metadata (including SHA) from the "contents" endpoint.
    #    A timeout prevents the whole Streamlit script from hanging forever
    #    if the GitHub API is unreachable.
    url_contents = f"https://api.github.com/repos/{owner}/{repo}/contents/{file_path}"
    headers_contents = {
        "Authorization": f"token {github_token}",
    }
    response_contents = requests.get(url_contents, headers=headers_contents, timeout=30)
    if response_contents.status_code != 200:
        st.error("Failed to download file metadata. Check token and file path.")
        return pd.DataFrame(), pd.DataFrame()

    json_data = response_contents.json()
    # We expect "sha" to be present for the file
    if "sha" not in json_data:
        st.error("No 'sha' field found in JSON response. File might be missing.")
        return pd.DataFrame(), pd.DataFrame()
    sha = json_data["sha"]

    # 2. Use the "blobs" endpoint to fetch the raw file content; the raw
    #    Accept header is crucial for large files.
    url_blob = f"https://api.github.com/repos/{owner}/{repo}/git/blobs/{sha}"
    headers_blob = {
        "Authorization": f"token {github_token}",
        "Accept": "application/vnd.github.v3.raw",
    }
    response_blob = requests.get(url_blob, headers=headers_blob, timeout=30)
    if response_blob.status_code != 200:
        st.error(f"Failed to fetch raw blob. Status code: {response_blob.status_code}")
        return pd.DataFrame(), pd.DataFrame()

    # 3. The response body is the raw CSV text; read it into a DataFrame.
    csv_content = StringIO(response_blob.text)
    df = pd.read_csv(csv_content, encoding='utf-8')

    # 4. Rename raw column names to the display names used across the app.
    df = df.rename(columns={
        'Price': 'Real Price',
        'DNN1': 'Neural Network 1',
        'DNN2': 'Neural Network 2',
        'DNN3': 'Neural Network 3',
        'DNN4': 'Neural Network 4',
        'LEAR56': 'Regularized Linear Model 1',
        'LEAR84': 'Regularized Linear Model 2',
        'LEAR112': 'Regularized Linear Model 3',
        'LEAR730': 'Regularized Linear Model 4',
        'Persis': 'Persistence Model',
        'Hybrid_Ensemble': 'Hybrid Ensemble',
        'Weighted_Ensemble': 'Weighted Ensemble'
    })

    # 5. Parse dates (day-first format) and keep only rows where the real
    #    price is already known (future hours have NaN 'Real Price').
    df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)
    df_filtered = df.dropna(subset=['Real Price'])
    return df, df_filtered
# GitHub token comes from Streamlit secrets; without it no data can be
# fetched, so stop the script early instead of falling through to the
# lines below, which would raise a NameError on the undefined DataFrames.
github_token = st.secrets["GitHub_Token_Margarida"]
if github_token:
    df, df_filtered = load_data_predictions(github_token)
else:
    st.warning("Please enter your GitHub Personal Access Token to proceed.")
    st.stop()  # halt script execution: df / df_filtered are not defined

# Date bounds offered in the metrics date-picker (only days with a known
# real price) and the default one-week window for the main forecast graph.
min_date_allowed_pred = df_filtered['Date'].min().date()
max_date_allowed_pred = df_filtered['Date'].max().date()
end_date = df['Date'].max().date()
start_date = end_date - pd.Timedelta(days=7)

# Columns included in the correlation-matrix heatmap further below.
models_corr_matrix = ['Real Price', 'Persistence Model', 'Neural Network 1', 'Neural Network 2', 'Neural Network 3',
                      'Neural Network 4', 'Regularized Linear Model 1',
                      'Regularized Linear Model 2', 'Regularized Linear Model 3',
                      'Regularized Linear Model 4', 'Weighted Ensemble']
def conformal_predictions(data):
    """
    Attach rolling conformal prediction intervals for the 'Weighted Ensemble'.

    For every calendar day with at least 30 days of preceding history, the
    0.9 quantile of the per-hour residuals (forecast minus real price) over
    the previous 30 days is used as a symmetric half-width around the
    ensemble forecast.

    NOTE(review): `data` is mutated in place (new columns 'Residuals',
    'Hour', 'Quantile_90', 'Lower_Interval', 'Upper_Interval'; index is set
    to 'Date' and then reset) and the same object is also returned.
    Assumes hourly rows with a 'Date' column parseable as a DatetimeIndex.
    """
    # Signed forecast error of the ensemble; interval widths are derived
    # from the distribution of these residuals.
    data['Residuals'] = data['Weighted Ensemble'] - data['Real Price']
    data.set_index('Date', inplace=True)
    data['Hour'] = data.index.hour
    min_date = data.index.min()
    # Iterate over unique calendar days (timestamps normalized to midnight).
    for date in data.index.normalize().unique():
        # Only days with a full 30-day calibration window get intervals.
        if date >= min_date + pd.DateOffset(days=30):
            start_date = date - pd.DateOffset(days=30)
            end_date = date
            # Calibration window: the 30 days strictly before `date`.
            # Label-based slicing on a DatetimeIndex is inclusive at both
            # ends, hence the one-hour offset to exclude `date` 00:00 itself.
            calculation_window = data[start_date:end_date-pd.DateOffset(hours=1)]
            # Per-hour 0.9 residual quantile -> symmetric interval half-width.
            quantiles = calculation_window.groupby('Hour')['Residuals'].quantile(0.9)
            # Use .loc to safely access and modify data
            if date in data.index:
                current_day_data = data.loc[date.strftime('%Y-%m-%d')]
                for hour in current_day_data['Hour'].unique():
                    if hour in quantiles.index:
                        hour_quantile = quantiles[hour]
                        # Boolean mask selecting this day's rows for this hour.
                        idx = (data.index.normalize() == date) & (data.Hour == hour)
                        data.loc[idx, 'Quantile_90'] = hour_quantile
                        data.loc[idx, 'Lower_Interval'] = data.loc[idx, 'Weighted Ensemble'] - hour_quantile
                        data.loc[idx, 'Upper_Interval'] = data.loc[idx, 'Weighted Ensemble'] + hour_quantile
    # Restore 'Date' as a regular column for the downstream plotting code.
    data.reset_index(inplace=True)
    return data
# ---- Page header: title on the left, partner logos on the right ----
col1, col2 = st.columns([5, 2])  # wider left column for the page title
with col1:
    st.title("Belgium: Electricity Price Forecasting")
with col2:
    # Two stacked placeholders; the second one receives the markdown below.
    upper_space = col2.empty()
    upper_space = col2.empty()
    # Side-by-side sub-columns so the two logos sit next to each other.
    logo_left, logo_right = st.columns(2)
    with logo_left:
        st.image("KU_Leuven_logo.png", width=100)
    with logo_right:
        st.image("energyville_logo.png", width=100)
upper_space.markdown("""
""", unsafe_allow_html=True)

# ---- Sidebar: user inputs that drive the charts and metrics below ----
# Single source of truth for the model display names (index 8 below relies
# on 'Weighted Ensemble' being the ninth entry).
_MODEL_CHOICES = [
    'Neural Network 1', 'Neural Network 2', 'Neural Network 3', 'Neural Network 4',
    'Regularized Linear Model 1', 'Regularized Linear Model 2',
    'Regularized Linear Model 3', 'Regularized Linear Model 4',
    'Weighted Ensemble', 'Persistence Model',
]
with st.sidebar:
    st.write("### Variables Selection for Graph")
    st.write("Select which variables you'd like to include in the graph. This will affect the displayed charts and available data for download.")
    selected_variables = st.multiselect(
        "Select variables to display:",
        options=['Real Price'] + _MODEL_CHOICES,
        default=['Real Price', 'Weighted Ensemble'],
    )
    st.write("### Model Selection for Scatter Plot")
    model_selection = st.selectbox(
        "Select which model's predictions to display:",
        options=_MODEL_CHOICES,
        index=8,  # default selection: 'Weighted Ensemble'
    )
    st.write("### Date Range for Metrics Calculation")
    st.write("Select the date range to calculate the metrics for the predictions. This will influence the accuracy metrics displayed below. The complete dataset ranges from 10/03/2024 until today.")
    start_date_pred, end_date_pred = st.date_input(
        "Select Date Range for Metrics Calculation:",
        [min_date_allowed_pred, max_date_allowed_pred],
    )
# Main content: line chart of the last week of prices and forecasts.
if not selected_variables:
    st.warning("Please select at least one variable to display.")
else:
    st.write("## Belgian Day-Ahead Electricity Prices")
    # Compute conformal prediction intervals whenever the Weighted Ensemble
    # is plotted (adds Quantile_90 / Lower_Interval / Upper_Interval to df).
    if 'Weighted Ensemble' in selected_variables:
        df = conformal_predictions(df)
    # Restrict the graph to the default window starting at `start_date`
    # (one week before the latest available date).
    temp_df = df[(df['Date'] >= pd.Timestamp(start_date))]
    # Initialize Plotly figure
    fig = go.Figure()
    for variable in selected_variables:
        fig.add_trace(go.Scatter(x=temp_df['Date'], y=temp_df[variable], mode='lines', name=variable))
        # Add the conformal interval band for the Weighted Ensemble trace
        # (only present if conformal_predictions ran above).
        if variable == 'Weighted Ensemble' and 'Quantile_90' in df.columns:
            # Lower bound: invisible line that serves as the fill baseline.
            fig.add_trace(go.Scatter(
                x=temp_df['Date'],
                y=temp_df['Lower_Interval'],
                mode='lines',
                line=dict(width=0),
                showlegend=False
            ))
            # Upper bound, shaded down to the lower-bound trace added just
            # above — the two traces must stay adjacent for 'tonexty' to work.
            fig.add_trace(go.Scatter(
                x=temp_df['Date'],
                y=temp_df['Upper_Interval'],
                mode='lines',
                line=dict(width=0),
                fill='tonexty',  # Fill between this trace and the previous one
                fillcolor='rgba(68, 68, 68, 0.3)',
                name='P10/P90 prediction intervals'
            ))
    fig.update_layout(xaxis_title="Date", yaxis_title="Price [EUR/MWh]")
    st.plotly_chart(fig, use_container_width=True)
    st.write("The graph presented here illustrates the day-ahead electricity price forecasts for Belgium, covering the period from one week ago up to tomorrow. The forecasts are made every morning on day D at 08.00 for day D+1.")
if not selected_variables:
    st.warning("Please select at least one variable to display.")
else:
    # ---- Scatter plot: observed price vs. selected model's predictions ----
    st.write("## Scatter Plot: Real Price vs Model Predictions")
    # Use df_filtered (rows with a known real price) rather than df: rows
    # with NaN 'Real Price' inside the date range would make np.polyfit
    # return NaN coefficients and silently break the regression line.
    plot_df = df_filtered[(df_filtered['Date'] >= pd.Timestamp(min_date_allowed_pred)) & (df_filtered['Date'] <= pd.Timestamp(max_date_allowed_pred))]
    model_column = model_selection
    # Create the scatter plot
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=plot_df['Real Price'], y=plot_df[model_column], mode='markers', name=f"Real Price vs {model_selection} Predictions"))
    # Least-squares line of best fit (degree-1 polynomial: slope m, intercept b).
    m, b = np.polyfit(plot_df['Real Price'], plot_df[model_column], 1)
    regression_line = m * plot_df['Real Price'] + b
    # Add the line of best fit to the figure
    fig.add_trace(go.Scatter(x=plot_df['Real Price'], y=regression_line, mode='lines', line=dict(color='black'), showlegend=False))
    # Update layout with appropriate titles
    fig.update_layout(
        title=f"Scatter Plot of Real Price vs {model_selection} Predictions from {min_date_allowed_pred} to {max_date_allowed_pred}",
        xaxis_title="Real Price [EUR/MWh]",
        yaxis_title=f"{model_selection} Predictions [EUR/MWh]",
        xaxis=dict(range=[-160, 160]),  # fixed ranges keep plots comparable between models
        yaxis=dict(range=[-150, 150]),
        showlegend=False
    )
    st.plotly_chart(fig, use_container_width=True)
# ---- Accuracy metrics over the user-selected date range ----
if start_date_pred and end_date_pred:
    st.header("Accuracy Metrics")
    st.write(f"The accuracy metrics are calculated from **{start_date_pred}** to **{end_date_pred}**. This interval can be changed in the sidebar.")
    # The end date is inclusive: keep rows strictly before end_date_pred + 1 day.
    filtered_df = df_filtered[(df_filtered['Date'] >= pd.Timestamp(start_date_pred)) & (df_filtered['Date'] < (pd.Timestamp(end_date_pred) + pd.Timedelta(days=1)))]
    # List of models for convenience
    models = [
        'Neural Network 1', 'Neural Network 2', 'Neural Network 3', 'Neural Network 4',
        'Regularized Linear Model 1', 'Regularized Linear Model 2', 'Regularized Linear Model 3', 'Regularized Linear Model 4',
        'Persistence Model', 'Weighted Ensemble'
    ]
    if filtered_df.empty:
        # Without any observed prices in the window, every metric would be
        # NaN — tell the user instead of rendering a meaningless table.
        st.warning("No data available in the selected date range.")
    else:
        # Placeholder for results
        results = {'Metric': ['MAE', 'rMAE']}
        p_real = filtered_df['Real Price']
        # The persistence-model MAE is the common denominator of every rMAE;
        # compute it once instead of once per model inside the loop.
        persistence_mae = np.mean(np.abs(p_real - filtered_df['Persistence Model']))
        # Calculate and store metrics for each model.
        for model in models:
            # Column names in filtered_df match the model display names directly.
            p_pred = filtered_df[model]
            mae = np.mean(np.abs(p_real - p_pred))
            # rMAE: MAE relative to the naive persistence benchmark
            # (1.00 for the persistence model itself by construction).
            rmae = mae / persistence_mae
            results[model] = [f"{mae:.2f}", f"{rmae:.2f}"]
        # Convert the results to a DataFrame (models as rows) for display.
        metrics_df = pd.DataFrame(results)
        transposed_metrics_df = metrics_df.set_index('Metric').T
        col1, col2 = st.columns([3, 2])
        # Display the transposed DataFrame
        with col1:
            st.dataframe(transposed_metrics_df, hide_index=False)
        with col2:
            st.markdown("""
<style>
.big-font {
    font-size: 20px;
    font-weight: 500;
}
</style>
<div class="big-font">
Equations
</div>
""", unsafe_allow_html=True)
            # Rendering LaTeX equations
            st.markdown(r"""
$\text{MAE} = \frac{1}{n}\sum_{i=1}^{n}|y_i - \hat{y}_i|$
$\text{rMAE} = \frac{\text{MAE}}{MAE_{\text{Persistence Model}}}$
""")
# ---- Correlation matrix between the real price and every model ----
st.write("## Correlation Matrix")
pairwise_corr = df_filtered[models_corr_matrix].corr()
fig = go.Figure(
    data=go.Heatmap(
        z=pairwise_corr.values,
        x=pairwise_corr.columns,
        y=pairwise_corr.index,
    )
)
# Reverse the y-axis so the matrix reads top-left to bottom-right.
fig.update_layout(yaxis_autorange='reversed')
st.plotly_chart(fig, use_container_width=True)

# ---- Disclaimer and contact footer ----
st.write("## Access Predictions")
st.write("All forecasts are provided on an 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. If you are interested in accessing the predictions made by the models, the models themselves or how these can be incorporated into your workflows, please contact Margarida Mascarenhas (PhD Student, KU Leuven) at margarida.mascarenhas@kuleuven.be or Hussain Kazmi (assistant professor, KU Leuven) at hussain.kazmi@kuleuven.be.")
|