Spaces:
Sleeping
Sleeping
import pandas as pd | |
import streamlit as st | |
import plotly.express as px | |
from utils import get_leaderboard | |
def header() -> None:
    """Render the dashboard title followed by a horizontal divider."""
    page_title = "EnFoBench - Gas Demand"
    st.title(page_title)
    st.divider()
def logos() -> None:
    """Show the two logo images side by side, one per column."""
    logo_paths = (
        "./images/ku_leuven_logo.png",
        "./images/energyville_logo.png",
    )
    # Columns are filled left to right in the order the paths are listed.
    for column, path in zip(st.columns(2), logo_paths):
        with column:
            st.image(path)
def model_selector(models: list[str]) -> set[str]:
    """Render grouped model checkboxes and return the ticked models.

    Models are grouped by the part of their name before the first ``"."``.
    Each checkbox is keyed in ``st.session_state`` by the full model name,
    which is the key the "Select None" / "Select All" buttons write to.

    Args:
        models: Full model names, normally of the form ``"<group>.<name>"``.
            Names without a ``"."`` are listed under an ``"other"`` group
            instead of raising; duplicates are rendered once.

    Returns:
        The full names of all models whose checkbox is currently ticked.
    """
    ungrouped_label = "other"
    # Duplicate names would produce duplicate widget keys, so keep the first
    # occurrence of each name while preserving the incoming order.
    unique_models = list(dict.fromkeys(models))

    # group label -> [(checkbox label, full model name used as widget key)]
    model_groups: dict[str, list[tuple[str, str]]] = {}
    for model in unique_models:
        group, separator, model_name = model.partition(".")
        if not separator:
            group, model_name = ungrouped_label, model
        model_groups.setdefault(group, []).append((model_name, model))

    # Checkboxes start ticked. Seeding the state here, instead of passing
    # value=True to st.checkbox, avoids Streamlit's warning about a widget
    # having both a default value and a Session State value once the buttons
    # below write to these keys.
    for model in unique_models:
        if model not in st.session_state:
            st.session_state[model] = True

    st.header("Models to include")
    left, right = st.columns(2)
    with left:
        if st.button("Select None", use_container_width=True):
            for model in unique_models:
                st.session_state[model] = False
    with right:
        if st.button("Select All", use_container_width=True):
            for model in unique_models:
                st.session_state[model] = True

    models_to_plot: set[str] = set()
    for group, members in model_groups.items():
        st.text(group)
        for label, model in members:
            if st.checkbox(label, key=model):
                models_to_plot.add(model)
    return models_to_plot
def overview_view(data):
    """Render the overview page: intro text, top-10 charts and leaderboard.

    Args:
        data: Benchmark results; passed unchanged to ``get_leaderboard``
            (its schema is defined by that helper, not visible here).
    """
    st.markdown(
        """
        [EnFoBench](https://github.com/attila-balint-kul/energy-forecast-benchmark-toolkit)
        is a community driven benchmarking framework for energy forecasting models.
        This dashboard presents the results of the gas demand forecasting usecase. All models were cross-validated
        on **365 days** of day ahead forecasting horizon *(10AM until midnight of the next day)*.
        """
    )
    st.divider()
    st.markdown("## Leaderboard")

    leaderboard = get_leaderboard(data, ["MAE.mean", "RMSE.mean", "rMAE.mean"])

    top_n = 10
    # (metric, y-axis title) per chart; only the left-most chart labels its axis.
    chart_specs = (("MAE", "Model"), ("RMSE", ""), ("rMAE", ""))
    for column, (metric, yaxis_title) in zip(st.columns(3), chart_specs):
        metric_column = f"{metric}.mean"
        with column:
            # These are error metrics, so the best models have the LOWEST
            # values: sort ascending before taking the head. (Sorting
            # descending first would select the worst models instead.)
            best_models = leaderboard.sort_values(metric_column).head(top_n)
            fig = px.bar(best_models, x=metric_column, y=best_models.index)
            fig.update_layout(
                title=f"Top {top_n} models by {metric}",
                xaxis_title="",
                yaxis_title=yaxis_title,
            )
            st.plotly_chart(fig, use_container_width=True)

    st.dataframe(leaderboard, use_container_width=True)
def buildings_view(data):
    """Render the buildings page: count, per-building table and two pie charts.

    Args:
        data: Frame that must contain ``unique_id`` plus the metadata columns
            listed in ``column_labels`` below.
    """
    # Source column -> label shown in the table (order defines column order).
    column_labels = {
        "metadata.cluster_size": "Cluster size",
        "metadata.building_class": "Building class",
        "metadata.location_id": "Location ID",
        "metadata.timezone": "Timezone",
        "dataset.available_history.days": "Available history (days)",
    }
    selected = data[["unique_id", *column_labels]]
    # One row per building, taken from the first entry of each unique_id.
    buildings = selected.groupby("unique_id").first().rename(columns=column_labels)

    st.metric("Number of buildings", len(buildings))
    st.divider()
    st.markdown("### Buildings")

    history_label = "Available history (days)"
    # The progress bar is scaled to the longest history among the buildings.
    history_column = st.column_config.ProgressColumn(
        history_label,
        help="Available training data during the first prediction.",
        format="%f",
        min_value=0,
        max_value=float(buildings[history_label].max()),
    )
    st.dataframe(
        buildings,
        use_container_width=True,
        column_config={history_label: history_column},
    )

    pie_specs = (
        ("#### Building classes", "Building class"),
        ("#### Timezones", "Timezone"),
    )
    for column, (heading, category) in zip(st.columns(2, gap="large"), pie_specs):
        with column:
            st.markdown(heading)
            # size() yields an unnamed series, so after reset_index the
            # counts live in a column literally named 0.
            counts = buildings.groupby(category).size().reset_index()
            fig = px.pie(counts, values=0, names=category)
            st.plotly_chart(fig, use_container_width=True)
def models_view(data):
    """Render the models page: count, per-model table and two pie charts.

    Args:
        data: Frame that must contain ``model`` plus the ``cv_config.*`` and
            ``model_info.*`` columns listed in ``column_labels`` below.
    """
    # Source column -> label shown in the table (order defines column order).
    column_labels = {
        "cv_config.folds": "CV Folds",
        "cv_config.horizon": "CV Horizon",
        "cv_config.step": "CV Step",
        "cv_config.time": "CV Time",
        "model_info.repository": "Image Repository",
        "model_info.tag": "Image Tag",
        "model_info.variate_type": "Variate type",
    }
    selected = data[["model", *column_labels]]
    # One row per model, taken from the first entry of each model.
    models = selected.groupby("model").first().rename(columns=column_labels)

    st.metric("Number of models", len(models))
    st.divider()
    st.markdown("### Models")
    st.dataframe(models, use_container_width=True)

    def show_share_pie(frame, category):
        # size() yields an unnamed series, so after reset_index the counts
        # live in a column literally named 0.
        counts = frame.groupby(category).size().reset_index()
        fig = px.pie(counts, values=0, names=category)
        st.plotly_chart(fig, use_container_width=True)

    variate_column, framework_column = st.columns(2, gap="large")
    with variate_column:
        st.markdown("#### Variate types")
        show_share_pie(models, "Variate type")
    with framework_column:
        st.markdown("#### Frameworks")
        # The framework is the part of the model name before the first ".".
        with_framework = models.assign(Framework=models.index.str.split(".").str[0])
        show_share_pie(with_framework, "Framework")
def _style_metric_table(styler, gradient_axis):
    """Apply the shared table look: colour gradient, 2 decimals, centred text.

    ``gradient_axis`` is forwarded to ``Styler.background_gradient`` (``0``
    colours each column on its own scale, ``None`` uses one scale for the
    whole table).
    """
    styler.background_gradient(cmap="seismic", axis=gradient_axis)
    styler.format(precision=2)
    # Center text and increase font size.
    styler.map(lambda _: "text-align: center; font-size: 14px;")
    return styler


def performance_view(data: pd.DataFrame, models_to_plot: set[str]):
    """Render the performance page for the selected models.

    The page contains, in order: a box plot of one metric column per model,
    a scatter plot of two metric columns against each other, a per-model
    statistics table and a per-building table.

    Args:
        data: Frame with a ``model`` and a ``unique_id`` column plus metric
            columns named ``"<metric>.<aggregation>"`` (e.g. ``"MAE.mean"``).
        models_to_plot: Names of the models to include.
    """
    metrics = ["MAE", "RMSE", "MBE", "rMAE"]
    aggregations = ["min", "mean", "median", "max", "std"]

    data_to_plot = data[data["model"].isin(models_to_plot)].sort_values(
        by="model", ascending=True
    )
    if data_to_plot.empty:
        # With nothing selected the box plot height below would be 0, which
        # Plotly rejects; show a hint instead of crashing.
        st.info("No data to display: select at least one model.")
        return

    # --- Box plot: distribution of one metric column per model -------------
    left, right = st.columns(2, gap="small")
    with left:
        metric = st.selectbox("Metric", metrics, index=0, key="box_metric")
    with right:
        aggregation = st.selectbox(
            "Aggregation", aggregations, index=1, key="box_aggregation"
        )
    box_column = f"{metric}.{aggregation}"
    st.markdown(f"#### {aggregation.capitalize()} {metric} per building")

    # Rank the models by the median of the plotted column so the boxes are
    # drawn in order of typical performance.
    rank_df = (
        data_to_plot.groupby("model")[box_column]
        .median()
        .sort_values()
        .reset_index()
        .rename_axis("rank")
        .reset_index()[["rank", "model"]]
    )
    fig = px.box(
        data_to_plot.merge(rank_df, on="model").sort_values(by="rank"),
        x=box_column,
        y="model",
        color="model",
        points="all",
    )
    fig.update_layout(showlegend=False, height=40 * len(models_to_plot))
    st.plotly_chart(fig, use_container_width=True)

    st.divider()

    # --- Scatter plot: one metric column against another --------------------
    left, right = st.columns(2, gap="large")
    with left:
        x_metric = st.selectbox("Metric", metrics, index=0, key="x_metric")
        x_aggregation = st.selectbox(
            "Aggregation", aggregations, index=1, key="x_aggregation"
        )
    with right:
        # This widget selects a metric, so it is labelled "Metric" like its
        # x-axis counterpart.
        y_metric = st.selectbox("Metric", metrics, index=1, key="y_metric")
        y_aggregation = st.selectbox(
            "Aggregation", aggregations, index=1, key="y_aggregation"
        )
    st.markdown(
        f"#### {x_aggregation.capitalize()} {x_metric} vs {y_aggregation.capitalize()} {y_metric}"
    )
    fig = px.scatter(
        data_to_plot,
        x=f"{x_metric}.{x_aggregation}",
        y=f"{y_metric}.{y_aggregation}",
        color="model",
    )
    fig.update_layout(height=600)
    st.plotly_chart(fig, use_container_width=True)

    st.divider()

    # --- Tables --------------------------------------------------------------
    left, right = st.columns(2, gap="small")
    with left:
        table_metric = st.selectbox("Metric", metrics, index=0, key="table_metric")
    with right:
        table_aggregation = st.selectbox(
            "Aggregation across folds",
            aggregations,
            index=1,
            key="table_aggregation",
        )

    stat_columns = [f"{table_metric}.{name}" for name in aggregations]
    metrics_table = data_to_plot.groupby("model").agg(
        table_aggregation, numeric_only=True
    )[stat_columns]

    st.markdown(
        f"#### {table_aggregation.capitalize()} {table_metric} stats per model"
    )
    # axis=0: colour every statistic column on its own scale.
    styled_table = metrics_table.style.pipe(_style_metric_table, gradient_axis=0)
    st.dataframe(styled_table, use_container_width=True)

    metrics_per_building_table = (
        data_to_plot.groupby(["model", "unique_id"])
        .agg(table_aggregation, numeric_only=True)
        .reset_index()
        .pivot(
            index="model",
            columns="unique_id",
            values=f"{table_metric}.{table_aggregation}",
        )
    )
    # Compute both row summaries from the building columns BEFORE inserting
    # either of them; otherwise the mean would also average the freshly
    # inserted "median" column.
    row_median = metrics_per_building_table.median(axis=1)
    row_mean = metrics_per_building_table.mean(axis=1)
    metrics_per_building_table.insert(0, "median", row_median)
    metrics_per_building_table.insert(0, "mean", row_mean)
    metrics_per_building_table = metrics_per_building_table.sort_values(by="mean")

    st.markdown(
        f"#### {table_aggregation.capitalize()} {table_metric} stats per building"
    )
    # axis=None: one colour scale shared by the whole table.
    styled_table = metrics_per_building_table.style.pipe(
        _style_metric_table, gradient_axis=None
    )
    st.dataframe(styled_table, use_container_width=True)
def computation_view(data, models_to_plot: set[str]):
    """Render the computational-resources page for the selected models.

    Shows a parallel-coordinates plot of mean resource usage and mean error
    metrics per model, followed by a scatter plot of a chosen error metric
    against CPU usage.

    Args:
        data: Frame with a ``model`` column, ``resource_usage.CPU`` /
            ``resource_usage.memory`` columns and metric columns named
            ``"<metric>.<aggregation>"`` (e.g. ``"MAE.mean"``).
        models_to_plot: Names of the models to include.
    """
    metrics = ["MAE", "RMSE", "MBE", "rMAE"]
    aggregations = ["min", "mean", "median", "max", "std"]

    data_to_plot = data[data["model"].isin(models_to_plot)].sort_values(
        by="model", ascending=True
    )

    st.markdown("#### Computational Resources")
    fig = px.parallel_coordinates(
        data_to_plot.groupby("model").mean(numeric_only=True).reset_index(),
        dimensions=[
            "model",
            "resource_usage.CPU",
            "resource_usage.memory",
            "MAE.mean",
            "RMSE.mean",
            "MBE.mean",
            "rMAE.mean",
        ],
        color="rMAE.mean",
        color_continuous_scale=px.colors.diverging.Portland,
    )
    st.plotly_chart(fig, use_container_width=True)

    st.divider()

    left, center, right = st.columns(3, gap="small")
    with left:
        metric = st.selectbox("Metric", metrics, index=0)
    with center:
        aggregation_per_building = st.selectbox(
            "Aggregation per building", aggregations, index=1
        )
    with right:
        aggregation_per_model = st.selectbox(
            "Aggregation per model", aggregations, index=1
        )

    st.markdown(
        f"#### {aggregation_per_model.capitalize()} {aggregation_per_building.capitalize()} {metric} vs CPU usage"
    )

    # The "<metric>.<aggregation>" columns already hold one statistic per
    # building, so the per-building choice selects the column while the
    # per-model choice is the function that collapses a model's buildings
    # into a single point (matching the heading above).
    aggregated_data = (
        data_to_plot.groupby("model")
        .agg(aggregation_per_model, numeric_only=True)
        .reset_index()
    )
    fig = px.scatter(
        aggregated_data,
        x="resource_usage.CPU",
        y=f"{metric}.{aggregation_per_building}",
        color="model",
        log_x=True,
    )
    fig.update_layout(height=600)
    st.plotly_chart(fig, use_container_width=True)