# Spaces: EDS-lab / EnFoBench-GasDemand (Sleeping)
# File size: 13,074 Bytes
# f7b117b

import pandas as pd
import streamlit as st
import plotly.express as px

from utils import get_leaderboard


def header() -> None:
    """Render the dashboard title with a horizontal divider underneath."""
    page_title = "EnFoBench - Gas Demand"
    st.title(page_title)
    st.divider()


def logos() -> None:
    """Show the two logo images side by side in two equal-width columns."""
    logo_paths = (
        "./images/ku_leuven_logo.png",
        "./images/energyville_logo.png",
    )
    # One column per image, filled from left to right.
    for column, logo_path in zip(st.columns(2), logo_paths):
        with column:
            st.image(logo_path)


def model_selector(models: list[str]) -> set[str]:
    """Render one checkbox per model, grouped by name prefix, and return the selection.

    Model names are expected to look like ``"<group>.<name>"``. The part before
    the first dot becomes the group heading and the remainder the checkbox
    label. Names without a dot are listed under an ``"Other"`` heading instead
    of raising an error.

    Each checkbox is keyed by the full model name in ``st.session_state``, which
    is what the "Select None" / "Select All" buttons write to.

    Args:
        models: Full model names to offer for selection.

    Returns:
        The full names of all models whose checkbox is currently ticked.
    """
    # Group models by their prefix, keeping (full name, label) pairs so the
    # session-state key never has to be reconstructed from its parts.
    model_groups: dict[str, list[tuple[str, str]]] = {}
    for model in models:
        group, separator, model_name = model.partition(".")
        if not separator:
            group, model_name = "Other", model
        model_groups.setdefault(group, []).append((model, model_name))

    st.header("Models to include")
    left, right = st.columns(2)
    with left:
        if st.button("Select None", use_container_width=True):
            for model in models:
                st.session_state[model] = False
    with right:
        if st.button("Select All", use_container_width=True):
            for model in models:
                st.session_state[model] = True

    models_to_plot: set[str] = set()
    for group, members in model_groups.items():
        st.text(group)
        for model, model_name in members:
            # Seed the default through session state rather than `value=`:
            # combining a widget default with session-state writes to the same
            # key makes Streamlit emit a warning.
            if model not in st.session_state:
                st.session_state[model] = True
            if st.checkbox(model_name, key=model):
                models_to_plot.add(model)
    return models_to_plot


def overview_view(data) -> None:
    """Render the introduction text, the top-10 bar charts and the leaderboard table.

    The leaderboard is obtained from ``get_leaderboard`` for the mean MAE, RMSE
    and rMAE columns. For each of these error metrics a bar chart shows the ten
    models with the *lowest* value, followed by the full leaderboard table.

    Args:
        data: Benchmark results, passed unchanged to ``get_leaderboard``.
    """
    st.markdown(
        """
    [EnFoBench](https://github.com/attila-balint-kul/energy-forecast-benchmark-toolkit) 
    is a community driven benchmarking framework for energy forecasting models. 
    
    This dashboard presents the results of the gas demand forecasting usecase. All models were cross-validated
    on **365 days** of day ahead forecasting horizon *(10AM until midnight of the next day)*.
    """
    )

    st.divider()
    st.markdown("## Leaderboard")

    # (leaderboard column, chart title, y-axis title); only the leftmost chart
    # carries a y-axis title.
    charts = [
        ("MAE.mean", "Top 10 models by MAE", "Model"),
        ("RMSE.mean", "Top 10 models by RMSE", ""),
        ("rMAE.mean", "Top 10 models by rMAE", ""),
    ]
    leaderboard = get_leaderboard(data, [metric_column for metric_column, _, _ in charts])

    for column, (metric_column, title, yaxis_title) in zip(st.columns(3), charts):
        with column:
            # These are error metrics, so the best models are the ones with the
            # smallest values: sort ascending before taking the first ten.
            best_models = leaderboard.sort_values(metric_column).head(10)
            fig = px.bar(best_models, x=metric_column, y=best_models.index)
            fig.update_layout(title=title, xaxis_title="", yaxis_title=yaxis_title)
            st.plotly_chart(fig, use_container_width=True)

    st.dataframe(leaderboard, use_container_width=True)


def buildings_view(data):
    """Render the buildings tab: count, metadata table and two pie charts.

    One row per ``unique_id`` is built from the first available value of each
    metadata column, shown as a table with the available history as a progress
    bar, and summarised in pie charts by building class and by timezone.
    """
    # Source column -> label shown in the dashboard (order defines table order).
    column_labels = {
        "metadata.cluster_size": "Cluster size",
        "metadata.building_class": "Building class",
        "metadata.location_id": "Location ID",
        "metadata.timezone": "Timezone",
        "dataset.available_history.days": "Available history (days)",
    }
    per_building = data[["unique_id", *column_labels]].groupby("unique_id").first()
    buildings = per_building.rename(columns=column_labels)

    st.metric("Number of buildings", len(buildings))
    st.divider()

    st.markdown("### Buildings")
    history_label = "Available history (days)"
    # The progress bar is scaled to the longest history in the table.
    history_column = st.column_config.ProgressColumn(
        history_label,
        help="Available training data during the first prediction.",
        format="%f",
        min_value=0,
        max_value=float(buildings[history_label].max()),
    )
    st.dataframe(
        buildings,
        use_container_width=True,
        column_config={history_label: history_column},
    )

    pie_sections = (
        ("#### Building classes", "Building class"),
        ("#### Timezones", "Timezone"),
    )
    for column, (heading, category) in zip(st.columns(2, gap="large"), pie_sections):
        with column:
            st.markdown(heading)
            # size() yields an unnamed series, so after reset_index the counts
            # live in a column named 0.
            counts = buildings.groupby(category).size().reset_index()
            fig = px.pie(counts, values=0, names=category)
            st.plotly_chart(fig, use_container_width=True)


def models_view(data):
    """Render the models tab: count, configuration table and two pie charts.

    One row per ``model`` is built from the first available value of each
    cross-validation and model-info column. The pie charts break the models
    down by variate type and by framework, where the framework is the part of
    the model name before the first dot.
    """
    # Source column -> label shown in the dashboard (order defines table order).
    column_labels = {
        "cv_config.folds": "CV Folds",
        "cv_config.horizon": "CV Horizon",
        "cv_config.step": "CV Step",
        "cv_config.time": "CV Time",
        "model_info.repository": "Image Repository",
        "model_info.tag": "Image Tag",
        "model_info.variate_type": "Variate type",
    }
    per_model = data[["model", *column_labels]].groupby("model").first()
    models = per_model.rename(columns=column_labels)

    st.metric("Number of models", len(models))
    st.divider()

    st.markdown("### Models")
    st.dataframe(models, use_container_width=True)

    variate_column, framework_column = st.columns(2, gap="large")
    with variate_column:
        st.markdown("#### Variate types")
        # size() yields an unnamed series, so the counts end up in column 0.
        variate_counts = models.groupby("Variate type").size().reset_index()
        variate_fig = px.pie(variate_counts, values=0, names="Variate type")
        st.plotly_chart(variate_fig, use_container_width=True)

    with framework_column:
        st.markdown("#### Frameworks")
        # Work on a copy so the table shown above keeps its original columns.
        with_framework = models.assign(
            Framework=models.index.str.split(".").str[0]
        )
        framework_counts = with_framework.groupby("Framework").size().reset_index()
        framework_fig = px.pie(framework_counts, values=0, names="Framework")
        st.plotly_chart(framework_fig, use_container_width=True)


def performance_view(data: pd.DataFrame, models_to_plot: set[str]) -> None:
    """Render the performance tab for the selected models.

    The tab contains, top to bottom:

    1. a box plot of one ``"<metric>.<aggregation>"`` column per model, with
       models ordered by the median of that column;
    2. a scatter plot of two freely chosen ``"<metric>.<aggregation>"`` columns;
    3. a table of the min/mean/median/max/std columns of one metric, aggregated
       per model;
    4. a model-by-building table of one ``"<metric>.<aggregation>"`` column,
       with row-wise mean and median prepended and rows sorted by the mean.

    Args:
        data: Benchmark results with a ``model`` column, a ``unique_id`` column
            and metric columns named ``"<metric>.<aggregation>"``.
        models_to_plot: Full names of the models to include.
    """
    metrics = ["MAE", "RMSE", "MBE", "rMAE"]
    aggregations = ["min", "mean", "median", "max", "std"]

    data_to_plot = data[data["model"].isin(models_to_plot)].sort_values(
        by="model", ascending=True
    )
    if data_to_plot.empty:
        # Without this guard the box plot is given height 0, which Plotly
        # rejects, and the pivot tables below have nothing to show.
        st.warning("Select at least one model to show performance results.")
        return

    def style_table(table: pd.DataFrame, gradient_axis):
        """Apply the shared colour gradient, number format and cell CSS."""
        styler = table.style
        styler.background_gradient(cmap="seismic", axis=gradient_axis)
        styler.format(precision=2)
        # center text and increase font size
        styler.map(lambda _: "text-align: center; font-size: 14px;")
        return styler

    # --- Box plot -----------------------------------------------------------
    # Explicit keys keep these widgets distinct from identically configured
    # selectboxes that may be rendered elsewhere on the same page.
    left, right = st.columns(2, gap="small")
    with left:
        metric = st.selectbox("Metric", metrics, index=0, key="box_metric")
    with right:
        aggregation = st.selectbox(
            "Aggregation", aggregations, index=1, key="box_aggregation"
        )
    st.markdown(f"#### {aggregation.capitalize()} {metric} per building")

    # Rank models by the median of the chosen column; the positional index
    # after sorting becomes the rank.
    rank_df = (
        data_to_plot.groupby(["model"])
        .agg("median", numeric_only=True)
        .sort_values(by=f"{metric}.{aggregation}")
        .reset_index()
        .rename_axis("rank")
        .reset_index()[["rank", "model"]]
    )

    fig = px.box(
        data_to_plot.merge(rank_df, on="model").sort_values(by="rank"),
        x=f"{metric}.{aggregation}",
        y="model",
        color="model",
        points="all",
    )
    fig.update_layout(showlegend=False, height=40 * len(models_to_plot))
    st.plotly_chart(fig, use_container_width=True)

    st.divider()

    # --- Metric vs metric scatter ------------------------------------------
    left, right = st.columns(2, gap="large")
    with left:
        x_metric = st.selectbox("Metric", metrics, index=0, key="x_metric")
        x_aggregation = st.selectbox(
            "Aggregation", aggregations, index=1, key="x_aggregation"
        )
    with right:
        # This box chooses a metric, so it is labelled "Metric" like its
        # left-hand counterpart.
        y_metric = st.selectbox("Metric", metrics, index=1, key="y_metric")
        y_aggregation = st.selectbox(
            "Aggregation", aggregations, index=1, key="y_aggregation"
        )

    st.markdown(
        f"#### {x_aggregation.capitalize()} {x_metric} vs {y_aggregation.capitalize()} {y_metric}"
    )
    fig = px.scatter(
        data_to_plot,
        x=f"{x_metric}.{x_aggregation}",
        y=f"{y_metric}.{y_aggregation}",
        color="model",
    )
    fig.update_layout(height=600)
    st.plotly_chart(fig, use_container_width=True)

    st.divider()

    # --- Tables -------------------------------------------------------------
    left, right = st.columns(2, gap="small")
    with left:
        metric = st.selectbox("Metric", metrics, index=0, key="table_metric")
    with right:
        aggregation = st.selectbox(
            "Aggregation across folds",
            aggregations,
            index=1,
            key="table_aggregation",
        )

    stat_columns = [f"{metric}.{stat}" for stat in aggregations]
    metrics_table = data_to_plot.groupby(["model"]).agg(
        aggregation, numeric_only=True
    )[stat_columns]

    st.markdown(f"#### {aggregation.capitalize()} {metric} stats per model")
    # Gradient per column: each statistic is coloured on its own scale.
    st.dataframe(style_table(metrics_table, 0), use_container_width=True)

    metrics_per_building_table = (
        data_to_plot.groupby(["model", "unique_id"])
        .agg(aggregation, numeric_only=True)
        .reset_index()
        .pivot(index="model", columns="unique_id", values=f"{metric}.{aggregation}")
    )
    # Insert the median first so that the mean, inserted next at position 0,
    # is computed over the building columns plus the median column exactly as
    # before, and ends up as the leading column.
    metrics_per_building_table.insert(
        0, "median", metrics_per_building_table.median(axis=1)
    )
    metrics_per_building_table.insert(
        0, "mean", metrics_per_building_table.mean(axis=1)
    )
    metrics_per_building_table = metrics_per_building_table.sort_values(by="mean")

    st.markdown(f"#### {aggregation.capitalize()} {metric} stats per building")
    # Gradient over the whole table: all cells share one colour scale.
    st.dataframe(
        style_table(metrics_per_building_table, None), use_container_width=True
    )


def computation_view(data: pd.DataFrame, models_to_plot: set[str]) -> None:
    """Render the computation tab for the selected models.

    Shows a parallel-coordinates chart of per-model mean resource usage and
    mean error metrics, followed by a scatter plot of a chosen error metric
    against CPU usage (logarithmic x axis).

    Args:
        data: Benchmark results with a ``model`` column, ``resource_usage.CPU``
            and ``resource_usage.memory`` columns, and metric columns named
            ``"<metric>.<aggregation>"``.
        models_to_plot: Full names of the models to include.
    """
    data_to_plot = data[data["model"].isin(models_to_plot)].sort_values(
        by="model", ascending=True
    )
    if data_to_plot.empty:
        # Nothing selected: show a hint instead of empty charts.
        st.warning("Select at least one model to show computational results.")
        return

    st.markdown("#### Computational Resources")
    fig = px.parallel_coordinates(
        data_to_plot.groupby("model").mean(numeric_only=True).reset_index(),
        dimensions=[
            "model",
            "resource_usage.CPU",
            "resource_usage.memory",
            "MAE.mean",
            "RMSE.mean",
            "MBE.mean",
            "rMAE.mean",
        ],
        color="rMAE.mean",
        color_continuous_scale=px.colors.diverging.Portland,
    )
    st.plotly_chart(fig, use_container_width=True)

    st.divider()

    metrics = ["MAE", "RMSE", "MBE", "rMAE"]
    aggregations = ["min", "mean", "median", "max", "std"]

    # Explicit keys keep these widgets distinct from identically configured
    # selectboxes that may be rendered elsewhere on the same page.
    left, center, right = st.columns(3, gap="small")
    with left:
        metric = st.selectbox("Metric", metrics, index=0, key="computation_metric")
    with center:
        aggregation_per_building = st.selectbox(
            "Aggregation per building",
            aggregations,
            index=1,
            key="computation_aggregation_per_building",
        )
    with right:
        aggregation_per_model = st.selectbox(
            "Aggregation per model",
            aggregations,
            index=1,
            key="computation_aggregation_per_model",
        )

    st.markdown(
        f"#### {aggregation_per_model.capitalize()} {aggregation_per_building.capitalize()} {metric} vs CPU usage"
    )
    # The column suffix selects the per-building statistic, and the groupby
    # reduction collapses those values to one number per model, matching the
    # "<per model> <per building> <metric>" heading above.
    aggregated_data = (
        data_to_plot.groupby("model")
        .agg(aggregation_per_model, numeric_only=True)
        .reset_index()
    )
    fig = px.scatter(
        aggregated_data,
        x="resource_usage.CPU",
        y=f"{metric}.{aggregation_per_building}",
        color="model",
        log_x=True,
    )
    fig.update_layout(height=600)
    st.plotly_chart(fig, use_container_width=True)