Spaces:
Running
Running
File size: 13,610 Bytes
e67fcfa 01f14f6 33fccb4 e67fcfa 01f14f6 7fe8c1e 66f645e 7fe8c1e 66f645e 7896fa0 7fe8c1e deb692e 66f645e 01f14f6 9e2e619 01f14f6 c58f85c 01f14f6 e67fcfa f03d4c1 7097d3e 7896fa0 c01e48a 7896fa0 c01e48a 7896fa0 c01e48a bab8a32 c01e48a 263de36 c01e48a 263de36 c01e48a 7097d3e e67fcfa 7896fa0 2c9db99 7896fa0 2c9db99 e67fcfa c01e48a 7896fa0 c01e48a e67fcfa 2c9db99 e67fcfa c01e48a 7896fa0 c01e48a f03d4c1 c01e48a e67fcfa f03d4c1 3b1c0d9 2c9db99 3b1c0d9 2c9db99 3b1c0d9 f03d4c1 3b1c0d9 2cc779d 3b1c0d9 c01e48a 2cc779d c01e48a 3b1c0d9 e67fcfa 2c9db99 9d2161a f535f34 2c9db99 5c85bea 7896fa0 2c9db99 f03d4c1 2c9db99 f03d4c1 2c9db99 f03d4c1 2c9db99 3b1c0d9 7097d3e 3b1c0d9 9e69184 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 |
import streamlit as st
import pandas as pd
import numpy as np
import plotly.graph_objs as go
import requests
from io import StringIO
import base64
def load_data_predictions(github_token):
    """
    Fetch Predictions.csv from the GitHub 'Forecast_DAM_V2' repository
    via the blob SHA. This works for files larger than 1 MB (the contents
    endpoint truncates bigger files, the blobs endpoint does not).

    Parameters
    ----------
    github_token : str
        GitHub personal access token with read access to the repository.

    Returns
    -------
    tuple[pandas.DataFrame, pandas.DataFrame]
        ``(df, df_filtered)``: the full prediction table with renamed model
        columns and parsed dates, and the same table restricted to rows
        where 'Real Price' is known. On any HTTP/metadata failure an error
        is shown in the Streamlit UI and two empty DataFrames are returned.
    """
    owner = "mmmapms"
    repo = "Forecast_DAM_V2"
    file_path = "Predictions.csv"

    # 1. Get file metadata (including SHA) from the "contents" endpoint.
    #    A timeout prevents the whole Streamlit script from hanging forever
    #    if the GitHub API is unreachable.
    url_contents = f"https://api.github.com/repos/{owner}/{repo}/contents/{file_path}"
    headers_contents = {
        "Authorization": f"token {github_token}",
    }
    response_contents = requests.get(url_contents, headers=headers_contents, timeout=30)
    if response_contents.status_code != 200:
        st.error("Failed to download file metadata. Check token and file path.")
        return pd.DataFrame(), pd.DataFrame()

    json_data = response_contents.json()
    # We expect "sha" to be present for the file
    if "sha" not in json_data:
        st.error("No 'sha' field found in JSON response. File might be missing.")
        return pd.DataFrame(), pd.DataFrame()
    sha = json_data["sha"]

    # 2. Use the "blobs" endpoint to fetch the raw file content; the raw
    #    Accept header is crucial for large files.
    url_blob = f"https://api.github.com/repos/{owner}/{repo}/git/blobs/{sha}"
    headers_blob = {
        "Authorization": f"token {github_token}",
        "Accept": "application/vnd.github.v3.raw",
    }
    response_blob = requests.get(url_blob, headers=headers_blob, timeout=30)
    if response_blob.status_code != 200:
        st.error(f"Failed to fetch raw blob. Status code: {response_blob.status_code}")
        return pd.DataFrame(), pd.DataFrame()

    # 3. The response body is the raw CSV text; read it into a DataFrame.
    csv_content = StringIO(response_blob.text)
    df = pd.read_csv(csv_content, encoding='utf-8')

    # 4. Rename raw column names to the display names used across the app.
    df = df.rename(columns={
        'Price': 'Real Price',
        'DNN1': 'Neural Network 1',
        'DNN2': 'Neural Network 2',
        'DNN3': 'Neural Network 3',
        'DNN4': 'Neural Network 4',
        'LEAR56': 'Regularized Linear Model 1',
        'LEAR84': 'Regularized Linear Model 2',
        'LEAR112': 'Regularized Linear Model 3',
        'LEAR730': 'Regularized Linear Model 4',
        'Persis': 'Persistence Model',
        'Hybrid_Ensemble': 'Hybrid Ensemble',
        'Weighted_Ensemble': 'Weighted Ensemble'
    })

    # 5. Parse dates (day-first format) and keep only rows where the real
    #    price is already known (future hours have NaN 'Real Price').
    df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)
    df_filtered = df.dropna(subset=['Real Price'])
    return df, df_filtered
# GitHub token comes from Streamlit secrets; without it no data can be
# fetched, so stop the script early instead of falling through to the
# lines below, which would raise a NameError on the undefined DataFrames.
github_token = st.secrets["GitHub_Token_Margarida"]
if github_token:
    df, df_filtered = load_data_predictions(github_token)
else:
    st.warning("Please enter your GitHub Personal Access Token to proceed.")
    st.stop()  # halt script execution: df / df_filtered are not defined

# Date bounds offered in the metrics date-picker (only days with a known
# real price) and the default one-week window for the main forecast graph.
min_date_allowed_pred = df_filtered['Date'].min().date()
max_date_allowed_pred = df_filtered['Date'].max().date()
end_date = df['Date'].max().date()
start_date = end_date - pd.Timedelta(days=7)

# Columns included in the correlation-matrix heatmap further below.
models_corr_matrix = ['Real Price', 'Persistence Model', 'Neural Network 1', 'Neural Network 2', 'Neural Network 3',
                      'Neural Network 4', 'Regularized Linear Model 1',
                      'Regularized Linear Model 2', 'Regularized Linear Model 3',
                      'Regularized Linear Model 4', 'Weighted Ensemble']
def conformal_predictions(data):
    """
    Attach rolling conformal prediction intervals for the 'Weighted Ensemble'.

    For every calendar day with at least 30 days of preceding history, the
    0.9 quantile of the per-hour residuals (forecast minus real price) over
    the previous 30 days is used as a symmetric half-width around the
    ensemble forecast.

    NOTE(review): `data` is mutated in place (new columns 'Residuals',
    'Hour', 'Quantile_90', 'Lower_Interval', 'Upper_Interval'; index is set
    to 'Date' and then reset) and the same object is also returned.
    Assumes hourly rows with a 'Date' column parseable as a DatetimeIndex.
    """
    # Signed forecast error of the ensemble; interval widths are derived
    # from the distribution of these residuals.
    data['Residuals'] = data['Weighted Ensemble'] - data['Real Price']
    data.set_index('Date', inplace=True)
    data['Hour'] = data.index.hour
    min_date = data.index.min()
    # Iterate over unique calendar days (timestamps normalized to midnight).
    for date in data.index.normalize().unique():
        # Only days with a full 30-day calibration window get intervals.
        if date >= min_date + pd.DateOffset(days=30):
            start_date = date - pd.DateOffset(days=30)
            end_date = date
            # Calibration window: the 30 days strictly before `date`.
            # Label-based slicing on a DatetimeIndex is inclusive at both
            # ends, hence the one-hour offset to exclude `date` 00:00 itself.
            calculation_window = data[start_date:end_date-pd.DateOffset(hours=1)]
            # Per-hour 0.9 residual quantile -> symmetric interval half-width.
            quantiles = calculation_window.groupby('Hour')['Residuals'].quantile(0.9)
            # Use .loc to safely access and modify data
            if date in data.index:
                current_day_data = data.loc[date.strftime('%Y-%m-%d')]
                for hour in current_day_data['Hour'].unique():
                    if hour in quantiles.index:
                        hour_quantile = quantiles[hour]
                        # Boolean mask selecting this day's rows for this hour.
                        idx = (data.index.normalize() == date) & (data.Hour == hour)
                        data.loc[idx, 'Quantile_90'] = hour_quantile
                        data.loc[idx, 'Lower_Interval'] = data.loc[idx, 'Weighted Ensemble'] - hour_quantile
                        data.loc[idx, 'Upper_Interval'] = data.loc[idx, 'Weighted Ensemble'] + hour_quantile
    # Restore 'Date' as a regular column for the downstream plotting code.
    data.reset_index(inplace=True)
    return data
# ---- Page header: title on the left, partner logos on the right ----
col1, col2 = st.columns([5, 2])  # wider left column for the page title
with col1:
    st.title("Belgium: Electricity Price Forecasting")
with col2:
    # Two stacked placeholders; the second one receives the markdown below.
    upper_space = col2.empty()
    upper_space = col2.empty()
    # Side-by-side sub-columns so the two logos sit next to each other.
    logo_left, logo_right = st.columns(2)
    with logo_left:
        st.image("KU_Leuven_logo.png", width=100)
    with logo_right:
        st.image("energyville_logo.png", width=100)
upper_space.markdown("""
""", unsafe_allow_html=True)

# ---- Sidebar: user inputs that drive the charts and metrics below ----
# Single source of truth for the model display names (index 8 below relies
# on 'Weighted Ensemble' being the ninth entry).
_MODEL_CHOICES = [
    'Neural Network 1', 'Neural Network 2', 'Neural Network 3', 'Neural Network 4',
    'Regularized Linear Model 1', 'Regularized Linear Model 2',
    'Regularized Linear Model 3', 'Regularized Linear Model 4',
    'Weighted Ensemble', 'Persistence Model',
]
with st.sidebar:
    st.write("### Variables Selection for Graph")
    st.write("Select which variables you'd like to include in the graph. This will affect the displayed charts and available data for download.")
    selected_variables = st.multiselect(
        "Select variables to display:",
        options=['Real Price'] + _MODEL_CHOICES,
        default=['Real Price', 'Weighted Ensemble'],
    )
    st.write("### Model Selection for Scatter Plot")
    model_selection = st.selectbox(
        "Select which model's predictions to display:",
        options=_MODEL_CHOICES,
        index=8,  # default selection: 'Weighted Ensemble'
    )
    st.write("### Date Range for Metrics Calculation")
    st.write("Select the date range to calculate the metrics for the predictions. This will influence the accuracy metrics displayed below. The complete dataset ranges from 10/03/2024 until today.")
    start_date_pred, end_date_pred = st.date_input(
        "Select Date Range for Metrics Calculation:",
        [min_date_allowed_pred, max_date_allowed_pred],
    )
# Main content: line chart of the last week of prices and forecasts.
if not selected_variables:
    st.warning("Please select at least one variable to display.")
else:
    st.write("## Belgian Day-Ahead Electricity Prices")
    # Compute conformal prediction intervals whenever the Weighted Ensemble
    # is plotted (adds Quantile_90 / Lower_Interval / Upper_Interval to df).
    if 'Weighted Ensemble' in selected_variables:
        df = conformal_predictions(df)
    # Restrict the graph to the default window starting at `start_date`
    # (one week before the latest available date).
    temp_df = df[(df['Date'] >= pd.Timestamp(start_date))]
    # Initialize Plotly figure
    fig = go.Figure()
    for variable in selected_variables:
        fig.add_trace(go.Scatter(x=temp_df['Date'], y=temp_df[variable], mode='lines', name=variable))
        # Add the conformal interval band for the Weighted Ensemble trace
        # (only present if conformal_predictions ran above).
        if variable == 'Weighted Ensemble' and 'Quantile_90' in df.columns:
            # Lower bound: invisible line that serves as the fill baseline.
            fig.add_trace(go.Scatter(
                x=temp_df['Date'],
                y=temp_df['Lower_Interval'],
                mode='lines',
                line=dict(width=0),
                showlegend=False
            ))
            # Upper bound, shaded down to the lower-bound trace added just
            # above — the two traces must stay adjacent for 'tonexty' to work.
            fig.add_trace(go.Scatter(
                x=temp_df['Date'],
                y=temp_df['Upper_Interval'],
                mode='lines',
                line=dict(width=0),
                fill='tonexty',  # Fill between this trace and the previous one
                fillcolor='rgba(68, 68, 68, 0.3)',
                name='P10/P90 prediction intervals'
            ))
    fig.update_layout(xaxis_title="Date", yaxis_title="Price [EUR/MWh]")
    st.plotly_chart(fig, use_container_width=True)
    st.write("The graph presented here illustrates the day-ahead electricity price forecasts for Belgium, covering the period from one week ago up to tomorrow. The forecasts are made every morning on day D at 08.00 for day D+1.")
if not selected_variables:
    st.warning("Please select at least one variable to display.")
else:
    # ---- Scatter plot: observed price vs. selected model's predictions ----
    st.write("## Scatter Plot: Real Price vs Model Predictions")
    # Use df_filtered (rows with a known real price) rather than df: rows
    # with NaN 'Real Price' inside the date range would make np.polyfit
    # return NaN coefficients and silently break the regression line.
    plot_df = df_filtered[(df_filtered['Date'] >= pd.Timestamp(min_date_allowed_pred)) & (df_filtered['Date'] <= pd.Timestamp(max_date_allowed_pred))]
    model_column = model_selection
    # Create the scatter plot
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=plot_df['Real Price'], y=plot_df[model_column], mode='markers', name=f"Real Price vs {model_selection} Predictions"))
    # Least-squares line of best fit (degree-1 polynomial: slope m, intercept b).
    m, b = np.polyfit(plot_df['Real Price'], plot_df[model_column], 1)
    regression_line = m * plot_df['Real Price'] + b
    # Add the line of best fit to the figure
    fig.add_trace(go.Scatter(x=plot_df['Real Price'], y=regression_line, mode='lines', line=dict(color='black'), showlegend=False))
    # Update layout with appropriate titles
    fig.update_layout(
        title=f"Scatter Plot of Real Price vs {model_selection} Predictions from {min_date_allowed_pred} to {max_date_allowed_pred}",
        xaxis_title="Real Price [EUR/MWh]",
        yaxis_title=f"{model_selection} Predictions [EUR/MWh]",
        xaxis=dict(range=[-160, 160]),  # fixed ranges keep plots comparable between models
        yaxis=dict(range=[-150, 150]),
        showlegend=False
    )
    st.plotly_chart(fig, use_container_width=True)
# ---- Accuracy metrics over the user-selected date range ----
if start_date_pred and end_date_pred:
    st.header("Accuracy Metrics")
    st.write(f"The accuracy metrics are calculated from **{start_date_pred}** to **{end_date_pred}**. This interval can be changed in the sidebar.")
    # The end date is inclusive: keep rows strictly before end_date_pred + 1 day.
    filtered_df = df_filtered[(df_filtered['Date'] >= pd.Timestamp(start_date_pred)) & (df_filtered['Date'] < (pd.Timestamp(end_date_pred) + pd.Timedelta(days=1)))]
    # List of models for convenience
    models = [
        'Neural Network 1', 'Neural Network 2', 'Neural Network 3', 'Neural Network 4',
        'Regularized Linear Model 1', 'Regularized Linear Model 2', 'Regularized Linear Model 3', 'Regularized Linear Model 4',
        'Persistence Model', 'Weighted Ensemble'
    ]
    if filtered_df.empty:
        # Without any observed prices in the window, every metric would be
        # NaN — tell the user instead of rendering a meaningless table.
        st.warning("No data available in the selected date range.")
    else:
        # Placeholder for results
        results = {'Metric': ['MAE', 'rMAE']}
        p_real = filtered_df['Real Price']
        # The persistence-model MAE is the common denominator of every rMAE;
        # compute it once instead of once per model inside the loop.
        persistence_mae = np.mean(np.abs(p_real - filtered_df['Persistence Model']))
        # Calculate and store metrics for each model.
        for model in models:
            # Column names in filtered_df match the model display names directly.
            p_pred = filtered_df[model]
            mae = np.mean(np.abs(p_real - p_pred))
            # rMAE: MAE relative to the naive persistence benchmark
            # (1.00 for the persistence model itself by construction).
            rmae = mae / persistence_mae
            results[model] = [f"{mae:.2f}", f"{rmae:.2f}"]
        # Convert the results to a DataFrame (models as rows) for display.
        metrics_df = pd.DataFrame(results)
        transposed_metrics_df = metrics_df.set_index('Metric').T
        col1, col2 = st.columns([3, 2])
        # Display the transposed DataFrame
        with col1:
            st.dataframe(transposed_metrics_df, hide_index=False)
        with col2:
            st.markdown("""
<style>
.big-font {
    font-size: 20px;
    font-weight: 500;
}
</style>
<div class="big-font">
Equations
</div>
""", unsafe_allow_html=True)
            # Rendering LaTeX equations
            st.markdown(r"""
$\text{MAE} = \frac{1}{n}\sum_{i=1}^{n}|y_i - \hat{y}_i|$
$\text{rMAE} = \frac{\text{MAE}}{MAE_{\text{Persistence Model}}}$
""")
# ---- Correlation matrix between the real price and every model ----
st.write("## Correlation Matrix")
pairwise_corr = df_filtered[models_corr_matrix].corr()
fig = go.Figure(
    data=go.Heatmap(
        z=pairwise_corr.values,
        x=pairwise_corr.columns,
        y=pairwise_corr.index,
    )
)
# Reverse the y-axis so the matrix reads top-left to bottom-right.
fig.update_layout(yaxis_autorange='reversed')
st.plotly_chart(fig, use_container_width=True)

# ---- Disclaimer and contact footer ----
st.write("## Access Predictions")
st.write("All forecasts are provided on an 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. If you are interested in accessing the predictions made by the models, the models themselves or how these can be incorporated into your workflows, please contact Margarida Mascarenhas (PhD Student, KU Leuven) at margarida.mascarenhas@kuleuven.be or Hussain Kazmi (assistant professor, KU Leuven) at hussain.kazmi@kuleuven.be.")
|