hbg-weather / infer.py
Robzy's picture
time series for gradio
43106f9
raw
history blame
3.09 kB
import datetime
import pandas as pd
from xgboost import XGBRegressor
import hopsworks
import json
from functions import util
import os
# Set up
api_key = os.getenv('HOPSWORKS_API_KEY')
project_name = os.getenv('HOPSWORKS_PROJECT')
project = hopsworks.login(project=project_name, api_key_value=api_key)
fs = project.get_feature_store()
secrets = util.secrets_api(project.name)
location_str = secrets.get_secret("SENSOR_LOCATION_JSON").value
location = json.loads(location_str)
country=location['country']
city=location['city']
street=location['street']
AQI_API_KEY = secrets.get_secret("AQI_API_KEY").value
location_str = secrets.get_secret("SENSOR_LOCATION_JSON").value
location = json.loads(location_str)
today = datetime.datetime.now() - datetime.timedelta(0)
feature_view = fs.get_feature_view(
name='air_quality_fv',
version=1,
)
### Retreive model
mr = project.get_model_registry()
retrieved_model = mr.get_model(
name="air_quality_xgboost_model",
version=1,
)
saved_model_dir = retrieved_model.download()
retrieved_xgboost_model = XGBRegressor()
retrieved_xgboost_model.load_model(saved_model_dir + "/model.json")
### Retrieve features
weather_fg = fs.get_feature_group(
name='weather',
version=1,
)
today_timestamp = pd.to_datetime(today)
batch_data = weather_fg.filter(weather_fg.date >= today_timestamp ).read()
### Predict and upload
batch_data['predicted_pm25'] = retrieved_xgboost_model.predict(
batch_data[['temperature_2m_mean', 'precipitation_sum', 'wind_speed_10m_max', 'wind_direction_10m_dominant']])
batch_data['street'] = street
batch_data['city'] = city
batch_data['country'] = country
# Fill in the number of days before the date on which you made the forecast (base_date)
batch_data['days_before_forecast_day'] = range(1, len(batch_data)+1)
batch_data = batch_data.sort_values(by=['date'])
#batch_data['date'] = batch_data['date'].dt.tz_convert(None).astype('datetime64[ns]')
plt = util.plot_air_quality_forecast(city, street, batch_data, file_path="./img/pm25_forecast.png")
monitor_fg = fs.get_or_create_feature_group(
name='aq_predictions',
description='Air Quality prediction monitoring',
version=1,
primary_key=['city','street','date','days_before_forecast_day'],
event_time="date"
)
print(f"Batch data: {batch_data}")
monitor_fg.insert(batch_data, write_options={"wait_for_job": True})
monitoring_df = monitor_fg.filter(monitor_fg.days_before_forecast_day == 1).read()
# Hindcast monitoring
air_quality_fg = fs.get_feature_group(
name='air_quality',
version=1,
)
air_quality_df = air_quality_fg.read()
outcome_df = air_quality_df[['date', 'pm25']]
preds_df = monitoring_df[['date', 'predicted_pm25']]
hindcast_df = pd.merge(preds_df, outcome_df, on="date")
hindcast_df = hindcast_df.sort_values(by=['date'])
if len(hindcast_df) == 0:
hindcast_df = util.backfill_predictions_for_monitoring(weather_fg, air_quality_df, monitor_fg, retrieved_xgboost_model)
plt = util.plot_air_quality_forecast(city, street, hindcast_df, file_path="./img/pm25_hindcast_1day.png", hindcast=True)