Files changed:
- README.md +8 -2
- app_streamlit.py +2 -2
- merge_df.py +39 -15
README.md CHANGED
@@ -12,7 +12,7 @@ short_description: Air quality forecasting for Lahore, Pakistan!
 
 # Air Quality Monitoring for Lahore, Pakistan
 
-### Dashboard link
+### [Dashboard link](https://huggingface.co/spaces/Robzy/hbg-weather)
 
 # Architecture & pipeline
 
@@ -53,4 +53,10 @@ Weather data features:
 
 HuggingFace's Streamlit Spaces is used to display the hindcast, forecast, and real air quality in an interactive line graph. GitHub Actions is used to call the feature and inference pipelines daily by leveraging scheduling.
 
-Note that backfilling, feature group creation and model training is only performed once
+Note that backfilling, feature group creation, and model training are performed only once.
+
+
+
+### Acknowledgments
+
+* [HuggingFace restart scheduler](https://huggingface.co/spaces/davanstrien/restart/blob/main/app.py)
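As context for the interactive line graph the README mentions, here is a minimal sketch of plotting observed and predicted PM2.5 in a Streamlit app with Plotly. The sample data and column names are illustrative assumptions, not the app's actual schema; the real app reads its data from Hopsworks.

```python
# Minimal sketch of the kind of dashboard chart the README describes.
# The DataFrame below is made-up sample data, not the project's real feature data.
import pandas as pd
import plotly.express as px
import streamlit as st

df = pd.DataFrame({
    "date": pd.date_range("2024-01-01", periods=5, freq="D"),
    "pm25": [120.0, 135.0, 110.0, None, None],               # observed air quality (hindcast period)
    "predicted_pm25": [118.0, 140.0, 112.0, 125.0, 130.0],   # model predictions (hindcast + forecast)
})

# Long format lets Plotly draw one line per series.
long_df = df.melt(id_vars="date", var_name="series", value_name="value")

fig = px.line(long_df, x="date", y="value", color="series",
              title="PM2.5 in Lahore: observed vs. predicted")
st.plotly_chart(fig)
```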
app_streamlit.py CHANGED
@@ -51,9 +51,9 @@ st.plotly_chart(fig)
 
 HF_TOKEN = os.getenv("HF_TOKEN")
 def restart():
-    restart_space("
+    restart_space("davanstrien/restart", token=HF_TOKEN)
 
 time_start = datetime.now()
 scheduler = BackgroundScheduler()
 job = scheduler.add_job(restart, "interval", minutes=2)
-scheduler.start()
+scheduler.start()
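The change above makes the Space restart itself on a schedule so the dashboard keeps refreshing (the pattern credited in the new Acknowledgments entry). A self-contained sketch of that pattern follows, using `huggingface_hub.restart_space` and APScheduler as in the diff; the repo id is a placeholder for whichever Space should be restarted.

```python
# Sketch of the scheduled self-restart pattern from the diff above.
# "your-username/your-space" is a placeholder repo id, not the project's actual Space.
import os
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import restart_space

HF_TOKEN = os.getenv("HF_TOKEN")  # write-access token stored as a Space secret

def restart():
    # Restarting the Space reruns the Streamlit script, so the chart picks up fresh data.
    restart_space("your-username/your-space", token=HF_TOKEN)

scheduler = BackgroundScheduler()
scheduler.add_job(restart, "interval", minutes=2)  # same 2-minute interval as in the diff
scheduler.start()
```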
merge_df.py CHANGED
@@ -2,6 +2,11 @@ import datetime
 import pandas as pd
 import hopsworks
 import os
+import datetime
+from xgboost import XGBRegressor
+import pandas as pd
+import hopsworks
+import os
 
 os.environ['HOPSWORKS_PROJECT'] = os.getenv('HOPSWORKS_PROJECT')
 os.environ['HOPSWORKS_API_KEY'] = os.getenv('HOPSWORKS_API_KEY')
@@ -9,6 +14,16 @@ os.environ['HOPSWORKS_API_KEY'] = os.getenv('HOPSWORKS_API_KEY')
 project = hopsworks.login()
 fs = project.get_feature_store()
 
+project = hopsworks.login()
+fs = project.get_feature_store()
+
+mr = project.get_model_registry()
+retrieved_model = mr.get_model(
+    name="air_quality_xgboost_model",
+    version=1,
+)
+saved_model_dir = retrieved_model.download()
+
 
 def get_merged_dataframe():
 
@@ -26,28 +41,37 @@
         version=1,
     )
 
-
-
-
+    weather_fg = fs.get_feature_group(
+        name='weather',
+        version=1,
+    )
 
-
-
-    predicted_data = predicted_data[['date','predicted_pm25']]
-    #predicted_data = predicted_data.rename(columns={"predicted_pm25" : "pm25"})
-    predicted_data = predicted_data.sort_values(by=['date'], ascending=True)
+    retrieved_xgboost_model = XGBRegressor()
+    retrieved_xgboost_model.load_model(saved_model_dir + "/model.json")
 
-    #merge the dataframes
-    selected_features = selected_features.reset_index(drop=True)
-    predicted_data = predicted_data.reset_index(drop=True)
 
-
-
+    selected_features = air_quality_fg.select_all(['pm25', 'past_air_quality']).join(weather_fg.select(['temperature_2m_mean', 'precipitation_sum', 'wind_speed_10m_max', 'wind_direction_10m_dominant']), on=['city'])
+    selected_features = selected_features.read()
+    selected_features['date'] = pd.to_datetime(selected_features['date'], utc=True).dt.tz_convert(None).astype('datetime64[ns]')
+
+    predicted_data = monitor_fg.read()
+    predicted_data = predicted_data[['date','predicted_pm25']]
+    predicted_data['date'] = predicted_data['date'].dt.tz_convert(None).astype('datetime64[ns]')
+    predicted_data = predicted_data.sort_values(by=['date'], ascending=True).reset_index(drop=True)
 
 
+    #get historical predicted pm25
+    selected_features['predicted_pm25'] = retrieved_xgboost_model.predict(selected_features[['past_air_quality','temperature_2m_mean', 'precipitation_sum', 'wind_speed_10m_max', 'wind_direction_10m_dominant']])
 
-
+    #merge data
+    selected_features = selected_features[['date', 'pm25', 'predicted_pm25']]
+    combined_df = pd.merge(selected_features, predicted_data, on='date', how='outer')
     combined_df['date'] = pd.to_datetime(combined_df['date'], utc=True).dt.tz_convert(None).astype('datetime64[ns]')
 
-
+    # Combine the predicted_pm25_x and predicted_pm25_y columns into one
+    combined_df['predicted_pm25'] = combined_df['predicted_pm25_x'].combine_first(combined_df['predicted_pm25_y'])
+
+    # Drop the individual columns after merging
+    combined_df = combined_df.drop(columns=['predicted_pm25_x', 'predicted_pm25_y'])
 
 print(get_merged_dataframe())
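In the rewritten `get_merged_dataframe`, both frames carry a `predicted_pm25` column, so the outer merge on `date` yields suffixed `predicted_pm25_x` / `predicted_pm25_y` columns, which `combine_first` then coalesces. A small standalone pandas example of that behavior, with made-up values:

```python
# Standalone illustration of the outer-merge-and-coalesce step; values are made up.
import pandas as pd

hindcast = pd.DataFrame({
    "date": pd.to_datetime(["2024-01-01", "2024-01-02"]),
    "pm25": [120.0, 135.0],
    "predicted_pm25": [118.0, 140.0],   # model applied to historical features
})
forecast = pd.DataFrame({
    "date": pd.to_datetime(["2024-01-02", "2024-01-03"]),
    "predicted_pm25": [142.0, 125.0],   # stored daily forecasts
})

combined = pd.merge(hindcast, forecast, on="date", how="outer")
# Overlapping non-key columns get _x/_y suffixes; prefer the hindcast value, else the forecast.
combined["predicted_pm25"] = combined["predicted_pm25_x"].combine_first(combined["predicted_pm25_y"])
combined = combined.drop(columns=["predicted_pm25_x", "predicted_pm25_y"])
print(combined[["date", "pm25", "predicted_pm25"]])
```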