adjoint-bass committed on
Commit
a6e15b1
1 Parent(s): e8665a5

update app, add picture

Browse files
Files changed (3) hide show
  1. app.py +24 -14
  2. functions.py +35 -60
  3. vienna.jpg +0 -0
app.py CHANGED
@@ -3,49 +3,59 @@ import hopsworks
3
  import joblib
4
  import pandas as pd
5
  from datetime import timedelta, datetime
6
- from functions import *
7
 
8
 
9
  def fancy_header(text, font_size=24):
10
- res = f'<p style="color:#ff5f72; font-size: {font_size}px; text-align:center;">{text}</p>'
11
  st.markdown(res, unsafe_allow_html=True)
12
 
13
- st.set_page_config(layout="wide")
 
 
 
14
 
15
- st.title('Air Quality Prediction Project🌩')
16
-
17
- st.write(9 * "-")
18
  fancy_header('\n Connecting to Hopsworks Feature Store...')
19
 
20
  project = hopsworks.login()
21
 
22
  st.write("Successfully connected!✔️")
23
 
24
- st.write(18 * "-")
25
- fancy_header('\n Getting data from Feature Store...')
26
 
27
  today = datetime.date.today()
28
  city = "vienna"
29
  weekly_data = get_weather_data_weekly(city, today)
 
30
 
 
31
 
32
- st.write(27 * "-")
33
-
34
  mr = project.get_model_registry()
35
  model = mr.get_best_model("aqi_model", "rmse", "min")
36
  model_dir = model.download()
37
  model = joblib.load(model_dir + "/aqi_model.pkl")
38
 
39
- st.write("-" * 36)
 
40
 
 
41
 
42
  preds = model.predict(data_encoder(weekly_data)).astype(int)
 
43
  poll_level = get_aplevel(preds.T.reshape(-1, 1))
44
 
45
- next_week = [f"{(today + timedelta(days=d)).strftime('%Y-%m-%d')},{(today + timedelta(days=d)).strftime('%A')}" for d in range(7)]
 
 
46
 
47
- df = pd.DataFrame(data=[preds, poll_level], index=["AQI", "Air pollution level"], columns=next_week)
48
 
49
- st.write(df)
 
50
 
51
  st.button("Re-run")
 
3
  import joblib
4
  import pandas as pd
5
  from datetime import timedelta, datetime
6
+ from functions import get_weather_data_weekly, data_encoder, get_aplevel
7
 
8
 
9
  def fancy_header(text, font_size=24):
10
+ res = f'<p style="color:#ff5f27; font-size: {font_size}px;text-align:center">{text}</p>'
11
  st.markdown(res, unsafe_allow_html=True)
12
 
13
+ # TODO: set the screen to wide mode (st.set_page_config(layout="wide"))
14
+ st.title('Air Quality Prediction Project 🌩')
15
+ st.image("vienna.jpg", use_column_width='auto')
16
+ st.write(36 * "-")
17
 
18
+ st.markdown("# This is a final project in the course ID2223 Scalable Machine Learning and Deep Learning :computer:")
19
+ st.markdown("My task was to predict the Air Quality Index (AQI) for one city (I choose Vienna) based on different weather data (pressure, snow-and cloud-coverage, temperature, etc.).")
20
+ st.markdown("For the full list of weather data, please click [here](https://visualcrossing.com/resources/documentation/weather-api/timeline-weather-api)")
21
  fancy_header('\n Connecting to Hopsworks Feature Store...')
22
 
23
  project = hopsworks.login()
24
 
25
  st.write("Successfully connected!✔️")
26
 
27
+ st.write(36 * "-")
28
+ fancy_header('\n Collecting the weather data from Vienna...')
29
 
30
  today = datetime.date.today()
31
  city = "vienna"
32
  weekly_data = get_weather_data_weekly(city, today)
33
+ st.write("Successfully collected!✔️")
34
 
35
+ st.write(36 * "-")
36
 
37
+ fancy_header("Loading the fitted XGBoost model...")
 
38
  mr = project.get_model_registry()
39
  model = mr.get_best_model("aqi_model", "rmse", "min")
40
  model_dir = model.download()
41
  model = joblib.load(model_dir + "/aqi_model.pkl")
42
 
43
+ st.write("Successfully loaded!✔️")
44
+ st.sidebar.write("-" * 36)
45
 
46
+ fancy_header("Making AQI predictions for the next week...")
47
 
48
  preds = model.predict(data_encoder(weekly_data)).astype(int)
49
+
50
  poll_level = get_aplevel(preds.T.reshape(-1, 1))
51
 
52
+ next_week_datetime = [today + timedelta(days=d) for d in range(7)]
53
+
54
+ next_week_str = [f"{days.strftime('%Y-%m-%d')}, {days.strftime('%A')}" for days in next_week_datetime]
55
 
56
+ df = pd.DataFrame(data=[preds, poll_level], index=["AQI", "Air pollution level"], columns=next_week_str)
57
 
58
+ st.write("Here they are!")
59
+ st.dataframe(df)  # TODO: wire up styling — df.style.apply expects a callable, e.g. df.style.apply(color_aq, axis=None); passing the uncalled bound method renders its repr instead
60
 
61
  st.button("Re-run")
functions.py CHANGED
@@ -1,65 +1,19 @@
1
  import requests
2
  import os
3
- import joblib
4
  import pandas as pd
5
  import datetime
6
  import numpy as np
7
  from sklearn.preprocessing import OrdinalEncoder
8
  from dotenv import load_dotenv
9
- load_dotenv(override=True)
10
-
11
-
12
- def decode_features(df, feature_view):
13
- """Decodes features in the input DataFrame using corresponding Hopsworks Feature Store transformation functions"""
14
- df_res = df.copy()
15
-
16
- import inspect
17
-
18
-
19
- td_transformation_functions = feature_view._batch_scoring_server._transformation_functions
20
-
21
- res = {}
22
- for feature_name in td_transformation_functions:
23
- if feature_name in df_res.columns:
24
- td_transformation_function = td_transformation_functions[feature_name]
25
- sig, foobar_locals = inspect.signature(td_transformation_function.transformation_fn), locals()
26
- param_dict = dict([(param.name, param.default) for param in sig.parameters.values() if param.default != inspect._empty])
27
- if td_transformation_function.name == "min_max_scaler":
28
- df_res[feature_name] = df_res[feature_name].map(
29
- lambda x: x * (param_dict["max_value"] - param_dict["min_value"]) + param_dict["min_value"])
30
-
31
- elif td_transformation_function.name == "standard_scaler":
32
- df_res[feature_name] = df_res[feature_name].map(
33
- lambda x: x * param_dict['std_dev'] + param_dict["mean"])
34
- elif td_transformation_function.name == "label_encoder":
35
- dictionary = param_dict['value_to_index']
36
- dictionary_ = {v: k for k, v in dictionary.items()}
37
- df_res[feature_name] = df_res[feature_name].map(
38
- lambda x: dictionary_[x])
39
- return df_res
40
-
41
-
42
- def get_model(project, model_name, evaluation_metric, sort_metrics_by):
43
- """Retrieve desired model or download it from the Hopsworks Model Registry.
44
- In second case, it will be physically downloaded to this directory"""
45
- TARGET_FILE = "model.pkl"
46
- list_of_files = [os.path.join(dirpath,filename) for dirpath, _, filenames \
47
- in os.walk('.') for filename in filenames if filename == TARGET_FILE]
48
-
49
- if list_of_files:
50
- model_path = list_of_files[0]
51
- model = joblib.load(model_path)
52
- else:
53
- if not os.path.exists(TARGET_FILE):
54
- mr = project.get_model_registry()
55
- # get best model based on custom metrics
56
- model = mr.get_best_model(model_name,
57
- evaluation_metric,
58
- sort_metrics_by)
59
- model_dir = model.download()
60
- model = joblib.load(model_dir + "/model.pkl")
61
-
62
- return model
63
 
64
 
65
  def get_air_quality_data(station_name):
@@ -90,7 +44,6 @@ def get_air_quality_df(data):
90
  new_data['pm10'] = pd.to_numeric(new_data['pm10'])
91
  new_data['aqi'] = pd.to_numeric(new_data['aqi'])
92
 
93
- print(new_data)
94
  return new_data
95
 
96
 
@@ -125,6 +78,7 @@ def get_weather_data_daily(city):
125
  data['uvindex'],
126
  data['conditions']
127
  ]
 
128
  def get_weather_data_weekly(city: str, start_date: datetime) -> pd.DataFrame:
129
  WEATHER_API_KEY = os.getenv('WEATHER_API_KEY')
130
  end_date = f"{start_date + datetime.timedelta(days=6):%Y-%m-%d}"
@@ -135,10 +89,31 @@ def get_weather_data_weekly(city: str, start_date: datetime) -> pd.DataFrame:
135
  for i in range(7):
136
  data = weather_data[i]
137
  list_of_data = [
138
- answer['address'].lower(), data['datetime'], data['tempmax'], data['tempmin'], data['temp'], data['feelslikemax'],
139
- data['feelslikemin'], data['feelslike'], data['dew'], data['humidity'], data['precip'], data['precipprob'], data['precipcover'],
140
- data['snow'], data['snowdepth'], data['windgust'], data['windspeed'], data['winddir'], data['pressure'], data['cloudcover'],
141
- data['visibility'], data['solarradiation'], data['solarenergy'], data['uvindex'], data['conditions']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  ]
143
  weather_df = get_weather_df(list_of_data)
144
  final_df = pd.concat([final_df, weather_df])
 
1
  import requests
2
  import os
 
3
  import pandas as pd
4
  import datetime
5
  import numpy as np
6
  from sklearn.preprocessing import OrdinalEncoder
7
  from dotenv import load_dotenv
8
+ load_dotenv()
9
+
10
+
11
+ ## TODO: write function to display the color coding of the categories both in the df and as a guide.
12
+ # something like:
13
+ def color_aq(val):
14
+ color = 'green' if val else 'red'
15
+ return f'background-color: {color}'
16
+ # but better
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
 
19
  def get_air_quality_data(station_name):
 
44
  new_data['pm10'] = pd.to_numeric(new_data['pm10'])
45
  new_data['aqi'] = pd.to_numeric(new_data['aqi'])
46
 
 
47
  return new_data
48
 
49
 
 
78
  data['uvindex'],
79
  data['conditions']
80
  ]
81
+
82
  def get_weather_data_weekly(city: str, start_date: datetime) -> pd.DataFrame:
83
  WEATHER_API_KEY = os.getenv('WEATHER_API_KEY')
84
  end_date = f"{start_date + datetime.timedelta(days=6):%Y-%m-%d}"
 
89
  for i in range(7):
90
  data = weather_data[i]
91
  list_of_data = [
92
+ answer['address'].lower(),
93
+ data['datetime'],
94
+ data['tempmax'],
95
+ data['tempmin'],
96
+ data['temp'],
97
+ data['feelslikemax'],
98
+ data['feelslikemin'],
99
+ data['feelslike'],
100
+ data['dew'],
101
+ data['humidity'],
102
+ data['precip'],
103
+ data['precipprob'],
104
+ data['precipcover'],
105
+ data['snow'],
106
+ data['snowdepth'],
107
+ data['windgust'],
108
+ data['windspeed'],
109
+ data['winddir'],
110
+ data['pressure'],
111
+ data['cloudcover'],
112
+ data['visibility'],
113
+ data['solarradiation'],
114
+ data['solarenergy'],
115
+ data['uvindex'],
116
+ data['conditions']
117
  ]
118
  weather_df = get_weather_df(list_of_data)
119
  final_df = pd.concat([final_df, weather_df])
vienna.jpg ADDED