File size: 28,081 Bytes
43106f9
 
 
 
938a35d
f2e3576
 
 
 
 
 
938a35d
 
 
 
 
 
 
 
f2e3576
1237c34
f2e3576
f6be049
f2e3576
 
 
 
f6be049
 
 
 
 
 
 
f2e3576
f6be049
f2e3576
 
 
 
 
 
f6be049
 
 
 
 
 
f2e3576
f6be049
 
 
 
 
 
 
 
 
82057dc
f6be049
1237c34
 
 
 
 
938a35d
 
1237c34
938a35d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1237c34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
938a35d
 
1237c34
938a35d
 
1237c34
 
 
 
938a35d
 
1237c34
938a35d
 
1237c34
 
 
938a35d
 
 
 
 
 
 
1237c34
 
938a35d
 
 
 
 
 
1237c34
 
 
938a35d
 
 
1237c34
938a35d
 
1237c34
 
 
938a35d
 
 
1237c34
938a35d
 
1237c34
 
 
938a35d
 
 
1237c34
938a35d
 
1237c34
 
 
938a35d
 
 
1237c34
938a35d
 
1237c34
 
 
 
 
 
 
938a35d
 
 
 
 
 
 
 
 
1237c34
938a35d
 
 
 
 
 
 
 
 
1237c34
 
938a35d
1237c34
 
 
 
 
938a35d
f6be049
 
 
 
938a35d
f6be049
 
1237c34
938a35d
 
 
 
 
 
 
 
 
 
 
1237c34
f6be049
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f2e3576
 
 
 
938a35d
f2e3576
1237c34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f2e3576
f6be049
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f2e3576
f6be049
 
 
 
 
 
938a35d
f6be049
 
 
 
 
 
 
 
 
1237c34
f6be049
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1237c34
f6be049
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f2e3576
f6be049
 
 
 
f2e3576
43106f9
 
 
f2e3576
 
 
 
 
43106f9
f2e3576
 
 
 
 
 
 
 
 
 
43106f9
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Connection closed.\n",
      "Connected. Call `.close()` to terminate connection gracefully.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1160344\n",
      "Connected. Call `.close()` to terminate connection gracefully.\n",
      "Connected. Call `.close()` to terminate connection gracefully.\n"
     ]
    }
   ],
   "source": [
    "import datetime\n",
    "import pandas as pd\n",
    "from xgboost import XGBRegressor\n",
    "import hopsworks\n",
    "import json\n",
    "from functions import util\n",
    "import os\n",
    "\n",
    "# Set up\n",
    "\n",
    "api_key = os.getenv('HOPSWORKS_API_KEY')\n",
    "project_name = os.getenv('HOPSWORKS_PROJECT')\n",
    "\n",
    "project = hopsworks.login(project=project_name, api_key_value=api_key)\n",
    "fs = project.get_feature_store() \n",
    "secrets = util.secrets_api(project.name)\n",
    "location_str = secrets.get_secret(\"SENSOR_LOCATION_JSON\").value\n",
    "location = json.loads(location_str)\n",
    "country=location['country']\n",
    "city=location['city']\n",
    "street=location['street']\n",
    "\n",
    "AQI_API_KEY = secrets.get_secret(\"AQI_API_KEY\").value\n",
    "location_str = secrets.get_secret(\"SENSOR_LOCATION_JSON\").value\n",
    "location = json.loads(location_str)\n",
    "\n",
    "today = datetime.datetime.now() - datetime.timedelta(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Connected. Call `.close()` to terminate connection gracefully.\n",
      "Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.02s) \n"
     ]
    }
   ],
   "source": [
    "### Retreive model\n",
    "\n",
    "mr = project.get_model_registry()\n",
    "\n",
    "retrieved_model = mr.get_model(\n",
    "    name=\"air_quality_xgboost_model\",\n",
    "    version=1,\n",
    ")\n",
    "\n",
    "saved_model_dir = retrieved_model.download()\n",
    "retrieved_xgboost_model = XGBRegressor()\n",
    "retrieved_xgboost_model.load_model(saved_model_dir + \"/model.json\")\n",
    "\n",
    "### Retrieve features \n",
    "\n",
    "weather_fg = fs.get_feature_group(\n",
    "    name='weather',\n",
    "    version=1,\n",
    ")\n",
    "\n",
    "today_timestamp = pd.to_datetime(today)\n",
    "batch_data = weather_fg.filter(weather_fg.date >= today_timestamp ).read().sort_values(by=['date'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>temperature_2m_mean</th>\n",
       "      <th>precipitation_sum</th>\n",
       "      <th>wind_speed_10m_max</th>\n",
       "      <th>wind_direction_10m_dominant</th>\n",
       "      <th>city</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2024-11-21 00:00:00+00:00</td>\n",
       "      <td>21.700001</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.138420</td>\n",
       "      <td>71.564964</td>\n",
       "      <td>lahore</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2024-11-22 00:00:00+00:00</td>\n",
       "      <td>21.850000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.610250</td>\n",
       "      <td>128.659836</td>\n",
       "      <td>lahore</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>2024-11-23 00:00:00+00:00</td>\n",
       "      <td>22.250000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.091168</td>\n",
       "      <td>44.999897</td>\n",
       "      <td>lahore</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>2024-11-24 00:00:00+00:00</td>\n",
       "      <td>21.400000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.334974</td>\n",
       "      <td>318.366547</td>\n",
       "      <td>lahore</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>2024-11-25 00:00:00+00:00</td>\n",
       "      <td>20.750000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.439876</td>\n",
       "      <td>296.564972</td>\n",
       "      <td>lahore</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2024-11-26 00:00:00+00:00</td>\n",
       "      <td>20.750000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.680000</td>\n",
       "      <td>270.000000</td>\n",
       "      <td>lahore</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2024-11-27 00:00:00+00:00</td>\n",
       "      <td>20.350000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.104631</td>\n",
       "      <td>37.875053</td>\n",
       "      <td>lahore</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2024-11-28 00:00:00+00:00</td>\n",
       "      <td>19.799999</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.189795</td>\n",
       "      <td>9.462248</td>\n",
       "      <td>lahore</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                       date  temperature_2m_mean  precipitation_sum  \\\n",
       "1 2024-11-21 00:00:00+00:00            21.700001                0.0   \n",
       "4 2024-11-22 00:00:00+00:00            21.850000                0.0   \n",
       "7 2024-11-23 00:00:00+00:00            22.250000                0.0   \n",
       "6 2024-11-24 00:00:00+00:00            21.400000                0.0   \n",
       "5 2024-11-25 00:00:00+00:00            20.750000                0.0   \n",
       "2 2024-11-26 00:00:00+00:00            20.750000                0.0   \n",
       "0 2024-11-27 00:00:00+00:00            20.350000                0.0   \n",
       "3 2024-11-28 00:00:00+00:00            19.799999                0.0   \n",
       "\n",
       "   wind_speed_10m_max  wind_direction_10m_dominant    city  \n",
       "1            1.138420                    71.564964  lahore  \n",
       "4            4.610250                   128.659836  lahore  \n",
       "7            5.091168                    44.999897  lahore  \n",
       "6            4.334974                   318.366547  lahore  \n",
       "5            6.439876                   296.564972  lahore  \n",
       "2            4.680000                   270.000000  lahore  \n",
       "0            4.104631                    37.875053  lahore  \n",
       "3            2.189795                     9.462248  lahore  "
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "batch_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "feature_names mismatch: ['past_air_quality', 'temperature_2m_mean', 'precipitation_sum', 'wind_speed_10m_max', 'wind_direction_10m_dominant'] ['temperature_2m_mean', 'precipitation_sum', 'wind_speed_10m_max', 'wind_direction_10m_dominant']\nexpected past_air_quality in input data",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[6], line 3\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;66;03m### Predict and upload\u001b[39;00m\n\u001b[0;32m----> 3\u001b[0m batch_data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mpredicted_pm25\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[43mretrieved_xgboost_model\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpredict\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m      4\u001b[0m \u001b[43m    \u001b[49m\u001b[43mbatch_data\u001b[49m\u001b[43m[\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mtemperature_2m_mean\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mprecipitation_sum\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mwind_speed_10m_max\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mwind_direction_10m_dominant\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m      6\u001b[0m batch_data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mstreet\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m street\n\u001b[1;32m      7\u001b[0m batch_data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcity\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m city\n",
      "File \u001b[0;32m~/Documents/scalable-ml/lab1-new/hbg-weather/.venv/lib/python3.12/site-packages/xgboost/sklearn.py:1168\u001b[0m, in \u001b[0;36mXGBModel.predict\u001b[0;34m(self, X, output_margin, validate_features, base_margin, iteration_range)\u001b[0m\n\u001b[1;32m   1166\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_can_use_inplace_predict():\n\u001b[1;32m   1167\u001b[0m     \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1168\u001b[0m         predts \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_booster\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minplace_predict\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1169\u001b[0m \u001b[43m            \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1170\u001b[0m \u001b[43m            \u001b[49m\u001b[43miteration_range\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43miteration_range\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1171\u001b[0m \u001b[43m            \u001b[49m\u001b[43mpredict_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmargin\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43moutput_margin\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mvalue\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1172\u001b[0m \u001b[43m            \u001b[49m\u001b[43mmissing\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmissing\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1173\u001b[0m \u001b[43m            \u001b[49m\u001b[43mbase_margin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbase_margin\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1174\u001b[0m \u001b[43m            \u001b[49m\u001b[43mvalidate_features\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mvalidate_features\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1175\u001b[0m \u001b[43m        \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1176\u001b[0m         \u001b[38;5;28;01mif\u001b[39;00m _is_cupy_array(predts):\n\u001b[1;32m   1177\u001b[0m             \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mcupy\u001b[39;00m  \u001b[38;5;66;03m# pylint: disable=import-error\u001b[39;00m\n",
      "File \u001b[0;32m~/Documents/scalable-ml/lab1-new/hbg-weather/.venv/lib/python3.12/site-packages/xgboost/core.py:2418\u001b[0m, in \u001b[0;36mBooster.inplace_predict\u001b[0;34m(self, data, iteration_range, predict_type, missing, validate_features, base_margin, strict_shape)\u001b[0m\n\u001b[1;32m   2416\u001b[0m     data, fns, _ \u001b[38;5;241m=\u001b[39m _transform_pandas_df(data, enable_categorical)\n\u001b[1;32m   2417\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m validate_features:\n\u001b[0;32m-> 2418\u001b[0m         \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_validate_features\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfns\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   2419\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m _is_list(data) \u001b[38;5;129;01mor\u001b[39;00m _is_tuple(data):\n\u001b[1;32m   2420\u001b[0m     data \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39marray(data)\n",
      "File \u001b[0;32m~/Documents/scalable-ml/lab1-new/hbg-weather/.venv/lib/python3.12/site-packages/xgboost/core.py:2970\u001b[0m, in \u001b[0;36mBooster._validate_features\u001b[0;34m(self, feature_names)\u001b[0m\n\u001b[1;32m   2964\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m my_missing:\n\u001b[1;32m   2965\u001b[0m     msg \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m   2966\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mtraining data did not have the following fields: \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   2967\u001b[0m         \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(\u001b[38;5;28mstr\u001b[39m(s) \u001b[38;5;28;01mfor\u001b[39;00m s \u001b[38;5;129;01min\u001b[39;00m my_missing)\n\u001b[1;32m   2968\u001b[0m     )\n\u001b[0;32m-> 2970\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(msg\u001b[38;5;241m.\u001b[39mformat(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfeature_names, feature_names))\n",
      "\u001b[0;31mValueError\u001b[0m: feature_names mismatch: ['past_air_quality', 'temperature_2m_mean', 'precipitation_sum', 'wind_speed_10m_max', 'wind_direction_10m_dominant'] ['temperature_2m_mean', 'precipitation_sum', 'wind_speed_10m_max', 'wind_direction_10m_dominant']\nexpected past_air_quality in input data"
     ]
    }
   ],
   "source": [
    "### Predict and upload\n",
    "\n",
    "batch_data['predicted_pm25'] = retrieved_xgboost_model.predict(\n",
    "    batch_data[['temperature_2m_mean', 'precipitation_sum', 'wind_speed_10m_max', 'wind_direction_10m_dominant']])\n",
    "\n",
    "batch_data['street'] = street\n",
    "batch_data['city'] = city\n",
    "batch_data['country'] = country\n",
    "# Fill in the number of days before the date on which you made the forecast (base_date)\n",
    "batch_data['days_before_forecast_day'] = range(1, len(batch_data)+1)\n",
    "batch_data = batch_data.sort_values(by=['date'])\n",
    "#batch_data['date'] = batch_data['date'].dt.tz_convert(None).astype('datetime64[ns]')\n",
    "\n",
    "plt = util.plot_air_quality_forecast(city, street, batch_data, file_path=\"./img/pm25_forecast.png\")\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Batch data:                        date  temperature_2m_mean  precipitation_sum  \\\n",
      "0 2024-11-21 00:00:00+00:00                 3.40                0.2   \n",
      "3 2024-11-22 00:00:00+00:00                 4.05                0.7   \n",
      "2 2024-11-23 00:00:00+00:00                 5.45                0.0   \n",
      "1 2024-11-24 00:00:00+00:00                 5.60                0.0   \n",
      "\n",
      "   wind_speed_10m_max  wind_direction_10m_dominant         city  \\\n",
      "0           19.995398                   246.665939  Helsingborg   \n",
      "3           23.540806                   246.571289  Helsingborg   \n",
      "2           30.631746                   240.422256  Helsingborg   \n",
      "1           13.755580                   276.008911  Helsingborg   \n",
      "\n",
      "   predicted_pm25          street country  days_before_forecast_day  \n",
      "0       39.168438  Drottninggatan  Sweden                         1  \n",
      "3       20.740093  Drottninggatan  Sweden                         2  \n",
      "2       46.448105  Drottninggatan  Sweden                         3  \n",
      "1       61.713448  Drottninggatan  Sweden                         4  \n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0c3e8fd8c8f545a597e504acf5f077e8",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Uploading Dataframe: 0.00% |          | Rows 0/4 | Elapsed Time: 00:00 | Remaining Time: ?"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Launching job: aq_predictions_1_offline_fg_materialization\n",
      "Job started successfully, you can follow the progress at \n",
      "https://c.app.hopsworks.ai/p/1160340/jobs/named/aq_predictions_1_offline_fg_materialization/executions\n",
      "Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.95s) \n",
      "Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.85s) \n"
     ]
    }
   ],
   "source": [
    "monitor_fg = fs.get_or_create_feature_group(\n",
    "    name='aq_predictions',\n",
    "    description='Air Quality prediction monitoring',\n",
    "    version=1,\n",
    "    primary_key=['city','street','date','days_before_forecast_day'],\n",
    "    event_time=\"date\"\n",
    ")\n",
    "\n",
    "print(f\"Batch data: {batch_data}\")\n",
    "\n",
    "monitor_fg.insert(batch_data, write_options={\"wait_for_job\": True})\n",
    "monitoring_df = monitor_fg.filter(monitor_fg.days_before_forecast_day == 1).read()\n",
    "\n",
    "# Hindcast monitoring\n",
    "\n",
    "air_quality_fg = fs.get_feature_group(\n",
    "    name='air_quality',\n",
    "    version=1,\n",
    ")\n",
    "air_quality_df = air_quality_fg.read()\n",
    "\n",
    "outcome_df = air_quality_df[['date', 'pm25']]\n",
    "preds_df =  monitoring_df[['date', 'predicted_pm25']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "You are trying to merge on datetime64[us, UTC] and object columns for key 'date'. If you wish to proceed you should use pd.concat",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[5], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m hindcast_df \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmerge\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpreds_df\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moutcome_df\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mon\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdate\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m      2\u001b[0m hindcast_df \u001b[38;5;241m=\u001b[39m hindcast_df\u001b[38;5;241m.\u001b[39msort_values(by\u001b[38;5;241m=\u001b[39m[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdate\u001b[39m\u001b[38;5;124m'\u001b[39m])\n\u001b[1;32m      4\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(hindcast_df) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n",
      "File \u001b[0;32m~/Documents/scalable-ml/lab1-new/hbg-weather/.venv/lib/python3.12/site-packages/pandas/core/reshape/merge.py:169\u001b[0m, in \u001b[0;36mmerge\u001b[0;34m(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)\u001b[0m\n\u001b[1;32m    154\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m _cross_merge(\n\u001b[1;32m    155\u001b[0m         left_df,\n\u001b[1;32m    156\u001b[0m         right_df,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    166\u001b[0m         copy\u001b[38;5;241m=\u001b[39mcopy,\n\u001b[1;32m    167\u001b[0m     )\n\u001b[1;32m    168\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 169\u001b[0m     op \u001b[38;5;241m=\u001b[39m \u001b[43m_MergeOperation\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    170\u001b[0m \u001b[43m        \u001b[49m\u001b[43mleft_df\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    171\u001b[0m \u001b[43m        \u001b[49m\u001b[43mright_df\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    172\u001b[0m \u001b[43m        \u001b[49m\u001b[43mhow\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhow\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    173\u001b[0m \u001b[43m        \u001b[49m\u001b[43mon\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mon\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    174\u001b[0m \u001b[43m        \u001b[49m\u001b[43mleft_on\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mleft_on\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    175\u001b[0m \u001b[43m        \u001b[49m\u001b[43mright_on\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mright_on\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    176\u001b[0m \u001b[43m        \u001b[49m\u001b[43mleft_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mleft_index\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    177\u001b[0m \u001b[43m        \u001b[49m\u001b[43mright_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mright_index\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    178\u001b[0m \u001b[43m        \u001b[49m\u001b[43msort\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msort\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    179\u001b[0m \u001b[43m        \u001b[49m\u001b[43msuffixes\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msuffixes\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    180\u001b[0m \u001b[43m        \u001b[49m\u001b[43mindicator\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mindicator\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    181\u001b[0m \u001b[43m        \u001b[49m\u001b[43mvalidate\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mvalidate\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    182\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    183\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m op\u001b[38;5;241m.\u001b[39mget_result(copy\u001b[38;5;241m=\u001b[39mcopy)\n",
      "File \u001b[0;32m~/Documents/scalable-ml/lab1-new/hbg-weather/.venv/lib/python3.12/site-packages/pandas/core/reshape/merge.py:804\u001b[0m, in \u001b[0;36m_MergeOperation.__init__\u001b[0;34m(self, left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, indicator, validate)\u001b[0m\n\u001b[1;32m    800\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_validate_tolerance(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mleft_join_keys)\n\u001b[1;32m    802\u001b[0m \u001b[38;5;66;03m# validate the merge keys dtypes. We may need to coerce\u001b[39;00m\n\u001b[1;32m    803\u001b[0m \u001b[38;5;66;03m# to avoid incompatible dtypes\u001b[39;00m\n\u001b[0;32m--> 804\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_maybe_coerce_merge_keys\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    806\u001b[0m \u001b[38;5;66;03m# If argument passed to validate,\u001b[39;00m\n\u001b[1;32m    807\u001b[0m \u001b[38;5;66;03m# check if columns specified as unique\u001b[39;00m\n\u001b[1;32m    808\u001b[0m \u001b[38;5;66;03m# are in fact unique.\u001b[39;00m\n\u001b[1;32m    809\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m validate \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
      "File \u001b[0;32m~/Documents/scalable-ml/lab1-new/hbg-weather/.venv/lib/python3.12/site-packages/pandas/core/reshape/merge.py:1483\u001b[0m, in \u001b[0;36m_MergeOperation._maybe_coerce_merge_keys\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m   1481\u001b[0m \u001b[38;5;66;03m# datetimelikes must match exactly\u001b[39;00m\n\u001b[1;32m   1482\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m needs_i8_conversion(lk\u001b[38;5;241m.\u001b[39mdtype) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m needs_i8_conversion(rk\u001b[38;5;241m.\u001b[39mdtype):\n\u001b[0;32m-> 1483\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(msg)\n\u001b[1;32m   1484\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m needs_i8_conversion(lk\u001b[38;5;241m.\u001b[39mdtype) \u001b[38;5;129;01mand\u001b[39;00m needs_i8_conversion(rk\u001b[38;5;241m.\u001b[39mdtype):\n\u001b[1;32m   1485\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(msg)\n",
      "\u001b[0;31mValueError\u001b[0m: You are trying to merge on datetime64[us, UTC] and object columns for key 'date'. If you wish to proceed you should use pd.concat"
     ]
    }
   ],
   "source": [
    "hindcast_df = pd.merge(preds_df, outcome_df, on=\"date\")\n",
    "hindcast_df = hindcast_df.sort_values(by=['date'])\n",
    "\n",
    "if len(hindcast_df) == 0:\n",
    "    hindcast_df = util.backfill_predictions_for_monitoring(weather_fg, air_quality_df, monitor_fg, retrieved_xgboost_model)\n",
    "\n",
    "plt = util.plot_air_quality_forecast(city, street, hindcast_df, file_path=\"./img/pm25_hindcast_1day.png\", hindcast=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-11-20 14:23:12,559 WARNING: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "scale = 5\n",
    "outcome_df['predicted_pm25'] = outcome_df['pm25'] + scale * np.random.uniform(-1, 1, outcome_df.shape[0])\n",
    "outcome_df.sort_values(by=['date'])\n",
    "outcome_df.to_pickle('outcome_df.pkl')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}