Robzy commited on
Commit
35ffba0
1 Parent(s): 8e5eb32

starting to write scripts

Browse files
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ .venv
2
+ .env
3
+ .cache.sqlite
backfill.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Backfill/daily script: fetch today's air-quality reading and the weather
forecast for the configured sensor, preparing dataframes for the
'air_quality' and 'weather' feature groups on Hopsworks."""

import datetime
import requests
import hopsworks
from pathlib import Path
from functions import util
import json
import re
import os
import warnings
import pandas as pd

# Hopsworks credentials come from the environment.
api_key = os.getenv('HOPSWORKS_API_KEY')
project_name = os.getenv('HOPSWORKS_PROJECT')

project = hopsworks.login(project=project_name, api_key_value=api_key)
fs = project.get_feature_store()
secrets = util.secrets_api(project.name)

# Sensor configuration (location + AQI token) is stored as Hopsworks secrets.
AQI_API_KEY = secrets.get_secret("AQI_API_KEY").value
location_str = secrets.get_secret("SENSOR_LOCATION_JSON").value
location = json.loads(location_str)

country = location['country']
city = location['city']
street = location['street']
aqicn_url = location['aqicn_url']
latitude = location['latitude']
longitude = location['longitude']

today = datetime.date.today()

# Retrieve feature groups
air_quality_fg = fs.get_feature_group(
    name='air_quality',
    version=1,
)
weather_fg = fs.get_feature_group(
    name='weather',
    version=1,
)

# Today's PM2.5 reading for the configured sensor.
aq_today_df = util.get_pm25(aqicn_url, country, city, street, today, AQI_API_KEY)
aq_today_df['date'] = pd.to_datetime(aq_today_df['date']).dt.date

# Get weather forecast data
hourly_df = util.get_hourly_weather_forecast(city, latitude, longitude)
hourly_df = hourly_df.set_index('date')

# We only make one daily prediction, so reduce the hourly forecast to a
# single row per day by keeping the 12:00 observation.
daily_df = hourly_df.between_time('11:59', '12:01')
daily_df = daily_df.reset_index()
daily_df['date'] = pd.to_datetime(daily_df['date']).dt.date
daily_df['date'] = pd.to_datetime(daily_df['date'])
daily_df['city'] = city
functions/__pycache__/air_quality_data_retrieval.cpython-312.pyc ADDED
Binary file (5.82 kB). View file
 
functions/__pycache__/context_engineering.cpython-312.pyc ADDED
Binary file (9.77 kB). View file
 
functions/__pycache__/llm_chain.cpython-312.pyc ADDED
Binary file (7.77 kB). View file
 
functions/__pycache__/util.cpython-312.pyc ADDED
Binary file (16.8 kB). View file
 
functions/air_quality_data_retrieval.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from typing import Any, Dict, List
3
+ import datetime
4
+ import pandas as pd
5
+ import hopsworks
6
+ from hsfs.feature import Feature
7
+
8
def get_historical_data_for_date(date: str, feature_view, weather_fg, model) -> pd.DataFrame:
    """
    Look up the recorded air quality for one specific day.

    Args:
        date (str): Day of interest, formatted "%Y-%m-%d".
        feature_view: Feature view to read historical data from.
        weather_fg: Weather feature group (unused here; uniform signature).
        model: Prediction model (unused here; uniform signature).

    Returns:
        pd.DataFrame: Columns 'date' (str) and 'pm25', sorted by date.
    """
    day = datetime.datetime.strptime(date, "%Y-%m-%d").date()

    # Pull exactly one day of training data (no statistics computation).
    features, labels = feature_view.training_data(
        start_time=day,
        end_time=day + datetime.timedelta(days=1),
        statistics_config=False
    )

    # Normalise the event-time column to datetimes, attach the label,
    # then render dates back to "YYYY-MM-DD" strings.
    features['date'] = pd.to_datetime(features['date'])
    features['pm25'] = labels['pm25']
    features['date'] = features['date'].dt.strftime('%Y-%m-%d')

    return features[['date', 'pm25']].sort_values('date').reset_index(drop=True)
36
+
37
+
38
def get_historical_data_in_date_range(date_start: str, date_end: str, feature_view, weather_fg, model) -> pd.DataFrame:
    """
    Look up recorded air quality for an inclusive range of past days.

    Args:
        date_start (str): The start date in the format "%Y-%m-%d".
        date_end (str): The end date in the format "%Y-%m-%d".
        feature_view: The feature view object.
        weather_fg: Weather feature group (unused here; uniform signature).
        model: Prediction model (unused here; uniform signature).

    Returns:
        pd.DataFrame: Columns 'date' (str) and 'pm25', sorted by date.
    """
    all_rows = feature_view.query.read()

    # Inclusive date-range filter; pandas coerces the string bounds.
    in_range = (all_rows['date'] >= date_start) & (all_rows['date'] <= date_end)
    selected = all_rows[in_range]

    # Render dates as "YYYY-MM-DD" strings for display.
    selected['date'] = selected['date'].apply(lambda d: d.strftime('%Y-%m-%d'))

    return selected[['date', 'pm25']].sort_values('date').reset_index(drop=True)
61
+
62
def get_future_data_for_date(date: str, feature_view, weather_fg, model) -> pd.DataFrame:
    """
    Predicts future PM2.5 data for a specified date using the weather forecast.

    Args:
        date (str): The date in the format "%Y-%m-%d".
        feature_view: The feature view object (unused here; uniform signature).
        weather_fg: Weather feature group holding forecast rows.
        model: The machine learning model used for prediction.

    Returns:
        pd.DataFrame: Columns 'date' and 'pm25' for the requested day.
    """
    date_start_dt = datetime.datetime.strptime(date, "%Y-%m-%d")
    fg_data = weather_fg.read()

    # Bugfix, consistent with get_future_data_in_date_range: strip any
    # timezone so tz-naive and tz-aware datetimes can be compared.
    fg_data['date'] = pd.to_datetime(fg_data['date']).dt.tz_localize(None)

    # Couldn't get server-side filters to work, so filter in memory.
    # .copy() avoids pandas' SettingWithCopyWarning on the assignment below.
    df = fg_data[fg_data['date'] == date_start_dt].copy()
    batch_data = df.drop(['date', 'city'], axis=1)

    df['pm25'] = model.predict(batch_data)

    return df[['date', 'pm25']].sort_values('date').reset_index(drop=True)
84
+
85
+
86
+
87
def get_future_data_in_date_range(date_start: str, date_end: str, feature_view, weather_fg, model) -> pd.DataFrame:
    """
    Predicts future PM2.5 data for a specified start and end date range.

    Args:
        date_start (str): The start date in the format "%Y-%m-%d".
        date_end (str): The end date in the format "%Y-%m-%d", or None
            to query a single day.
        feature_view: The feature view object (unused here; uniform signature).
        weather_fg: Weather feature group holding forecast rows.
        model: The machine learning model used for prediction.

    Returns:
        pd.DataFrame: Columns 'date' and 'pm25' for the requested range.
    """
    date_start_dt = datetime.datetime.strptime(date_start, "%Y-%m-%d")
    # A missing end date means a single-day query.
    if date_end is None:
        date_end = date_start
    date_end_dt = datetime.datetime.strptime(date_end, "%Y-%m-%d")

    fg_data = weather_fg.read()
    # Fix bug: Cannot compare tz-naive and tz-aware datetime-like objects
    fg_data['date'] = pd.to_datetime(fg_data['date']).dt.tz_localize(None)

    # Couldn't get server-side filters to work, so filter in memory.
    # .copy() avoids pandas' SettingWithCopyWarning on the assignment below.
    df = fg_data[(fg_data['date'] >= date_start_dt) & (fg_data['date'] <= date_end_dt)].copy()
    batch_data = df.drop(['date', 'city'], axis=1)

    df['pm25'] = model.predict(batch_data)

    return df[['date', 'pm25']].sort_values('date').reset_index(drop=True)
functions/context_engineering.py ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import xml.etree.ElementTree as ET
2
+ import re
3
+ import inspect
4
+ from typing import get_type_hints
5
+ import json
6
+ import datetime
7
+ import torch
8
+ import sys
9
+ import pandas as pd
10
+ from openai import OpenAI
11
+ from functions.air_quality_data_retrieval import (
12
+ get_historical_data_for_date,
13
+ get_historical_data_in_date_range,
14
+ get_future_data_in_date_range,
15
+ get_future_data_for_date,
16
+ )
17
+ from typing import Any, Dict, List
18
+
19
+
20
def get_type_name(t: Any) -> str:
    """Render a type (or typing generic) as a readable name string."""
    rendered = str(t)
    # Generic containers stringify with their parameters, e.g.
    # "list[int]" or "typing.Dict[str, int]" — keep that full form.
    if "list" in rendered or "dict" in rendered:
        return rendered
    return t.__name__
27
+
28
+
29
def serialize_function_to_json(func: Any) -> str:
    """Describe a function's signature as a JSON string.

    The result contains the function name, its docstring, a JSON-schema-like
    "parameters" object mapping each parameter name to its annotated type,
    and the rendered return type ("void" when unannotated).
    """
    signature = inspect.signature(func)
    type_hints = get_type_hints(func)

    return_hint = type_hints.get('return')
    function_info = {
        "name": func.__name__,
        "description": func.__doc__,
        "parameters": {
            "type": "object",
            "properties": {}
        },
        # Bugfix: go through get_type_name so generic return hints
        # (e.g. list[int], which have no __name__) don't raise
        # AttributeError, and unannotated functions fall back to the
        # literal "void" instead of crashing on 'void'.__name__.
        "returns": "void" if return_hint is None else get_type_name(return_hint)
    }

    # Unannotated parameters are reported as NoneType.
    for name, _ in signature.parameters.items():
        param_type = get_type_name(type_hints.get(name, type(None)))
        function_info["parameters"]["properties"][name] = {"type": param_type}

    return json.dumps(function_info, indent=2)
49
+
50
+
51
def get_function_calling_prompt(user_query):
    """Build the ChatML function-calling prompt for the given user query.

    Embeds JSON descriptions of the four data-retrieval functions plus
    routing instructions, so the LLM can answer with a single
    <onefunctioncall> envelope (or 'No Function needed').
    """
    # Literal shapes of the expected <functioncall> JSON payloads.
    fn = """{"name": "function_name", "arguments": {"arg_1": "value_1", "arg_2": value_2, ...}}"""
    example = """{"name": "get_historical_data_in_date_range", "arguments": {"date_start": "2024-01-10", "date_end": "2024-01-14"}}"""

    # NOTE(review): instruction text below is reproduced verbatim,
    # including its typos, since it is a runtime prompt string.
    prompt = f"""<|im_start|>system
You are a helpful assistant with access to the following functions:

{serialize_function_to_json(get_historical_data_for_date)}

{serialize_function_to_json(get_historical_data_in_date_range)}

{serialize_function_to_json(get_future_data_for_date)}

{serialize_function_to_json(get_future_data_in_date_range)}

###INSTRUCTIONS:
- You need to choose one function to use and retrieve paramenters for this function from the user input.
- If the user query contains 'will', and specifies a single day or date, use get_future_data_in_date_range function
- If the user query contains 'will', and specifies a range of days or dates, use get_future_data_in_date_range function.
- If the user query is for future data, but only includes a single day or date, use the get_future_data_in_date_range function,
- If the user query contains 'today' or 'yesterday', use get_historical_data_for_date function.
- If the user query contains 'tomorrow', use get_future_data_in_date_range function.
- If the user query is for historical data, and specifies a range of days or dates, use use get_historical_data_for_date function.
- If the user says a day of the week, assume the date of that day is when that day next arrives.
- Do not include feature_view and model parameters.
- Provide dates STRICTLY in the YYYY-MM-DD format.
- Generate an 'No Function needed' string if the user query does not require function calling.

IMPORTANT: Today is {datetime.date.today().strftime("%A")}, {datetime.date.today()}.

To use one of there functions respond STRICTLY with:
<onefunctioncall>
    <functioncall> {fn} </functioncall>
</onefunctioncall>

###EXAMPLES

EXAMPLE 1:
- User: Hi!
- AI Assiatant: No Function needed.

EXAMPLE 2:
- User: Is this Air Quality level good or bad?
- AI Assiatant: No Function needed.

EXAMPLE 3:
- User: When and what was the minimum air quality from 2024-01-10 till 2024-01-14?
- AI Assistant:
<onefunctioncall>
    <functioncall> {example} </functioncall>
</onefunctioncall>
<|im_end|>

<|im_start|>user
{user_query}
<|im_end|>

<|im_start|>assistant"""

    return prompt
111
+
112
+
113
def generate_hermes(user_query: str, model_llm, tokenizer) -> str:
    """Retrieves a function name and extracts function parameters based on the user query."""

    prompt = get_function_calling_prompt(user_query)

    # Tokenize on the model's device and remember the prompt length so the
    # prompt can be stripped from the generated sequence afterwards.
    encoded = tokenizer(prompt, return_tensors="pt").to(model_llm.device)
    prompt_len = encoded.input_ids.numel()

    with torch.inference_mode():
        output_ids = model_llm.generate(
            **encoded,
            use_cache=True,
            do_sample=True,
            temperature=0.2,
            top_p=1.0,
            top_k=0,
            max_new_tokens=512,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tail, not the prompt itself.
    completion_ids = output_ids.squeeze()[prompt_len:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True)
137
+
138
+
139
def function_calling_with_openai(user_query: str, client) -> str:
    """
    Generates a response using OpenAI's chat API.

    Args:
        user_query (str): The user's query or prompt.
        client: OpenAI client used to issue the chat completion.

    Returns:
        str: The generated response from the assistant ("" when empty).
    """
    # The system instructions are the function-calling prompt truncated at
    # the user-turn marker; the query itself goes in as the user message.
    instructions = get_function_calling_prompt(user_query).split('<|im_start|>user')[0]

    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": instructions},
            {"role": "user", "content": user_query},
        ]
    )

    # Extract and return the assistant's reply from the response
    if not completion or not completion.choices:
        return ""
    message = completion.choices[0].message
    if message:
        return message.content.strip()
    return ""
167
+
168
+
169
def extract_function_calls(completion: str) -> List[Dict[str, Any]]:
    """Parse <onefunctioncall> envelopes out of a model completion.

    Returns a list with one dict per <functioncall> child (parsed from its
    JSON text), or None when the completion contains no function call.
    """
    text = completion.strip()

    # Locate the first <onefunctioncall>...</onefunctioncall> envelope.
    envelope = re.search(r"(<onefunctioncall>(.*?)</onefunctioncall>)", text, re.DOTALL)
    if envelope is None:
        return None

    # Each child <functioncall> element carries one JSON payload.
    root = ET.fromstring(envelope.group(1))
    return [json.loads(call.text) for call in root.findall("functioncall")]
182
+
183
+
184
def invoke_function(function, feature_view, weather_fg, model) -> pd.DataFrame:
    """Invoke a function with given arguments.

    The call spec dict carries the target function's name plus the
    keyword arguments chosen by the LLM; the shared data-access objects
    are forwarded alongside them.
    """
    target_name = function['name']
    call_kwargs = function['arguments']

    # Resolve the target on this module by name and call it.
    target = getattr(sys.modules[__name__], target_name)
    result = target(
        **call_kwargs,
        feature_view=feature_view,
        weather_fg=weather_fg,
        model=model,
    )

    # Some handlers return a plain message rather than a DataFrame.
    if type(result) == str:
        return result

    # Round the 'pm25' value to 2 decimal places
    result['pm25'] = result['pm25'].apply(round, ndigits=2)
    return result
204
+
205
+
206
def get_context_data(user_query: str, feature_view, weather_fg, model_air_quality, model_llm=None, tokenizer=None, client=None) -> str:
    """
    Retrieve context data based on user query.

    Args:
        user_query (str): The user query.
        feature_view: Feature view for data retrieval.
        weather_fg: Weather feature group with forecast rows.
        model_air_quality: The air quality model.
        model_llm: Local LLM, used when no OpenAI client is given.
        tokenizer: Tokenizer for the local LLM.
        client: Optional OpenAI client; takes precedence when provided.

    Returns:
        str: The context data ('' when no function call was needed).
    """
    # Ask an LLM (OpenAI when a client is available, otherwise the local
    # model) which data-retrieval function the query requires.
    if client:
        completion = function_calling_with_openai(user_query, client)
    else:
        completion = generate_hermes(
            user_query,
            model_llm,
            tokenizer,
        )

    functions = extract_function_calls(completion)
    if not functions:
        # No function call requested — nothing to add as context.
        return ''

    # Invoke the first proposed function with its arguments.
    data = invoke_function(functions[0], feature_view, weather_fg, model_air_quality)

    # Format DataFrame results as one measurement per line.
    if isinstance(data, pd.DataFrame):
        lines = [
            f'Date: {row["date"]}; Air Quality: {row["pm25"]}'
            for _, row in data.iterrows()
        ]
        return f'Air Quality Measurements:\n' + '\n'.join(lines)

    # A string here is a pass-through message (e.g. data not updated).
    return data
functions/llm_chain.py ADDED
@@ -0,0 +1,246 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import transformers
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, AutoConfig, AutoModel
3
+ from langchain.llms import HuggingFacePipeline
4
+ from langchain.prompts import PromptTemplate
5
+ from langchain.chains.llm import LLMChain
6
+ from langchain.memory import ConversationBufferWindowMemory
7
+ import torch
8
+ import datetime
9
+ from typing import Any, Dict, Union
10
+ from functions.context_engineering import get_context_data
11
+ import os
12
+ from safetensors.torch import load_model, save_model
13
+
14
def load_model(model_id: str = "teknium/OpenHermes-2.5-Mistral-7B") -> tuple:
    """
    Load the LLM and its corresponding tokenizer, caching both on disk.

    Args:
        model_id (str, optional): Identifier for the pre-trained model.
            Defaults to "teknium/OpenHermes-2.5-Mistral-7B".

    Returns:
        tuple: A tuple containing the loaded model and tokenizer.
    """
    # Load the tokenizer, downloading on first use and caching locally.
    tokenizer_path = "./mistral/tokenizer"
    if not os.path.isdir(tokenizer_path):
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        tokenizer.save_pretrained(tokenizer_path)
    else:
        tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)

    # Pad with the unknown token, padding on the right, to avoid
    # tokenization warnings.
    tokenizer.pad_token = tokenizer.unk_token
    tokenizer.padding_side = "right"

    # BitsAndBytes int-4 quantization config.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

    model_path = "/tmp/mistral/model"
    if os.path.exists(model_path):
        print("Loading model from disk")
        # NOTE(review): this branch loads without device_map/quantization,
        # unlike the fresh download below — confirm this is intended.
        model_llm = AutoModelForCausalLM.from_pretrained(model_path)
    else:
        # Download the model with 4-bit quantization, then cache it.
        model_llm = AutoModelForCausalLM.from_pretrained(
            model_id,
            device_map="auto",
            quantization_config=bnb_config,
        )
        model_llm.save_pretrained(model_path)

    # Keep the model's pad token id in sync with the tokenizer's.
    model_llm.config.pad_token_id = tokenizer.pad_token_id

    return model_llm, tokenizer
65
+
66
+
67
def get_prompt_template():
    """
    Retrieve a template for generating prompts in a conversational AI system.

    Returns:
        str: A string representing the template for generating prompts.
            This template includes placeholders for system information,
            instructions, context, date and user query.
    """
    # ChatML-style template; {context}, {date_today} and {question} are
    # filled in by the LLM chain (or .format) at invocation time.
    prompt_template = """<|im_start|>system
You are one of the best air quality experts in the world.

###INSTRUCTIONS:
- If you don't know the answer, you will respond politely that you cannot help.
- Use the context table with air quality indicators for city provided by user to generate your answer.
- You answer should be at least one sentence.
- Do not show any calculations to the user.
- Make sure that you use correct air quality indicators for the corresponding date.
- Add a rich analysis of the air quality level, such as whether it is safe, whether to go for a walk, etc.
- Do not mention in your answer that you are using context table.
<|im_end|>

### CONTEXT:
{context}

IMPORTANT: Today is {date_today}.

<|im_start|>user
{question}<|im_end|>
<|im_start|>assistant"""
    return prompt_template
98
+
99
+
100
def get_llm_chain(model_llm, tokenizer):
    """
    Create and configure a language model chain.

    Args:
        model_llm: The pre-trained language model for text generation.
        tokenizer: The tokenizer corresponding to the language model.

    Returns:
        LLMChain: The configured language model chain.
    """
    # Wrap model + tokenizer in a generation pipeline with moderate
    # sampling temperature and a 512-token generation budget.
    generation_pipeline = transformers.pipeline(
        model=model_llm,
        tokenizer=tokenizer,
        task="text-generation",
        use_cache=True,
        do_sample=True,
        temperature=0.4,
        top_p=1.0,
        top_k=0,
        max_new_tokens=512,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Expose the Hugging Face pipeline to LangChain.
    mistral_llm = HuggingFacePipeline(pipeline=generation_pipeline)

    # The prompt template declares the variables filled at invoke time.
    prompt = PromptTemplate(
        input_variables=["context", "question", "date_today"],
        template=get_prompt_template(),
    )

    return LLMChain(
        llm=mistral_llm,
        prompt=prompt,
        verbose=False,
    )
145
+
146
+
147
def generate_response(
    user_query: str,
    feature_view,
    weather_fg,
    model_air_quality,
    model_llm,
    tokenizer,
    llm_chain=None,
    verbose: bool = False,
) -> str:
    """
    Generate response to user query using LLM chain and context data.

    Args:
        user_query (str): The user's query.
        feature_view: Feature view for data retrieval.
        weather_fg: Weather feature group with forecast rows.
        model_air_quality: Model for predicting air quality.
        model_llm: Language model for text generation.
        tokenizer: Tokenizer for processing text.
        llm_chain: LLM Chain.
        verbose (bool): Whether to print verbose information. Defaults to False.

    Returns:
        str: Generated response to the user query.
    """
    # Retrieve the air-quality context relevant to the query.
    context = get_context_data(
        user_query,
        feature_view,
        weather_fg,
        model_air_quality,
        model_llm=model_llm,
        tokenizer=tokenizer,
    )

    # Today's date in a readable format, e.g. "Monday, 2024-11-18".
    date_today = f'{datetime.date.today().strftime("%A")}, {datetime.date.today()}'

    if verbose:
        print(f"🗓️ Today's date: {date_today}")
        print(f'📖 {context}')

    # Invoke the language model chain with the assembled inputs.
    model_output = llm_chain.invoke({
        "context": context,
        "date_today": date_today,
        "question": user_query,
    })

    # Keep only the assistant's turn from the raw template output.
    return model_output['text'].split('<|im_start|>assistant')[-1]
199
+
200
+
201
def generate_response_openai(
    user_query: str,
    feature_view,
    weather_fg,
    model_air_quality,
    client,
    verbose=True,
):
    """Answer a user query with OpenAI chat, grounded in retrieved context.

    Returns the assistant's reply string ("" when the API returns nothing).
    """
    # Retrieve the air-quality context relevant to the query.
    context = get_context_data(
        user_query,
        feature_view,
        weather_fg,
        model_air_quality,
        client=client,
    )

    # Today's date in a readable format, e.g. "Monday, 2024-11-18".
    date_today = f'{datetime.date.today().strftime("%A")}, {datetime.date.today()}'

    if verbose:
        print(f"🗓️ Today's date: {date_today}")
        print(f'📖 {context}')

    # Reuse the system half of the chat template, filled with the
    # retrieved context and today's date.
    instructions = get_prompt_template().split('<|im_start|>user')[0]
    instructions_filled = instructions.format(
        context=context,
        date_today=date_today
    )

    completion = client.chat.completions.create(
        model="gpt-4-0125-preview",
        messages=[
            {"role": "system", "content": instructions_filled},
            {"role": "user", "content": user_query},
        ]
    )

    # Extract and return the assistant's reply from the response
    if not completion or not completion.choices:
        return ""
    message = completion.choices[0].message
    if message:
        return message.content.strip()
    return ""
functions/util.py ADDED
@@ -0,0 +1,311 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import datetime
3
+ import time
4
+ import requests
5
+ import pandas as pd
6
+ import json
7
+ from geopy.geocoders import Nominatim
8
+ import matplotlib.pyplot as plt
9
+ from matplotlib.patches import Patch
10
+ from matplotlib.ticker import MultipleLocator
11
+ import openmeteo_requests
12
+ import requests_cache
13
+ from retry_requests import retry
14
+ import hopsworks
15
+ import hsfs
16
+ from pathlib import Path
17
+
18
def get_historical_weather(city, start_date, end_date, latitude, longitude):
    """Fetch daily historical weather for a location from Open-Meteo.

    Returns a DataFrame with date, mean temperature, precipitation sum,
    max wind speed, dominant wind direction and the city name.
    """
    # Cached session (never expires) with retry/backoff on failures.
    cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
    retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
    openmeteo = openmeteo_requests.Client(session=retry_session)

    # Variable order here must match the Variables(i) indices below.
    url = "https://archive-api.open-meteo.com/v1/archive"
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": start_date,
        "end_date": end_date,
        "daily": ["temperature_2m_mean", "precipitation_sum", "wind_speed_10m_max", "wind_direction_10m_dominant"]
    }
    responses = openmeteo.weather_api(url, params=params)

    # Only a single location is requested, so use the first response.
    response = responses[0]
    print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
    print(f"Elevation {response.Elevation()} m asl")
    print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
    print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

    # Unpack the daily variables in the same order as requested above.
    daily = response.Daily()
    daily_data = {
        "date": pd.date_range(
            start=pd.to_datetime(daily.Time(), unit="s"),
            end=pd.to_datetime(daily.TimeEnd(), unit="s"),
            freq=pd.Timedelta(seconds=daily.Interval()),
            inclusive="left",
        ),
        "temperature_2m_mean": daily.Variables(0).ValuesAsNumpy(),
        "precipitation_sum": daily.Variables(1).ValuesAsNumpy(),
        "wind_speed_10m_max": daily.Variables(2).ValuesAsNumpy(),
        "wind_direction_10m_dominant": daily.Variables(3).ValuesAsNumpy(),
    }

    daily_dataframe = pd.DataFrame(data=daily_data).dropna()
    daily_dataframe['city'] = city
    return daily_dataframe
67
+
68
def get_hourly_weather_forecast(city, latitude, longitude):
    """Fetch the ECMWF hourly weather forecast for a location.

    Column names intentionally mirror the daily schema
    (temperature_2m_mean etc.) so forecast rows align with the
    historical weather feature group.
    """
    # Cached session (1 hour expiry) with retry/backoff on failures.
    cache_session = requests_cache.CachedSession('.cache', expire_after=3600)
    retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
    openmeteo = openmeteo_requests.Client(session=retry_session)

    # Variable order here must match the Variables(i) indices below.
    url = "https://api.open-meteo.com/v1/ecmwf"
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "hourly": ["temperature_2m", "precipitation", "wind_speed_10m", "wind_direction_10m"]
    }
    responses = openmeteo.weather_api(url, params=params)

    # Only a single location is requested, so use the first response.
    response = responses[0]
    print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
    print(f"Elevation {response.Elevation()} m asl")
    print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
    print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

    # Unpack the hourly variables in the same order as requested above.
    hourly = response.Hourly()
    hourly_data = {
        "date": pd.date_range(
            start=pd.to_datetime(hourly.Time(), unit="s"),
            end=pd.to_datetime(hourly.TimeEnd(), unit="s"),
            freq=pd.Timedelta(seconds=hourly.Interval()),
            inclusive="left",
        ),
        "temperature_2m_mean": hourly.Variables(0).ValuesAsNumpy(),
        "precipitation_sum": hourly.Variables(1).ValuesAsNumpy(),
        "wind_speed_10m_max": hourly.Variables(2).ValuesAsNumpy(),
        "wind_direction_10m_dominant": hourly.Variables(3).ValuesAsNumpy(),
    }

    return pd.DataFrame(data=hourly_data).dropna()
116
+
117
+
118
+
119
def get_city_coordinates(city_name: str):
    """
    Takes city name and returns its latitude and longitude (rounded to 2 digits after dot).
    """
    # Geocode via OpenStreetMap's Nominatim service.
    geolocator = Nominatim(user_agent="MyApp")
    match = geolocator.geocode(city_name)

    return round(match.latitude, 2), round(match.longitude, 2)
131
+
132
def trigger_request(url: str):
    """GET a URL and return its JSON payload.

    Raises requests.exceptions.RequestException (carrying the status
    code) when the server responds with anything other than 200.
    """
    response = requests.get(url)
    if response.status_code != 200:
        # Surface the failure both on stdout and as an exception.
        print("Failed to retrieve data. Status Code:", response.status_code)
        raise requests.exceptions.RequestException(response.status_code)
    return response.json()
142
+
143
+
144
def get_pm25(aqicn_url: str, country: str, city: str, street: str, day: datetime.date, AQI_API_KEY: str):
    """
    Returns DataFrame with air quality (pm25) as dataframe

    Queries the AQICN API for the configured sensor, falling back to
    country/street and country/city/street station paths when the primary
    URL is not recognised. The returned single-row DataFrame carries the
    pm25 reading plus location metadata for the given day.
    """
    # Primary endpoint: the sensor-specific URL from the project secrets
    data = trigger_request(f"{aqicn_url}/?token={AQI_API_KEY}")

    # if we get 'Unknown station' response then retry with city in url.
    # A successful payload is a dict, never the string, so these checks are
    # no-ops once a request has succeeded.
    if data['data'] == "Unknown station":
        data = trigger_request(f"https://api.waqi.info/feed/{country}/{street}/?token={AQI_API_KEY}")
    if data['data'] == "Unknown station":
        data = trigger_request(f"https://api.waqi.info/feed/{country}/{city}/{street}/?token={AQI_API_KEY}")

    # Check if the API response contains the data
    if data['status'] != 'ok':
        print("Error: There may be an incorrect URL for your Sensor or it is not contactable right now. The API response does not contain data. Error message:", data['data'])
        raise requests.exceptions.RequestException(data['data'])

    # Extract the air quality data into a one-row frame
    aqi_data = data['data']
    aq_today_df = pd.DataFrame()
    # 'pm25' may be absent from iaqi -> None, which becomes NaN as float32
    aq_today_df['pm25'] = [aqi_data['iaqi'].get('pm25', {}).get('v', None)]
    aq_today_df['pm25'] = aq_today_df['pm25'].astype('float32')

    aq_today_df['country'] = country
    aq_today_df['city'] = city
    aq_today_df['street'] = street
    aq_today_df['date'] = day
    aq_today_df['date'] = pd.to_datetime(aq_today_df['date'])
    aq_today_df['url'] = aqicn_url

    return aq_today_df
183
+
184
+
185
def plot_air_quality_forecast(city: str, street: str, df: pd.DataFrame, file_path: str, hindcast=False):
    """
    Plot predicted (and, for hindcasts, actual) PM2.5 on a log scale with the
    standard air-quality category bands shaded behind the data. Saves the
    figure to *file_path* and returns the pyplot module.
    """
    fig, ax = plt.subplots(figsize=(10, 6))

    day = pd.to_datetime(df['date']).dt.date
    ax.plot(day, df['predicted_pm25'], label='Predicted PM2.5', color='red',
            linewidth=2, marker='o', markersize=5, markerfacecolor='blue')

    # Logarithmic y-axis with plain-number tick labels
    ax.set_yscale('log')
    ax.set_yticks([0, 10, 25, 50, 100, 250, 500])
    ax.get_yaxis().set_major_formatter(plt.ScalarFormatter())
    ax.set_ylim(bottom=1)

    ax.set_xlabel('Date')
    ax.set_title(f"PM2.5 Predicted (Logarithmic Scale) for {city}, {street}")
    ax.set_ylabel('PM2.5')

    # One row per AQI category: (band colour, legend label, (low, high))
    categories = [
        ('green',   'Good',               (0, 49)),
        ('yellow',  'Moderate',           (50, 99)),
        ('orange',  'Unhealthy for Some', (100, 149)),
        ('red',     'Unhealthy',          (150, 199)),
        ('purple',  'Very Unhealthy',     (200, 299)),
        ('darkred', 'Hazardous',          (300, 500)),
    ]
    for color, _label, (low, high) in categories:
        ax.axhspan(low, high, color=color, alpha=0.3)

    # Add a legend for the different Air Quality Categories
    patches = [Patch(color=color, label=f"{label}: {low}-{high}")
               for color, label, (low, high) in categories]
    legend1 = ax.legend(handles=patches, loc='upper right', title="Air Quality Categories", fontsize='x-small')

    # Aim for ~10 annotated values on x-axis; works for both forecasts and hindcasts
    if len(df.index) > 11:
        ax.xaxis.set_major_locator(MultipleLocator(len(df.index) / 10))

    plt.xticks(rotation=45)

    if hindcast:
        # Overlay the observed values and keep both legends visible
        ax.plot(day, df['pm25'], label='Actual PM2.5', color='black',
                linewidth=2, marker='^', markersize=5, markerfacecolor='grey')
        legend2 = ax.legend(loc='upper left', fontsize='x-small')
        ax.add_artist(legend1)

    # Ensure everything is laid out neatly
    plt.tight_layout()

    # Save the figure, overwriting any existing file with the same name
    plt.savefig(file_path)
    return plt
231
+
232
+
233
def delete_feature_groups(fs, name):
    """Delete every version of the feature group *name* from the feature store."""
    try:
        for group in fs.get_feature_groups(name):
            group.delete()
            print(f"Deleted {group.name}/{group.version}")
    except hsfs.client.exceptions.RestAPIError:
        # Raised when no feature group with this name exists
        print(f"No {name} feature group found")
240
+
241
def delete_feature_views(fs, name):
    """Delete every version of the feature view *name* from the feature store."""
    try:
        for view in fs.get_feature_views(name):
            view.delete()
            print(f"Deleted {view.name}/{view.version}")
    except hsfs.client.exceptions.RestAPIError:
        # Raised when no feature view with this name exists
        print(f"No {name} feature view found")
248
+
249
def delete_models(mr, name):
    """Delete every registered version of the model *name* from the registry."""
    found = mr.get_models(name)
    if not found:
        print(f"No {name} model found")
        return
    for model in found:
        model.delete()
        print(f"Deleted model {model.name}/{model.version}")
256
+
257
def delete_secrets(proj, name):
    """Delete the project secret *name*, if it exists."""
    # Local name 'api' avoids shadowing the module-level 'secrets' usage elsewhere
    api = secrets_api(proj.name)
    try:
        api.get_secret(name).delete()
        print(f"Deleted secret {name}")
    except hopsworks.client.exceptions.RestAPIError:
        # Raised when the secret does not exist
        print(f"No {name} secret found")
265
+
266
# WARNING - this will wipe out all your feature data and models
def purge_project(proj):
    """
    Delete every feature view, feature group, model, and sensor-location
    secret this pipeline created in the given Hopsworks project.

    Args:
        proj: A logged-in Hopsworks project handle.
    """
    fs = proj.get_feature_store()
    mr = proj.get_model_registry()

    # Delete Feature Views before deleting the feature groups
    delete_feature_views(fs, "air_quality_fv")

    # Delete ALL Feature Groups
    delete_feature_groups(fs, "air_quality")
    delete_feature_groups(fs, "weather")
    delete_feature_groups(fs, "aq_predictions")

    # Delete all Models
    delete_models(mr, "air_quality_xgboost_model")
    delete_secrets(proj, "SENSOR_LOCATION_JSON")
282
+
283
+
284
def secrets_api(proj):
    """
    Open a Hopsworks connection for project *proj* (authenticated via the
    HOPSWORKS_API_KEY environment variable) and return its secrets API.
    """
    connection = hopsworks.connection(
        host="c.app.hopsworks.ai",
        project=proj,
        api_key_value=os.environ.get('HOPSWORKS_API_KEY'),
    )
    return connection.get_secrets_api()
289
+
290
+
291
def check_file_path(file_path):
    """
    Print whether a file exists at *file_path*.

    Args:
        file_path: Path to check (string or os.PathLike).
    """
    # Idiomatic truthiness check instead of '== False'
    if Path(file_path).is_file():
        print(f"File successfully found at the path: {file_path}")
    else:
        print(f"Error. File not found at the path: {file_path} ")
297
+
298
def backfill_predictions_for_monitoring(weather_fg, air_quality_df, monitor_fg, model):
    """
    Recompute hindcast predictions for the 10 most recent weather rows,
    insert them (without the actual pm25 values) into the monitoring
    feature group, and return the merged hindcast DataFrame that joins
    predictions with observed pm25.
    """
    # Most recent 10 days of weather features, oldest first
    features_df = weather_fg.read().sort_values(by=['date'], ascending=True).tail(10)

    feature_cols = ['temperature_2m_mean', 'precipitation_sum', 'wind_speed_10m_max', 'wind_direction_10m_dominant']
    features_df['predicted_pm25'] = model.predict(features_df[feature_cols])

    # Normalise both 'date' columns to tz-naive datetime64[ns] so the merge keys align
    air_quality_df['date'] = pd.to_datetime(air_quality_df['date'])
    features_df['date'] = features_df['date'].dt.tz_convert(None).astype('datetime64[ns]')

    hindcast_df = pd.merge(features_df, air_quality_df[['date', 'pm25', 'street', 'country']], on="date")
    hindcast_df['days_before_forecast_day'] = 1

    # The monitoring feature group stores predictions only; drop the actuals
    monitor_fg.insert(hindcast_df.drop('pm25', axis=1), write_options={"wait_for_job": True})
    return hindcast_df
infer.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Daily batch inference: load the trained XGBoost model from the Hopsworks
# model registry and predict PM2.5 from today's (and later) weather
# forecast features.

import datetime
import pandas as pd
from xgboost import XGBRegressor
import hopsworks
import json
from functions import util
import os

# Set up: authenticate against Hopsworks using environment variables

api_key = os.getenv('HOPSWORKS_API_KEY')
project_name = os.getenv('HOPSWORKS_PROJECT')

project = hopsworks.login(project=project_name, api_key_value=api_key)
fs = project.get_feature_store()
secrets = util.secrets_api(project.name)

AQI_API_KEY = secrets.get_secret("AQI_API_KEY").value
location_str = secrets.get_secret("SENSOR_LOCATION_JSON").value
location = json.loads(location_str)

# Cut-off for the inference batch: only forecast rows from now onwards.
# (The original '- datetime.timedelta(0)' was a no-op and has been removed.)
today = datetime.datetime.now()

feature_view = fs.get_feature_view(
    name='air_quality_fv',
    version=1,
)

# Retrieve model

mr = project.get_model_registry()

retrieved_model = mr.get_model(
    name="air_quality_xgboost_model",
    version=1,
)

saved_model_dir = retrieved_model.download()
retrieved_xgboost_model = XGBRegressor()
retrieved_xgboost_model.load_model(saved_model_dir + "/model.json")

# Retrieve features: weather forecast rows dated today or later

weather_fg = fs.get_feature_group(
    name='weather',
    version=1,
)

today_timestamp = pd.to_datetime(today)
batch_data = weather_fg.filter(weather_fg.date >= today_timestamp).read()
batch_data['predicted_pm25'] = retrieved_xgboost_model.predict(
    batch_data[['temperature_2m_mean', 'precipitation_sum', 'wind_speed_10m_max', 'wind_direction_10m_dominant']])
requirements-llm.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # LLM libraries
2
+ gradio==3.40.1
3
+ getpass4==0.0.14.1
4
+
5
+ transformers==4.38.2
6
+ langchain==0.1.10
7
+ bitsandbytes==0.42.0
8
+ accelerate==0.27.2
9
+
10
+ # OpenAI
11
+ openai==1.14.3
requirements.txt ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Feature store and model registry
2
+ hopsworks
3
+
4
+ # Resolve city names from (longitude, latitude) coordinates
5
+ geopy==2.4.1
6
+
7
+ # Read weather data. Unpinned version - if we don't update, we won't get the weather data
8
+ openmeteo-requests
9
+
10
+ # Be more efficient when making REST (Http) requests
11
+ requests-cache==1.2.0
12
+ retry-requests==2.0.0
13
+
14
+ # ML framework libraries
15
+ xgboost==2.0.3
16
+ scikit-learn==1.4.1.post1
17
+
18
+ # Plot charts
19
+ matplotlib==3.8.3
20
+
21
+ python-dotenv
training.py ADDED
File without changes