import xml.etree.ElementTree as ET import re import inspect from typing import get_type_hints import json import datetime import torch import sys import pandas as pd from openai import OpenAI from functions.air_quality_data_retrieval import ( get_historical_data_for_date, get_historical_data_in_date_range, get_future_data_in_date_range, get_future_data_for_date, ) from typing import Any, Dict, List def get_type_name(t: Any) -> str: """Get the name of the type.""" name = str(t) if "list" in name or "dict" in name: return name else: return t.__name__ def serialize_function_to_json(func: Any) -> str: """Serialize a function to JSON.""" signature = inspect.signature(func) type_hints = get_type_hints(func) function_info = { "name": func.__name__, "description": func.__doc__, "parameters": { "type": "object", "properties": {} }, "returns": type_hints.get('return', 'void').__name__ } for name, _ in signature.parameters.items(): param_type = get_type_name(type_hints.get(name, type(None))) function_info["parameters"]["properties"][name] = {"type": param_type} return json.dumps(function_info, indent=2) def get_function_calling_prompt(user_query): fn = """{"name": "function_name", "arguments": {"arg_1": "value_1", "arg_2": value_2, ...}}""" example = """{"name": "get_historical_data_in_date_range", "arguments": {"date_start": "2024-01-10", "date_end": "2024-01-14"}}""" prompt = f"""<|im_start|>system You are a helpful assistant with access to the following functions: {serialize_function_to_json(get_historical_data_for_date)} {serialize_function_to_json(get_historical_data_in_date_range)} {serialize_function_to_json(get_future_data_for_date)} {serialize_function_to_json(get_future_data_in_date_range)} ###INSTRUCTIONS: - You need to choose one function to use and retrieve paramenters for this function from the user input. - If the user query contains 'will', and specifies a single day or date, use get_future_data_in_date_range function - If the user query contains 'will', and specifies a range of days or dates, use get_future_data_in_date_range function. - If the user query is for future data, but only includes a single day or date, use the get_future_data_in_date_range function, - If the user query contains 'today' or 'yesterday', use get_historical_data_for_date function. - If the user query contains 'tomorrow', use get_future_data_in_date_range function. - If the user query is for historical data, and specifies a range of days or dates, use use get_historical_data_for_date function. - If the user says a day of the week, assume the date of that day is when that day next arrives. - Do not include feature_view and model parameters. - Provide dates STRICTLY in the YYYY-MM-DD format. - Generate an 'No Function needed' string if the user query does not require function calling. IMPORTANT: Today is {datetime.date.today().strftime("%A")}, {datetime.date.today()}. To use one of there functions respond STRICTLY with: {fn} ###EXAMPLES EXAMPLE 1: - User: Hi! - AI Assiatant: No Function needed. EXAMPLE 2: - User: Is this Air Quality level good or bad? - AI Assiatant: No Function needed. EXAMPLE 3: - User: When and what was the minimum air quality from 2024-01-10 till 2024-01-14? - AI Assistant: {example} <|im_end|> <|im_start|>user {user_query} <|im_end|> <|im_start|>assistant""" return prompt def generate_hermes(user_query: str, model_llm, tokenizer) -> str: """Retrieves a function name and extracts function parameters based on the user query.""" prompt = get_function_calling_prompt(user_query) tokens = tokenizer(prompt, return_tensors="pt").to(model_llm.device) input_size = tokens.input_ids.numel() with torch.inference_mode(): generated_tokens = model_llm.generate( **tokens, use_cache=True, do_sample=True, temperature=0.2, top_p=1.0, top_k=0, max_new_tokens=512, eos_token_id=tokenizer.eos_token_id, pad_token_id=tokenizer.eos_token_id, ) return tokenizer.decode( generated_tokens.squeeze()[input_size:], skip_special_tokens=True, ) def function_calling_with_openai(user_query: str, client) -> str: """ Generates a response using OpenAI's chat API. Args: user_query (str): The user's query or prompt. instructions (str): Instructions or context to provide to the GPT model. Returns: str: The generated response from the assistant. """ instructions = get_function_calling_prompt(user_query).split('<|im_start|>user')[0] completion = client.chat.completions.create( model="gpt-3.5-turbo", messages=[ {"role": "system", "content": instructions}, {"role": "user", "content": user_query}, ] ) # Extract and return the assistant's reply from the response if completion and completion.choices: last_choice = completion.choices[0] if last_choice.message: return last_choice.message.content.strip() return "" def extract_function_calls(completion: str) -> List[Dict[str, Any]]: """Extract function calls from completion.""" completion = completion.strip() pattern = r"((.*?))" match = re.search(pattern, completion, re.DOTALL) if not match: return None multiplefn = match.group(1) root = ET.fromstring(multiplefn) functions = root.findall("functioncall") return [json.loads(fn.text) for fn in functions] def invoke_function(function, feature_view, weather_fg, model) -> pd.DataFrame: """Invoke a function with given arguments.""" # Extract function name and arguments from input_data function_name = function['name'] arguments = function['arguments'] # Using Python's getattr function to dynamically call the function by its name and passing the arguments function_output = getattr(sys.modules[__name__], function_name)( **arguments, feature_view=feature_view, weather_fg=weather_fg, model=model, ) if type(function_output) == str: return function_output # Round the 'pm25' value to 2 decimal places function_output['pm25'] = function_output['pm25'].apply(round, ndigits=2) return function_output def get_context_data(user_query: str, feature_view, weather_fg, model_air_quality, model_llm=None, tokenizer=None, client=None) -> str: """ Retrieve context data based on user query. Args: user_query (str): The user query. feature_view: Feature View for data retrieval. model_air_quality: The air quality model. tokenizer: The tokenizer. Returns: str: The context data. """ if client: # Generate a response using LLM completion = function_calling_with_openai(user_query, client) else: # Generate a response using LLM completion = generate_hermes( user_query, model_llm, tokenizer, ) # Extract function calls from the completion functions = extract_function_calls(completion) # If function calls were found if functions: # Invoke the function with provided arguments data = invoke_function(functions[0], feature_view, weather_fg, model_air_quality) # Return formatted data as string if isinstance(data, pd.DataFrame): return f'Air Quality Measurements:\n' + '\n'.join( [f'Date: {row["date"]}; Air Quality: {row["pm25"]}' for _, row in data.iterrows()] ) # Return message if data is not updated return data # If no function calls were found, return an empty string return ''