import xml.etree.ElementTree as ET
import re
import inspect
import json
import datetime
import sys
from typing import Any, Dict, List, Optional, Union, get_type_hints

import torch
import pandas as pd
from openai import OpenAI

from functions.air_quality_data_retrieval import (
    get_historical_data_for_date,
    get_historical_data_in_date_range,
    get_future_data_in_date_range,
    get_future_data_for_date,
)

def get_type_name(t: Any) -> str:
    """Get the name of the type."""
    name = str(t)
    if "list" in name or "dict" in name:
        return name
    else:
        return t.__name__
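
# Illustrative examples: get_type_name(int) returns "int", while
# get_type_name(list[str]) returns "list[str]", because str(list[str])
# contains "list".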

def serialize_function_to_json(func: Any) -> str:
    """Serialize a function's signature and type hints to a JSON string."""
    signature = inspect.signature(func)
    type_hints = get_type_hints(func)

    function_info = {
        "name": func.__name__,
        "description": func.__doc__,
        "parameters": {
            "type": "object",
            "properties": {},
        },
        # Default to NoneType when no return annotation is present
        "returns": get_type_name(type_hints.get("return", type(None))),
    }

    for name, _ in signature.parameters.items():
        param_type = get_type_name(type_hints.get(name, type(None)))
        function_info["parameters"]["properties"][name] = {"type": param_type}

    return json.dumps(function_info, indent=2)
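
# Illustrative example (hypothetical function, not part of this module): for
#
#   def fetch(date: str) -> str:
#       """Fetch data."""
#
# serialize_function_to_json(fetch) produces:
#
#   {
#     "name": "fetch",
#     "description": "Fetch data.",
#     "parameters": {
#       "type": "object",
#       "properties": {
#         "date": {
#           "type": "str"
#         }
#       }
#     },
#     "returns": "str"
#   }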

def get_function_calling_prompt(user_query):
    fn = """{"name": "function_name", "arguments": {"arg_1": "value_1", "arg_2": value_2, ...}}"""
    example = """{"name": "get_historical_data_in_date_range", "arguments": {"date_start": "2024-01-10", "date_end": "2024-01-14"}}"""

    prompt = f"""<|im_start|>system
You are a helpful assistant with access to the following functions:

{serialize_function_to_json(get_historical_data_for_date)}

{serialize_function_to_json(get_historical_data_in_date_range)}

{serialize_function_to_json(get_future_data_for_date)}

{serialize_function_to_json(get_future_data_in_date_range)}

###INSTRUCTIONS:
- You need to choose one function to use and retrieve the parameters for this function from the user input.
- If the user query contains 'will' and specifies a single day or date, use the get_future_data_in_date_range function.
- If the user query contains 'will' and specifies a range of days or dates, use the get_future_data_in_date_range function.
- If the user query is for future data but only includes a single day or date, use the get_future_data_in_date_range function.
- If the user query contains 'today' or 'yesterday', use the get_historical_data_for_date function.
- If the user query contains 'tomorrow', use the get_future_data_in_date_range function.
- If the user query is for historical data and specifies a range of days or dates, use the get_historical_data_in_date_range function.
- If the user says a day of the week, assume it refers to the next occurrence of that day.
- Do not include feature_view and model parameters.
- Provide dates STRICTLY in the YYYY-MM-DD format.
- Generate a 'No Function needed' string if the user query does not require function calling.

IMPORTANT: Today is {datetime.date.today().strftime("%A")}, {datetime.date.today()}.

To use one of these functions, respond STRICTLY with:
<onefunctioncall>
    <functioncall> {fn} </functioncall>
</onefunctioncall>

###EXAMPLES

EXAMPLE 1:
- User: Hi!
- AI Assistant: No Function needed.

EXAMPLE 2:
- User: Is this Air Quality level good or bad?
- AI Assistant: No Function needed.

EXAMPLE 3:
- User: When and what was the minimum air quality from 2024-01-10 till 2024-01-14?
- AI Assistant:
<onefunctioncall>
    <functioncall> {example} </functioncall>
</onefunctioncall>
<|im_end|>
<|im_start|>user
{user_query}
<|im_end|>
<|im_start|>assistant"""

    return prompt
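
# The prompt follows the ChatML layout (<|im_start|>role ... <|im_end|>): the
# model is expected to reply either with 'No Function needed.' or with a single
# <onefunctioncall> block that extract_function_calls() below can parse.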

def generate_hermes(user_query: str, model_llm, tokenizer) -> str:
    """Retrieve a function name and extract function parameters based on the user query."""
    prompt = get_function_calling_prompt(user_query)
    tokens = tokenizer(prompt, return_tensors="pt").to(model_llm.device)
    input_size = tokens.input_ids.numel()

    with torch.inference_mode():
        generated_tokens = model_llm.generate(
            **tokens,
            use_cache=True,
            do_sample=True,
            temperature=0.2,
            top_p=1.0,
            top_k=0,
            max_new_tokens=512,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens, skipping the prompt
    return tokenizer.decode(
        generated_tokens.squeeze()[input_size:],
        skip_special_tokens=True,
    )
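
# Usage sketch (assumes a Hugging Face chat model and tokenizer were loaded
# elsewhere, e.g. via AutoModelForCausalLM.from_pretrained(...) and
# AutoTokenizer.from_pretrained(...); the query is illustrative):
#
#   response = generate_hermes("What will the air quality be tomorrow?", model_llm, tokenizer)
#   print(response)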

def function_calling_with_openai(user_query: str, client) -> str:
    """
    Generate a response using OpenAI's chat completions API.

    Args:
        user_query (str): The user's query or prompt.
        client: An OpenAI client instance used to call the chat API.

    Returns:
        str: The generated response from the assistant.
    """
    # Reuse the system portion of the function-calling prompt
    # (everything before the user turn)
    instructions = get_function_calling_prompt(user_query).split('<|im_start|>user')[0]

    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": instructions},
            {"role": "user", "content": user_query},
        ],
    )

    # Extract and return the assistant's reply from the response
    if completion and completion.choices:
        first_choice = completion.choices[0]
        if first_choice.message:
            return first_choice.message.content.strip()
    return ""

def extract_function_calls(completion: str) -> Optional[List[Dict[str, Any]]]:
    """Extract function calls from a completion; return None if no call is found."""
    completion = completion.strip()
    pattern = r"(<onefunctioncall>(.*?)</onefunctioncall>)"
    match = re.search(pattern, completion, re.DOTALL)
    if not match:
        return None

    multiplefn = match.group(1)
    root = ET.fromstring(multiplefn)
    functions = root.findall("functioncall")
    return [json.loads(fn.text) for fn in functions]
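
# Illustrative example (the 'date' argument name is an assumption, since the
# signatures in functions.air_quality_data_retrieval are not shown here):
#
#   extract_function_calls(
#       '<onefunctioncall>\n'
#       '    <functioncall> {"name": "get_historical_data_for_date", '
#       '"arguments": {"date": "2024-01-10"}} </functioncall>\n'
#       '</onefunctioncall>'
#   )
#   # -> [{'name': 'get_historical_data_for_date', 'arguments': {'date': '2024-01-10'}}]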

def invoke_function(function, feature_view, weather_fg, model) -> Union[pd.DataFrame, str]:
    """Invoke a function with the given arguments."""
    # Extract the function name and arguments from the parsed function call
    function_name = function['name']
    arguments = function['arguments']

    # Dynamically resolve the function by name in this module and call it
    function_output = getattr(sys.modules[__name__], function_name)(
        **arguments,
        feature_view=feature_view,
        weather_fg=weather_fg,
        model=model,
    )

    # A string output is a status/error message; pass it through unchanged
    if isinstance(function_output, str):
        return function_output

    # Round the 'pm25' values to 2 decimal places
    function_output['pm25'] = function_output['pm25'].apply(round, ndigits=2)

    return function_output
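
# Illustrative example (again assuming a 'date' parameter): calling
#
#   invoke_function(
#       {"name": "get_historical_data_for_date", "arguments": {"date": "2024-01-10"}},
#       feature_view, weather_fg, model,
#   )
#
# resolves the function by name via getattr and is equivalent to
#
#   get_historical_data_for_date(date="2024-01-10", feature_view=feature_view,
#                                weather_fg=weather_fg, model=model)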

def get_context_data(
    user_query: str,
    feature_view,
    weather_fg,
    model_air_quality,
    model_llm=None,
    tokenizer=None,
    client=None,
) -> str:
    """
    Retrieve context data based on the user query.

    Args:
        user_query (str): The user query.
        feature_view: Feature view for air quality data retrieval.
        weather_fg: Weather feature group used by the retrieval functions.
        model_air_quality: The air quality model.
        model_llm: Local LLM, used when no OpenAI client is given.
        tokenizer: Tokenizer for the local LLM.
        client: Optional OpenAI client; takes precedence over the local LLM.

    Returns:
        str: The context data.
    """
    if client:
        # Generate a response using the OpenAI chat API
        completion = function_calling_with_openai(user_query, client)
    else:
        # Generate a response using the local LLM
        completion = generate_hermes(
            user_query,
            model_llm,
            tokenizer,
        )

    # Extract function calls from the completion
    functions = extract_function_calls(completion)

    # If function calls were found
    if functions:
        # Invoke the function with the provided arguments
        data = invoke_function(functions[0], feature_view, weather_fg, model_air_quality)

        # Return the retrieved measurements formatted as a string
        if isinstance(data, pd.DataFrame):
            return 'Air Quality Measurements:\n' + '\n'.join(
                f'Date: {row["date"]}; Air Quality: {row["pm25"]}' for _, row in data.iterrows()
            )

        # Otherwise return the status/error message as-is
        return data

    # If no function calls were found, return an empty string
    return ''
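
# End-to-end usage sketch (assumes Hopsworks-style feature_view / weather_fg
# objects and a trained air quality model are available; all names below are
# illustrative, not part of this module):
#
#   client = OpenAI()  # or pass model_llm= and tokenizer= for the local path
#   context = get_context_data(
#       "When and what was the minimum air quality from 2024-01-10 till 2024-01-14?",
#       feature_view=feature_view,
#       weather_fg=weather_fg,
#       model_air_quality=model_air_quality,
#       client=client,
#   )
#   print(context)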