Spaces:

Robzy
/

hbg-weather

Sleeping

File size: 8,111 Bytes

35ffba0

import xml.etree.ElementTree as ET
import re
import inspect
from typing import get_type_hints
import json
import datetime
import torch
import sys
import pandas as pd
from openai import OpenAI
from functions.air_quality_data_retrieval import (
    get_historical_data_for_date,
    get_historical_data_in_date_range,
    get_future_data_in_date_range,
    get_future_data_for_date,
)
from typing import Any, Dict, List


def get_type_name(t: Any) -> str:
    """Return a printable name for a type annotation.

    Annotations whose string form contains ``list`` or ``dict`` are returned
    as that full string, so a parameterised container such as ``list[int]``
    keeps its element type. Anything else is reduced to its ``__name__``.

    Args:
        t: The annotation object to describe.

    Returns:
        The display name of ``t``.
    """
    name = str(t)
    if "list" in name or "dict" in name:
        return name
    # Not every annotation object carries ``__name__`` (a plain string does
    # not, for instance); fall back to the string form rather than raising
    # AttributeError.
    return getattr(t, "__name__", name)


def serialize_function_to_json(func: Any) -> str:
    """Describe a function as a JSON document.

    The document contains the function's name, its docstring, one entry per
    parameter with that parameter's type name, and the name of the return
    type.

    Args:
        func: The function to describe.

    Returns:
        A JSON string (indented by two spaces) with the keys ``name``,
        ``description``, ``parameters`` and ``returns``.
    """
    type_hints = get_type_hints(func)

    # A missing return annotation is reported as "void". The placeholder is
    # handled separately because a plain string has no ``__name__``.
    if "return" in type_hints:
        return_hint = type_hints["return"]
        returns = getattr(return_hint, "__name__", str(return_hint))
    else:
        returns = "void"

    # Parameters without an annotation are described as NoneType.
    properties = {
        name: {"type": get_type_name(type_hints.get(name, type(None)))}
        for name in inspect.signature(func).parameters
    }

    function_info = {
        "name": func.__name__,
        "description": func.__doc__,
        "parameters": {
            "type": "object",
            "properties": properties,
        },
        "returns": returns,
    }
    return json.dumps(function_info, indent=2)


def get_function_calling_prompt(user_query: str) -> str:
    """Build the ChatML function-calling prompt for a user query.

    The system turn lists the JSON descriptions of the four data-retrieval
    functions, the routing instructions, today's weekday and date, the
    required answer format and three examples. It is followed by the user
    turn holding ``user_query`` and an opened assistant turn.

    Args:
        user_query: The text entered by the user.

    Returns:
        The complete prompt string, ending with the opening marker of the
        assistant turn.
    """
    fn = """{"name": "function_name", "arguments": {"arg_1": "value_1", "arg_2": value_2, ...}}"""
    example = """{"name": "get_historical_data_in_date_range", "arguments": {"date_start": "2024-01-10", "date_end": "2024-01-14"}}"""

    # Read the clock once so the weekday and the date always agree, even if
    # the prompt is built right at midnight.
    today = datetime.date.today()
    weekday = today.strftime("%A")

    prompt = f"""<|im_start|>system
You are a helpful assistant with access to the following functions:

{serialize_function_to_json(get_historical_data_for_date)}

{serialize_function_to_json(get_historical_data_in_date_range)}

{serialize_function_to_json(get_future_data_for_date)}

{serialize_function_to_json(get_future_data_in_date_range)}

###INSTRUCTIONS:
- You need to choose one function to use and retrieve paramenters for this function from the user input.
- If the user query contains 'will', and specifies a single day or date, use get_future_data_in_date_range function
- If the user query contains 'will', and specifies a range of days or dates, use get_future_data_in_date_range function.
- If the user query is for future data, but only includes a single day or date, use the get_future_data_in_date_range function,
- If the user query contains 'today' or 'yesterday', use get_historical_data_for_date function.
- If the user query contains 'tomorrow', use get_future_data_in_date_range function.
- If the user query is for historical data, and specifies a range of days or dates, use get_historical_data_in_date_range function.
- If the user says a day of the week, assume the date of that day is when that day next arrives.
- Do not include feature_view and model parameters.
- Provide dates STRICTLY in the YYYY-MM-DD format.
- Generate an 'No Function needed' string if the user query does not require function calling.

IMPORTANT: Today is {weekday}, {today}.

To use one of there functions respond STRICTLY with:
<onefunctioncall>
    <functioncall> {fn} </functioncall>
</onefunctioncall>

###EXAMPLES

EXAMPLE 1:
- User: Hi!
- AI Assiatant: No Function needed.

EXAMPLE 2:
- User: Is this Air Quality level good or bad?
- AI Assiatant: No Function needed.

EXAMPLE 3:
- User: When and what was the minimum air quality from 2024-01-10 till 2024-01-14?
- AI Assistant:
<onefunctioncall>
    <functioncall> {example} </functioncall>
</onefunctioncall>
<|im_end|>

<|im_start|>user
{user_query}
<|im_end|>

<|im_start|>assistant"""

    return prompt


def generate_hermes(user_query: str, model_llm, tokenizer) -> str:
    """Run the local LLM on the function-calling prompt and return its reply.

    Args:
        user_query: The text entered by the user.
        model_llm: Model object exposing ``device`` and ``generate``.
        tokenizer: Tokenizer used to encode the prompt and decode the output;
            its ``eos_token_id`` doubles as the padding token.

    Returns:
        The decoded text of the newly generated tokens only, with special
        tokens removed.
    """
    encoded = tokenizer(
        get_function_calling_prompt(user_query),
        return_tensors="pt",
    ).to(model_llm.device)

    # Number of prompt tokens; used below to cut the prompt off the output.
    prompt_length = encoded.input_ids.numel()

    with torch.inference_mode():
        output_ids = model_llm.generate(
            **encoded,
            use_cache=True,
            do_sample=True,
            temperature=0.2,
            top_p=1.0,
            top_k=0,
            max_new_tokens=512,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Keep only what follows the prompt tokens.
    completion_ids = output_ids.squeeze()[prompt_length:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True)


def function_calling_with_openai(user_query: str, client) -> str:
    """
    Ask an OpenAI chat model which data-retrieval function to call.

    The system message is the part of the function-calling prompt that
    precedes the user turn; the user query is sent as a separate user message.

    Args:
        user_query (str): The user's query or prompt.
        client: Object exposing ``chat.completions.create`` (presumably an
            ``openai.OpenAI`` client; verify against the caller).

    Returns:
        str: The assistant's reply with surrounding whitespace removed, or an
        empty string when the response has no choices, no message, or no
        text content.
    """

    # Everything before the user turn of the prompt serves as the system text.
    instructions = get_function_calling_prompt(user_query).split('<|im_start|>user')[0]

    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": instructions},
            {"role": "user", "content": user_query},
        ]
    )

    if not completion or not completion.choices:
        return ""

    message = completion.choices[0].message
    # ``content`` may be None (a reply without text); treat that as an empty
    # reply instead of failing on ``None.strip()``.
    if not message or not message.content:
        return ""
    return message.content.strip()


def extract_function_calls(completion: str) -> List[Dict[str, Any]]:
    """Parse the function calls contained in a model completion.

    The first ``<onefunctioncall>...</onefunctioncall>`` section is parsed as
    XML, and the text of each direct ``<functioncall>`` child is decoded as
    JSON.

    Args:
        completion: Raw text produced by the model.

    Returns:
        One decoded JSON value per ``<functioncall>`` element, in document
        order, or ``None`` when the completion has no
        ``<onefunctioncall>`` section.
    """
    wrapper = re.search(
        r"(<onefunctioncall>(.*?)</onefunctioncall>)",
        completion.strip(),
        re.DOTALL,
    )
    if wrapper is None:
        return None

    # Group 1 spans the whole wrapper element, tags included, so it can be
    # handed to the XML parser as a standalone document.
    root = ET.fromstring(wrapper.group(1))

    calls = []
    for node in root.findall("functioncall"):
        calls.append(json.loads(node.text))
    return calls


# Names of the data-retrieval functions a model completion may invoke.
_CALLABLE_FUNCTION_NAMES = frozenset({
    "get_historical_data_for_date",
    "get_historical_data_in_date_range",
    "get_future_data_in_date_range",
    "get_future_data_for_date",
})

# Keyword arguments that invoke_function supplies itself.
_INJECTED_ARGUMENTS = frozenset({"feature_view", "weather_fg", "model"})


def invoke_function(function, feature_view, weather_fg, model) -> pd.DataFrame:
    """Call the data-retrieval function described by a parsed function call.

    Args:
        function: Mapping with a ``name`` key (the function to call) and an
            ``arguments`` key (a mapping of keyword arguments).
        feature_view: Passed through to the called function.
        weather_fg: Passed through to the called function.
        model: Passed through to the called function.

    Returns:
        The called function's result. A string result is returned unchanged;
        otherwise the ``pm25`` column is rounded to two decimals and the
        (mutated) result is returned.

    Raises:
        AttributeError: If ``name`` is not one of the data-retrieval
            functions. This is the same exception type the dynamic lookup
            raises for a name that does not exist in this module.
    """
    function_name = function['name']

    # The name comes from model output, so only the known data-retrieval
    # functions may be resolved; no other module attribute is reachable.
    if function_name not in _CALLABLE_FUNCTION_NAMES:
        raise AttributeError(
            f"{function_name!r} is not a callable data-retrieval function"
        )

    # Drop any model-supplied value for the arguments injected below; passing
    # the same keyword twice would raise TypeError.
    arguments = {
        key: value
        for key, value in function['arguments'].items()
        if key not in _INJECTED_ARGUMENTS
    }

    # Resolve the function by name in this module and call it.
    function_output = getattr(sys.modules[__name__], function_name)(
        **arguments,
        feature_view=feature_view,
        weather_fg=weather_fg,
        model=model,
    )

    if isinstance(function_output, str):
        return function_output

    # Round the 'pm25' value to 2 decimal places
    function_output['pm25'] = function_output['pm25'].apply(round, ndigits=2)
    return function_output


def get_context_data(user_query: str, feature_view, weather_fg, model_air_quality, model_llm=None, tokenizer=None, client=None) -> str:
    """
    Build the context text for a user query by running one function call.

    Args:
        user_query (str): The user query.
        feature_view: Forwarded to the invoked data-retrieval function.
        weather_fg: Forwarded to the invoked data-retrieval function.
        model_air_quality: Forwarded to the invoked function as its model.
        model_llm: Local LLM, used only when ``client`` is falsy.
        tokenizer: Tokenizer for ``model_llm``, used only when ``client`` is falsy.
        client: When truthy, the completion is requested through it instead
            of the local LLM.

    Returns:
        str: A listing of dates and air-quality values when the invoked
        function returns a DataFrame, the function's own return value
        otherwise, or an empty string when the completion holds no function
        call.
    """
    # Choose the backend that produces the function-calling completion.
    completion = (
        function_calling_with_openai(user_query, client)
        if client
        else generate_hermes(user_query, model_llm, tokenizer)
    )

    calls = extract_function_calls(completion)
    if not calls:
        # No function call found in the completion.
        return ''

    # Only the first function call is executed.
    result = invoke_function(calls[0], feature_view, weather_fg, model_air_quality)

    if not isinstance(result, pd.DataFrame):
        # Non-DataFrame results are passed back unchanged.
        return result

    measurement_lines = []
    for _, record in result.iterrows():
        measurement_lines.append(
            f'Date: {record["date"]}; Air Quality: {record["pm25"]}'
        )
    return 'Air Quality Measurements:\n' + '\n'.join(measurement_lines)