Spaces:

CineAI
/

Chelsea

Sleeping

File size: 4,578 Bytes

import time

from llm.apimodels.gemini_model import Gemini
from llm.apimodels.hf_model import HF_Mistaril, HF_TinyLlama, HF_SmolLM135, HF_SmolLM360, HF_SmolLM, HF_Gemma2, HF_Qwen2

from typing import Optional, Any

from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain.chains import ConversationChain

def prettify(raw_text: str) -> str:        
    pretty = raw_text.replace("**", "")
    return pretty.strip()


memory: ConversationBufferWindowMemory = ConversationBufferWindowMemory(k=3, ai_prefix="Chelsea")

DELAY: int = 300  # 5 minutes

def has_failed(conversation, prompt) -> Optional[str]:
    """
    Checks if the LLM conversation prediction fails and returns None if so.

    Args:
        conversation: The LLM conversation object used for prediction.
        prompt: The prompt to be used for prediction.

    Returns:
        None, otherwise the prettified response.
    """

    try:
        response = conversation.predict(input=prompt)
        print(f"response: {response}")
        result = prettify(raw_text=response)
        return result
    except Exception as e:
        print(f"Error during prediction with conversation in has_failed function: {e}")
        return None


def has_delay(conversation, prompt) -> Optional[str]:
    """
    Checks if the LLM conversation prediction takes longer than a set delay.

    Args:
        conversation: The LLM conversation object used for prediction.
        prompt: The prompt to be used for prediction.

    Returns:
        None if the execution time exceeds the delay,
        otherwise, the prettified response from the conversation object.
    """

    start_time = time.perf_counter()  # Start timer before prediction
    try:
        response = conversation.predict(input=prompt)
        execution_time = time.perf_counter() - start_time  # Calculate execution time

        if execution_time > DELAY: 
            return None  # Return None if delayed
        
        result = prettify(raw_text=response)  # Prettify the response
        return result  # Return the prettified response

    except Exception as e:
        print(f"Error during prediction with conversation in has_delay function: {e}")


class Conversation:
    def __init__(self):
        """
        Initializes the Conversation class with a prompt and a list of LLM model classes.

        Args:
            model_classes (list, optional): A list of LLM model classes to try in sequence.
                Defaults to [Gemini, HF_SmolLM135, HF_SmolLM360, HF_TinyLlama, HF_SmolLM, HF_Gemma2, HF_Mistaril, HF_Qwen2].
        """

        self.model_classes = [Gemini, HF_Gemma2, HF_SmolLM, HF_SmolLM360, HF_Mistaril, HF_Qwen2, HF_TinyLlama, HF_SmolLM135]
        self.current_model_index = 0

    def _get_conversation(self) -> Any:
        """
        Creates a ConversationChain object using the current model class.
        """
        try:
            current_model_class = self.model_classes[self.current_model_index]
            print("current model class is: ", current_model_class)
            return ConversationChain(llm=current_model_class().execution(), memory=memory, return_final_only=True)
        except Exception as e:
            print(f"Error during conversation chain in get_conversation function: {e}")

    def chatting(self, prompt: str) -> str:
        """
        Carries out the conversation with the user, handling errors and delays.

        Args: 
            prompt(str): The prompt to be used for prediction.

        Returns:
            str: The final conversation response or None if all models fail.
        """

        if prompt is None or prompt == "":
            raise Exception(f"Prompt must be string not None or empty string: {prompt}")

        while self.current_model_index < len(self.model_classes):
            conversation = self._get_conversation()

            result = has_failed(conversation=conversation, prompt=prompt)
            if result is not None:
                return result
            print(f"chat - chatting result : {result}")

            result = has_delay(conversation=conversation, prompt=prompt)
            if result is None:
                self.current_model_index += 1  # Switch to next model after delay
                continue

            return result

        return "All models failed conversation. Please, try again"
    
    def __str__(self) -> str:
        return f"prompt: {type(self.prompt)}"
    
    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(prompt: {type(self.prompt)})"