from typing import Dict, List, Any

from llama_cpp import Llama

import gemma_tools

# Context window size passed to the model as n_ctx.
MAX_TOKENS = 5000


class EndpointHandler():
    def __init__(self, model_dir=None):
        if model_dir:
            print(f"Initializing with model from directory: {model_dir}")
        # For Hugging Face endpoints, the model may already be linked to the
        # container, but here we initialize it explicitly from the Hub.
        print("Initializing Llama model directly from Hugging Face repository...")
        self.model = Llama.from_pretrained(
            repo_id="njwright92/ComicBot_v.2-gguf",  # from_pretrained takes repo_id, not model_id
            filename="*.gguf",  # glob for the GGUF weights; narrow this if the repo holds several files
            n_ctx=MAX_TOKENS,
            chat_format="llama-2",
        )
        print("Model initialization complete.")

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        # Extract and validate arguments from the data payload.
        print("Extracting and validating arguments from the data payload...")
        args = gemma_tools.get_args_or_none(data)
        # Assumption: get_args_or_none returns None or a dict carrying an
        # error status when validation fails, and the sampling args otherwise.
        if args is None or args.get("status") == "error":
            args = args or {}
            return [{
                "status": args.get("status", "error"),
                "reason": args.get("reason", "unknown"),
                "description": args.get("description", "Validation error in arguments"),
            }]

        # Define the prompt template and fill it from the validated arguments.
        fmat = "system\n{system_prompt} \nuser\n{inputs} \nmodel"
        try:
            formatted_prompt = fmat.format(**args)
            print(f"Formatted prompt: {formatted_prompt}")
        except Exception as e:
            print(f"Error in formatting the prompt: {str(e)}")
            return [{
                "status": "error",
                "reason": "Invalid format",
                "detail": str(e),
            }]

        max_length = data.get("max_length", 512)
        try:
            max_length = int(max_length)
            print(f"Max length set to: {max_length}")
        except ValueError:
            return [{
                "status": "error",
                "reason": "max_length must be an integer",
                "detail": "max_length was not a valid integer",
            }]

        print("Generating response from the model...")
        res = self.model(
            formatted_prompt,
            temperature=args["temperature"],
            top_p=args["top_p"],
            top_k=args["top_k"],
            max_tokens=max_length,
        )
        print(f"Model response: {res}")

        return [{
            "status": "success",
            # llama-cpp-python completion format: choices[0]["text"]
            "response": res["choices"][0]["text"],
        }]
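
# --- Hypothetical local smoke test (not part of the endpoint contract). ---
# A minimal sketch assuming gemma_tools.get_args_or_none() accepts a payload
# carrying the system_prompt/inputs and sampling keys consumed above; the
# exact payload schema is defined by gemma_tools, so treat these keys and
# values as illustrative examples only.
if __name__ == "__main__":
    handler = EndpointHandler()
    payload = {
        "system_prompt": "You are ComicBot, a stand-up comedy writer.",
        "inputs": "Write a one-liner about debugging.",
        "temperature": 0.7,
        "top_p": 0.9,
        "top_k": 40,
        "max_length": 128,
    }
    # Expected shape on success: [{"status": "success", "response": "..."}]
    print(handler(payload))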