ComicBot_v.2-gguf / handler.py
import os
from typing import Dict, List, Any
from llama_cpp import Llama
import gemma_tools

MAX_TOKENS = 5000


class EndpointHandler:
    def __init__(self, model_dir=None):
        if model_dir:
            print(f"Initializing with model from directory: {model_dir}")
        # On Hugging Face endpoints the model files may already be linked; here the
        # model is loaded explicitly from the Hub repository.
        print("Initializing Llama model directly from Hugging Face repository...")
        self.model = Llama.from_pretrained(
            repo_id="njwright92/ComicBot_v.2-gguf",  # llama-cpp-python takes repo_id (not model_id) for Hub repos
            filename="*.gguf",  # assumed glob for the GGUF weights; narrow it if the repo holds several quantizations
            n_ctx=MAX_TOKENS,
            chat_format="llama-2",
        )
        print("Model initialization complete.")
    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        # Extract and validate arguments from the request payload.
        print("Extracting and validating arguments from the data payload...")
        args_check = gemma_tools.get_args_or_none(data)
        # Assumes gemma_tools.get_args_or_none returns the parsed arguments when
        # validation succeeds, and a falsy first element plus "status"/"reason"/
        # "description" fields when it fails.
        if not args_check[0]:  # Validation failed
            return [{
                "status": args_check.get("status", "error"),
                "reason": args_check.get("reason", "unknown"),
                "description": args_check.get("description", "Validation error in arguments")
            }]
        args = args_check  # Validation passed; args_check holds the arguments
# Define the formatting template
fmat = "<startofturn>system\n{system_prompt} <endofturn>\n<startofturn>user\n{inputs} <endofturn>\n<startofturn>model"
try:
formatted_prompt = fmat.format(**args)
print(f"Formatted prompt: {formatted_prompt}")
except Exception as e:
print(f"Error in formatting the prompt: {str(e)}")
return [{
"status": "error",
"reason": "Invalid format",
"detail": str(e)
}]
max_length = data.get("max_length", 512)
try:
max_length = int(max_length)
print(f"Max length set to: {max_length}")
except ValueError:
return [{
"status": "error",
"reason": "max_length must be an integer",
"detail": "max_length was not a valid integer"
}]
print("Generating response from the model...")
res = self.model(formatted_prompt,
temperature=args["temperature"],
top_p=args["top_p"],
top_k=args["top_k"],
max_tokens=max_length)
print(f"Model response: {res}")
return [{
"status": "success",
# Assuming Llama's response format
"response": res['choices'][0]['text']
}]
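

if __name__ == "__main__":
    # Minimal local smoke test; a sketch only, not part of the Inference
    # Endpoints contract. The payload keys (system_prompt, inputs, temperature,
    # top_p, top_k, max_length) are assumed from the prompt template and model
    # call above, and gemma_tools.get_args_or_none must accept this shape.
    handler = EndpointHandler()
    sample_payload = {
        "system_prompt": "You are ComicBot, a stand-up comedy writing assistant.",
        "inputs": "Write a short joke about debugging.",
        "temperature": 0.7,
        "top_p": 0.9,
        "top_k": 40,
        "max_length": 256,
    }
    print(handler(sample_payload))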