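"""Custom handler for serving ComicBot_v.2-gguf with llama-cpp-python.

Implements the EndpointHandler contract used by Hugging Face Inference
Endpoints: the model is loaded once in __init__ and each request payload
is handled by __call__.
"""
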
from typing import Any, Dict, List

from llama_cpp import Llama

import gemma_tools

# Context window size (in tokens) for the llama.cpp model.
MAX_TOKENS = 5000


class EndpointHandler:
    def __init__(self, model_dir=None):
        # model_dir is supplied by the serving runtime; the weights are
        # pulled straight from the Hub repository instead of a local path.
        if model_dir:
            print(f"Initializing with model from directory: {model_dir}")

        print("Initializing Llama model directly from Hugging Face repository...")
        self.model = Llama.from_pretrained(
            repo_id="njwright92/ComicBot_v.2-gguf",
            filename="*.gguf",  # assumes a single GGUF file in the repo
            n_ctx=MAX_TOKENS,
            chat_format="llama-2",
        )
        print("Model initialization complete.")

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        print("Extracting and validating arguments from the data payload...")
        # get_args_or_none is assumed to return an (ok, payload) pair: on
        # success the payload carries system_prompt, inputs, temperature,
        # top_p, and top_k; on failure it carries status/reason/description.
        ok, args = gemma_tools.get_args_or_none(data)

        if not ok:
            return [{
                "status": args.get("status", "error"),
                "reason": args.get("reason", "unknown"),
                "description": args.get("description", "Validation error in arguments"),
            }]

        # Gemma-style turn template: system prompt, user input, then the
        # model's turn to speak.
        fmat = (
            "<startofturn>system\n{system_prompt} <endofturn>\n"
            "<startofturn>user\n{inputs} <endofturn>\n"
            "<startofturn>model"
        )

        try:
            formatted_prompt = fmat.format(**args)
            print(f"Formatted prompt: {formatted_prompt}")
        except Exception as e:
            print(f"Error in formatting the prompt: {str(e)}")
            return [{
                "status": "error",
                "reason": "Invalid format",
                "detail": str(e),
            }]

        max_length = data.get("max_length", 512)
        try:
            max_length = int(max_length)
            print(f"Max length set to: {max_length}")
        except (TypeError, ValueError):
            return [{
                "status": "error",
                "reason": "max_length must be an integer",
                "detail": "max_length was not a valid integer",
            }]

        print("Generating response from the model...")
        res = self.model(
            formatted_prompt,
            temperature=args["temperature"],
            top_p=args["top_p"],
            top_k=args["top_k"],
            max_tokens=max_length,
        )

        print(f"Model response: {res}")

        return [{
            "status": "success",
            "response": res["choices"][0]["text"],
        }]
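

if __name__ == "__main__":
    # Minimal local smoke test; a sketch only. The payload keys below are
    # assumptions about what gemma_tools.get_args_or_none validates.
    handler = EndpointHandler()
    result = handler({
        "system_prompt": "You are a stand-up comedian.",
        "inputs": "Tell me a joke about robots.",
        "temperature": 0.7,
        "top_p": 0.9,
        "top_k": 50,
        "max_length": 128,
    })
    print(result)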
|