ComicBot_v.2-gguf / handler.py
import os
from typing import Dict, List, Any
from llama_cpp import Llama
import gemma_tools

MAX_TOKENS = 5000


class EndpointHandler:
    def __init__(self, model_dir=None):
        if model_dir:
            print(f"Initializing with model from directory: {model_dir}")
        # On Hugging Face endpoints the model files may already be linked; here the
        # model is loaded explicitly from the Hub repository.
        print("Initializing Llama model directly from Hugging Face repository...")
        self.model = Llama.from_pretrained(
            repo_id="njwright92/ComicBot_v.2-gguf",  # llama-cpp-python takes repo_id (not model_id) for Hub repos
            filename="*.gguf",  # assumed glob for the GGUF weights; narrow it if the repo holds several quantizations
            n_ctx=MAX_TOKENS,
            chat_format="llama-2",
        )
        print("Model initialization complete.")
    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        # Extract and validate arguments from the request payload.
        print("Extracting and validating arguments from the data payload...")
        args_check = gemma_tools.get_args_or_none(data)
        # Assumes gemma_tools.get_args_or_none returns the parsed arguments when
        # validation succeeds, and a falsy first element plus "status"/"reason"/
        # "description" fields when it fails.
        if not args_check[0]:  # Validation failed
            return [{
                "status": args_check.get("status", "error"),
                "reason": args_check.get("reason", "unknown"),
                "description": args_check.get("description", "Validation error in arguments")
            }]
        args = args_check  # Validation passed; args_check holds the arguments
# Define the formatting template
fmat = "<startofturn>system\n{system_prompt} <endofturn>\n<startofturn>user\n{inputs} <endofturn>\n<startofturn>model"
try:
formatted_prompt = fmat.format(**args)
print(f"Formatted prompt: {formatted_prompt}")
except Exception as e:
print(f"Error in formatting the prompt: {str(e)}")
return [{
"status": "error",
"reason": "Invalid format",
"detail": str(e)
}]
max_length = data.get("max_length", 512)
try:
max_length = int(max_length)
print(f"Max length set to: {max_length}")
except ValueError:
return [{
"status": "error",
"reason": "max_length must be an integer",
"detail": "max_length was not a valid integer"
}]
print("Generating response from the model...")
res = self.model(formatted_prompt,
temperature=args["temperature"],
top_p=args["top_p"],
top_k=args["top_k"],
max_tokens=max_length)
print(f"Model response: {res}")
return [{
"status": "success",
# Assuming Llama's response format
"response": res['choices'][0]['text']
}]
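

if __name__ == "__main__":
    # Minimal local smoke test; a sketch only, not part of the Inference
    # Endpoints contract. The payload keys (system_prompt, inputs, temperature,
    # top_p, top_k, max_length) are assumed from the prompt template and model
    # call above, and gemma_tools.get_args_or_none must accept this shape.
    handler = EndpointHandler()
    sample_payload = {
        "system_prompt": "You are ComicBot, a stand-up comedy writing assistant.",
        "inputs": "Write a short joke about debugging.",
        "temperature": 0.7,
        "top_p": 0.9,
        "top_k": 40,
        "max_length": 256,
    }
    print(handler(sample_payload))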