njwright92 committed · Commit 22f17bb
Parent(s): fee3ab4

Update handler.py

handler.py CHANGED (+29 -25)
@@ -1,38 +1,39 @@
 from typing import Dict, List, Any
 from llama_cpp import Llama
+import gemma_tools
 
 MAX_TOKENS = 8192
 
 
 class EndpointHandler():
     def __init__(self):
-        # Initialize the model with
+        # Initialize the model with ComicBot configuration
        print("Initializing Llama model with ComicBot settings...")
         self.model = Llama.from_pretrained(
-            "njwright92/ComicBot_v.2-gguf", filename="comic_mistral-v5.2.q5_0.gguf", n_ctx=
+            "njwright92/ComicBot_v.2-gguf", filename="comic_mistral-v5.2.q5_0.gguf", n_ctx=MAX_TOKENS)
         print("Model initialization complete.")
 
     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
-        # Extract arguments from the data
-        print("Extracting arguments from the data payload...")
-
-
+        # Extract and validate arguments from the data
+        print("Extracting and validating arguments from the data payload...")
+        args_check = gemma_tools.get_args_or_none(
+            data)  # Using the new function
 
-        #
-        fmat = "<startofturn>system\n{system_prompt} <endofturn>\n<startofturn>user\n{prompt} <endofturn>\n<startofturn>model"
-
-        # Check if args is properly formatted
-        if not args:
-            print("No arguments found in the data payload.")
+        if not args_check[0]:  # If validation failed
             return [{
-                "status": "error",
-                "
+                "status": args_check.get("status", "error"),
+                "reason": args_check.get("reason", "unknown"),
+                "description": args_check.get("description", "Validation error in arguments")
             }]
 
+        args = args_check  # If validation passed, args are in args_check
+
+        # Define the formatting template
+        fmat = "<startofturn>system\n{system_prompt} <endofturn>\n<startofturn>user\n{inputs} <endofturn>\n<startofturn>model"
+
         try:
-
-
-            print(f"Formatted prompt: {fmat}")
+            formatted_prompt = fmat.format(**args)
+            print(f"Formatted prompt: {formatted_prompt}")
         except Exception as e:
             print(f"Error in formatting the prompt: {str(e)}")
             return [{
@@ -45,21 +46,24 @@ class EndpointHandler():
         try:
             max_length = int(max_length)
             print(f"Max length set to: {max_length}")
-        except
-            print(f"Error converting max_length to int: {str(e)}")
+        except ValueError:
             return [{
                 "status": "error",
-                "reason": "max_length
-                "detail":
+                "reason": "max_length must be an integer",
+                "detail": "max_length was not a valid integer"
             }]
 
         print("Generating response from the model...")
-        res = self.model(
-
+        res = self.model(formatted_prompt,
+                         temperature=args["temperature"],
+                         top_p=args["top_p"],
+                         top_k=args["top_k"],
+                         max_tokens=max_length)
+
         print(f"Model response: {res}")
 
         return [{
             "status": "success",
-
+            # Assuming Llama's response format
+            "response": res['choices'][0]['text']
         }]
-
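For reference, the turn-format template introduced above expands as in this short sketch. The template string is taken from the diff; the system_prompt and inputs values are made up for illustration. Note that str.format(**args) ignores extra keys, so passing the full validated args dict (which also carries temperature, top_p, and top_k) is safe.

    # Illustration of the prompt template; argument values are hypothetical.
    fmat = "<startofturn>system\n{system_prompt} <endofturn>\n<startofturn>user\n{inputs} <endofturn>\n<startofturn>model"
    print(fmat.format(system_prompt="You are ComicBot.", inputs="Tell me a joke about editors."))
    # <startofturn>system
    # You are ComicBot. <endofturn>
    # <startofturn>user
    # Tell me a joke about editors. <endofturn>
    # <startofturn>model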
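A minimal local smoke test of the updated handler could look like the sketch below. The payload keys are assumptions inferred from the template fields and sampling arguments in the diff; the commit does not show the schema that gemma_tools.get_args_or_none actually enforces.

    # Hypothetical smoke test; payload keys are inferred from the diff,
    # not from a documented schema.
    from handler import EndpointHandler

    handler = EndpointHandler()  # downloads and loads comic_mistral-v5.2.q5_0.gguf

    payload = {
        "system_prompt": "You are ComicBot, a stand-up comedy assistant.",
        "inputs": "Riff on debugging at 2 a.m.",
        "temperature": 0.8,  # assumed sampling values
        "top_p": 0.95,
        "top_k": 40,
        "max_length": 512,   # the handler converts this with int()
    }

    result = handler(payload)
    print(result[0]["status"])
    print(result[0].get("response") or result[0].get("reason"))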