njwright92 committed
Commit 22f17bb
1 Parent(s): fee3ab4

Update handler.py

Files changed (1): handler.py (+29, -25)
handler.py CHANGED
@@ -1,38 +1,39 @@
 from typing import Dict, List, Any
 from llama_cpp import Llama
+import gemma_tools
 
 MAX_TOKENS = 8192
 
 
 class EndpointHandler():
     def __init__(self):
-        # Initialize the model with your ComicBot configuration
+        # Initialize the model with ComicBot configuration
         print("Initializing Llama model with ComicBot settings...")
         self.model = Llama.from_pretrained(
-            "njwright92/ComicBot_v.2-gguf", filename="comic_mistral-v5.2.q5_0.gguf", n_ctx=8192)
+            "njwright92/ComicBot_v.2-gguf", filename="comic_mistral-v5.2.q5_0.gguf", n_ctx=MAX_TOKENS)
         print("Model initialization complete.")
 
     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
-        # Extract arguments from the data
-        print("Extracting arguments from the data payload...")
-        args = data.get("args", {})
-        print(f"Arguments extracted: {args}")
+        # Extract and validate arguments from the data
+        print("Extracting and validating arguments from the data payload...")
+        args_check = gemma_tools.get_args_or_none(
+            data)  # Using the new function
 
-        # Define the formatting template
-        fmat = "<startofturn>system\n{system_prompt} <endofturn>\n<startofturn>user\n{prompt} <endofturn>\n<startofturn>model"
-
-        # Check if args is properly formatted
-        if not args:
-            print("No arguments found in the data payload.")
+        if not args_check[0]:  # If validation failed
             return [{
-                "status": "error",
-                "message": "No arguments found in the data payload."
+                "status": args_check.get("status", "error"),
+                "reason": args_check.get("reason", "unknown"),
+                "description": args_check.get("description", "Validation error in arguments")
             }]
 
+        args = args_check  # If validation passed, args are in args_check
+
+        # Define the formatting template
+        fmat = "<startofturn>system\n{system_prompt} <endofturn>\n<startofturn>user\n{inputs} <endofturn>\n<startofturn>model"
+
         try:
-            fmat = fmat.format(system_prompt=args.get(
-                "system_prompt", ""), prompt=args.get("inputs", ""))
-            print(f"Formatted prompt: {fmat}")
+            formatted_prompt = fmat.format(**args)
+            print(f"Formatted prompt: {formatted_prompt}")
         except Exception as e:
             print(f"Error in formatting the prompt: {str(e)}")
             return [{
@@ -45,21 +46,24 @@ class EndpointHandler():
         try:
             max_length = int(max_length)
             print(f"Max length set to: {max_length}")
-        except Exception as e:
-            print(f"Error converting max_length to int: {str(e)}")
+        except ValueError:
             return [{
                 "status": "error",
-                "reason": "max_length was passed as something that was not a plain old int",
-                "detail": str(e)
+                "reason": "max_length must be an integer",
+                "detail": "max_length was not a valid integer"
             }]
 
         print("Generating response from the model...")
-        res = self.model(fmat, temperature=args.get("temperature", 1.0), top_p=args.get(
-            "top_p", 0.9), top_k=args.get("top_k", 40), max_tokens=max_length)
+        res = self.model(formatted_prompt,
+                         temperature=args["temperature"],
+                         top_p=args["top_p"],
+                         top_k=args["top_k"],
+                         max_tokens=max_length)
+
         print(f"Model response: {res}")
 
         return [{
             "status": "success",
-            "response": res
+            # Assuming Llama's response format
+            "response": res['choices'][0]['text']
        }]
-
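
Note: the commit adds a dependency on gemma_tools.get_args_or_none, whose implementation is not part of this diff. The sketch below is a hypothetical reconstruction inferred solely from the call sites in handler.py, which read a success flag via args_check[0], read error fields via args_check.get(...), and unpack the same object with fmat.format(**args); a small dict subclass can satisfy all three at once. The defaults mirror the old code's args.get(...) fallbacks; the real gemma_tools module may look quite different.

# Hypothetical reconstruction of gemma_tools.get_args_or_none, inferred
# from its call sites in handler.py, not from the real module.
from typing import Any, Dict


class ArgsResult(dict):
    """Validated generation args; index 0 reports validation success.

    Matches the handler's three usages: args_check[0] (flag),
    args_check.get("status", ...) (error fields), and
    fmat.format(**args) (only string keys are unpacked, because the
    flag lives on an attribute rather than in the dict itself).
    """

    def __init__(self, ok: bool, **fields: Any) -> None:
        super().__init__(**fields)
        self._ok = ok

    def __getitem__(self, key: Any) -> Any:
        if key == 0:  # the handler's `args_check[0]` success check
            return self._ok
        return super().__getitem__(key)


def get_args_or_none(data: Dict[str, Any]) -> ArgsResult:
    args = data.get("args")
    if not isinstance(args, dict) or "inputs" not in args:
        return ArgsResult(
            False,
            status="error",
            reason="invalid_payload",
            description="Expected data['args'] to be a dict containing 'inputs'.",
        )
    return ArgsResult(
        True,
        system_prompt=str(args.get("system_prompt", "")),
        inputs=str(args["inputs"]),
        temperature=float(args.get("temperature", 1.0)),  # old code's defaults
        top_p=float(args.get("top_p", 0.9)),
        top_k=int(args.get("top_k", 40)),
        max_length=args.get("max_length", 512),  # arbitrary default
    )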
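
For a quick local check, the handler can be called directly. The payload shape below (an "args" dict with "inputs", an optional "system_prompt", sampling parameters, and "max_length") is an assumption pieced together from the reads in handler.py and the old code's defaults, not a documented API.

# Hypothetical smoke test for the updated handler; field names are
# assumptions inferred from handler.py, not a documented contract.
from handler import EndpointHandler

handler = EndpointHandler()  # downloads comic_mistral-v5.2.q5_0.gguf on first use

payload = {
    "args": {
        "system_prompt": "You are ComicBot, a stand-up comedy assistant.",
        "inputs": "Write a short bit about debugging at 2 a.m.",
        "temperature": 1.0,
        "top_p": 0.9,
        "top_k": 40,
        "max_length": 256,
    }
}

result = handler(payload)  # returns a single-element list of dicts
if result[0]["status"] == "success":
    print(result[0]["response"])
else:
    print("Generation failed:", result[0])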