Spaces: Running on Zero
Martín Santillán Cooper committed • Commit 2cb730a
Parent(s): d33d1ff

Log model runtime in seconds
model.py CHANGED
@@ -3,13 +3,20 @@ from time import time, sleep
 from logger import logger
 import math
 
-
-
-
 safe_token = "No"
 unsafe_token = "Yes"
 nlogprobs = 5
 
+mock_model_call = os.getenv('MOCK_MODEL_CALL') == 'true'
+if not mock_model_call:
+    import torch
+    from vllm import LLM, SamplingParams
+    from transformers import AutoTokenizer
+    model_path = os.getenv('MODEL_PATH')#"granite-guardian-3b-pipecleaner-r241024a"
+    sampling_params = SamplingParams(temperature=0.0, logprobs=nlogprobs)
+    model = LLM(model=model_path, tensor_parallel_size=1)
+    tokenizer = AutoTokenizer.from_pretrained(model_path)
+
 def parse_output(output):
     label, prob = None, None
 
@@ -46,17 +53,6 @@ def get_probablities(logprobs):
 
     return probabilities
 
-
-mock_model_call = os.getenv('MOCK_MODEL_CALL') == 'true'
-if not mock_model_call:
-    import torch
-    from vllm import LLM, SamplingParams
-    from transformers import AutoTokenizer
-    model_path = os.getenv('MODEL_PATH')#"granite-guardian-3b-pipecleaner-r241024a"
-    sampling_params = SamplingParams(temperature=0.0, logprobs=nlogprobs)
-    model = LLM(model=model_path, tensor_parallel_size=1)
-    tokenizer = AutoTokenizer.from_pretrained(model_path)
-
 def generate_text(prompt):
     logger.debug(f'Prompts content is: \n{prompt["content"]}')
     mock_model_call = os.getenv('MOCK_MODEL_CALL') == 'true'
@@ -72,8 +68,7 @@ def generate_text(prompt):
     with torch.no_grad():
         output = model.generate(tokenized_chat, sampling_params, use_tqdm=False)
 
-
-    predicted_label = output[0].outputs[0].text.strip()
+    # predicted_label = output[0].outputs[0].text.strip()
 
     label, prob_of_risk = parse_output(output[0])
 
@@ -82,6 +77,6 @@ def generate_text(prompt):
 
     end = time()
     total = end - start
-    logger.debug(f'
+    logger.debug(f'The evaluation took {total} secs')
 
     return {'assessment': label, 'certainty': prob_of_risk}
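With the model setup now guarded at module level by MOCK_MODEL_CALL, the Space can be exercised without a GPU or the vLLM weights. The sketch below is a hedged illustration of that usage, not code from the repo: only the "content" key of the prompt and the {'assessment', 'certainty'} return shape of the real path are confirmed by the diff; the prompt text and whatever the mocked branch returns are assumptions.

import os

# Hedged sketch: exercise generate_text without loading the vLLM model.
# The flag must be set before model.py is imported, because the guard in
# this commit runs at module import time.
os.environ['MOCK_MODEL_CALL'] = 'true'

from model import generate_text  # torch/vllm/transformers imports are skipped under the flag

# Only the "content" key is confirmed by the visible code; the mocked
# branch's return value is not shown in this diff.
print(generate_text({'content': 'Is this prompt harmful?'}))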
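The commit's purpose is to log the model runtime in seconds. The matching start = time() assignment sits outside the visible hunks, so the following is a minimal, self-contained sketch of the assumed surrounding pattern (the timed_call helper name is hypothetical): time() returns seconds as a float, so end - start is already the duration in seconds.

from time import time
import logging

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

def timed_call(fn, *args, **kwargs):
    # Capture wall-clock time around the call and log the delta in seconds,
    # mirroring the end/total/logger.debug lines added in this commit.
    start = time()
    result = fn(*args, **kwargs)
    end = time()
    total = end - start
    logger.debug(f'The evaluation took {total} secs')
    return result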
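parse_output and get_probablities appear only as context here, so their bodies are not visible. Purely for illustration, the sketch below shows one common way a guardian-style classifier turns the returned logprobs for the safe/unsafe tokens ("No"/"Yes", with nlogprobs candidates per position) into a probability of risk; the function name and the assumed input shape are not taken from the repo.

import math

safe_token = "No"
unsafe_token = "Yes"

def prob_of_risk_from_logprobs(token_logprobs):
    # token_logprobs: for each generated position, a mapping of candidate
    # token text -> logprob (this shape is an assumption, not the repo's API).
    safe_mass, unsafe_mass = 1e-50, 1e-50
    for position in token_logprobs:
        for token_text, logprob in position.items():
            if token_text.strip().lower() == safe_token.lower():
                safe_mass += math.exp(logprob)
            elif token_text.strip().lower() == unsafe_token.lower():
                unsafe_mass += math.exp(logprob)
    # Normalize the two masses so they sum to 1; the unsafe share is the risk.
    return unsafe_mass / (safe_mass + unsafe_mass)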