future-xy committed
Commit 85e30d4
Parent(s): f0ad559
fix generation bugs
src/backend/huggingface_generate_until.py
CHANGED
@@ -28,7 +28,10 @@ class HFLMwithChatTemplate(HFLMWithMeasurement):
             messages = [
                 {"role": "user", "content": f"{input_string}"},
             ]
-            updated_string = self.tokenizer.apply_chat_template(messages, tokenize=False)
+            if "dbrx-instruct" in self.model.name_or_path:
+                updated_string = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+            else:
+                updated_string = self.tokenizer.apply_chat_template(messages, tokenize=False)
             updated_strings.append(updated_string)
         strings = updated_strings[:]
     except:
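For context on the change above: add_generation_prompt controls whether the chat template also appends the tokens that open the assistant turn, which this commit suggests dbrx-instruct needs before generation. A minimal sketch of the two apply_chat_template calls the new branch chooses between, assuming any chat-templated tokenizer is available (the checkpoint name below is only illustrative, not part of this repo):

from transformers import AutoTokenizer

# Illustrative checkpoint; any tokenizer that ships a chat template behaves the same way.
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
messages = [{"role": "user", "content": "Hello, who are you?"}]

# Without add_generation_prompt: the rendered string ends after the user turn.
prompt_plain = tokenizer.apply_chat_template(messages, tokenize=False)

# With add_generation_prompt=True: the string also ends with the assistant-turn
# opener, so generation starts in the right place for models that expect it.
prompt_ready = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

print(prompt_plain)
print(prompt_ready)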
src/backend/manage_requests.py
CHANGED
@@ -42,6 +42,9 @@ class EvalRequest:
             # A GPTQ model does not need dtype to be specified,
             # it will be inferred from the config
             pass
+        elif self.precision == "8bit":
+            model_args += ",load_in_8bit=True"
+            model_args += ",trust_remote_code=True"
         else:
             raise Exception(f"Unknown precision {self.precision}.")
         return model_args
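The new 8bit branch only appends flags to the comma-separated model_args string that get_model_args() builds. A rough sketch of how such a string is typically split back into keyword arguments downstream; this parser is a simplified assumption for illustration, not the harness's exact implementation, and the model name is a placeholder:

def parse_model_args(model_args: str) -> dict:
    """Split "k1=v1,k2=v2" into a kwargs dict, coercing boolean flags."""
    kwargs = {}
    for pair in filter(None, model_args.split(",")):
        key, _, value = pair.partition("=")
        kwargs[key] = (value == "True") if value in ("True", "False") else value
    return kwargs

print(parse_model_args("pretrained=some-org/some-model,load_in_8bit=True,trust_remote_code=True"))
# {'pretrained': 'some-org/some-model', 'load_in_8bit': True, 'trust_remote_code': True}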
src/backend/run_eval_suite.py
CHANGED
@@ -48,7 +48,7 @@ def run_evaluation(
     )
     # hf-chat is implemented to use apply_chat_template
     results = evaluator.simple_evaluate(
-        model=eval_request.inference_framework, # "hf-chat"
+        model=eval_request.inference_framework, # "hf-chat", "moe-infinity"
        model_args=eval_request.get_model_args(),
        tasks=task_names,
        num_fewshot=num_fewshot,
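For reference, the shape of the simple_evaluate call after the comment fix; "hf-chat" and "moe-infinity" are backends registered by this repo's fork of lm-eval, and the model_args and task values below are placeholders rather than anything taken from the repo:

from lm_eval import evaluator

results = evaluator.simple_evaluate(
    model="hf-chat",                              # or "moe-infinity", per eval_request.inference_framework
    model_args="pretrained=some-org/some-model",  # placeholder; normally eval_request.get_model_args()
    tasks=["selfcheckgpt"],                       # placeholder task list
    num_fewshot=0,
)
print(results["results"])                         # per-task metrics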
src/backend/tasks/selfcheckgpt/task.py
CHANGED
@@ -23,13 +23,14 @@ class SelfCheckGPT(ConfigurableTask):
     def __init__(self):
         super().__init__(config={"metadata": {"version": self.VERSION}})
         # these end tokens are hard coded because of the current limitaion of the llm-eval.
-        self.generation_kwargs = {"until": ["\n\n", "<unk>", "<|im_end|>", "</s>", "<|endoftext|>"], "max_length": 512}
+        # self.generation_kwargs = {"until": ["\n\n", "<unk>", "<|im_end|>", "</s>", "<|endoftext|>"], "max_length": 512}
+        self.generation_kwargs = {"until": ["<im_end>"], "max_length": 1024}
         self.generation_kwargs_sampling_number = 5 # the number of sampling for self-consistence
         self.generation_kwargs_sampling = {
             "temperature": 0.99,
             "do_sample": True,
-            "until": ["
-            "max_length":
+            "until": ["<im_end>", "</s>"],
+            "max_length": 1024,
         }

         self.selfcheckgpt_type = os.environ.get("SELFCHECKGPTTYPE", "SelfCheckNLI")
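A small sketch of how kwargs like the ones above are typically consumed for SelfCheckGPT-style scoring: one pass with the deterministic kwargs plus several stochastic samples whose agreement is checked, each completion cut at the first "until" string. The generate() helper is a hypothetical stand-in for the harness's generate_until path, not code from this repo:

generation_kwargs = {"until": ["<im_end>"], "max_length": 1024}
generation_kwargs_sampling_number = 5
generation_kwargs_sampling = {
    "temperature": 0.99,
    "do_sample": True,
    "until": ["<im_end>", "</s>"],
    "max_length": 1024,
}

def generate(prompt: str, **kwargs) -> str:
    # Hypothetical stand-in for the model call: fabricate a completion, then
    # truncate at the first stop string and clamp to max_length characters.
    text = f"dummy completion for: {prompt} <im_end> trailing text"
    for stop in kwargs.get("until", []):
        text = text.split(stop)[0]
    return text[: kwargs.get("max_length", 512)]

main_answer = generate("Who wrote Hamlet?", **generation_kwargs)
samples = [generate("Who wrote Hamlet?", **generation_kwargs_sampling)
           for _ in range(generation_kwargs_sampling_number)]
print(main_answer)
print(samples)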