from transformers import AutoTokenizer, AutoModelForCausalLM
from string import Template
import torch
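
# Prompt templates used to fine-tune TIGERScore: a short instruction naming the
# task, followed by the task instruction, the source context, and the
# model-generated output to be evaluated.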
FINETUNE_INST = "You are evaluating errors in a model-generated output for a(an) ${task} task."
FINETUNE_INPUT = """\
Task instruction: ${generation_instruction}
Source: ${input_context}
Model-generated Output: ${hypothesis_output}
Based on the given task instruction and source, identify errors in this model-generated output.
For each error you give in the response, please also elaborate the following information:
- error location (the words that are wrong in the output)
- error aspect it belongs to.
- explanation why it's an error, and the correction suggestions.
- severity of the error ("Major" or "Minor").
- reduction of score (between 0.5 and 5 given the severity of the error)
Your evaluation output:
"""
TIGERScore_model_map = {
    "7b": "TIGER-Lab/TIGERScore-7B-V1.0",
    "13b": "TIGER-Lab/TIGERScore-13B-V1.0",
}
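
# Module-level handles, populated by load_tigerscore().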
tigerscore_model = None
tigerscore_tokenizer = None
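
# Tasks TIGERScore was trained to evaluate (listed for reference; the demo
# below only exercises "translation").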
tasks = [
    "translation",
    "summarization",
    "data2text",
    "mathQA",
    "long-form QA",
    "instruction-following",
]
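
# Load the requested checkpoint once and keep it in the module-level globals.
# device_map="auto" places the weights on available GPUs (falling back to
# CPU), and bfloat16 halves the memory footprint versus float32.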
def load_tigerscore(model_size):
    assert model_size in TIGERScore_model_map
    model_name = TIGERScore_model_map[model_size]
    global tigerscore_model, tigerscore_tokenizer
    tigerscore_model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
        device_map="auto"
    )
    tigerscore_tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        use_fast=True
    )
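
# Build the evaluation prompt from the templates, run generation, and return
# the decoded critique for one (instruction, source, hypothesis) triple.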
def generate(task, input_context, generation_instruction, hypo_output, **generate_kwargs):
    assert tigerscore_model is not None, "Call load_tigerscore() first."
    inst_part = Template(FINETUNE_INST).substitute(task=task)
    input_part = Template(FINETUNE_INPUT).substitute(
        generation_instruction=generation_instruction,
        input_context=input_context,
        hypothesis_output=hypo_output
    )
    prompt = (inst_part + "\n" + input_part).strip("\n ") + "\n"
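
    # Tokenize the prompt and move the tensors to the model's device.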
    encodings = tigerscore_tokenizer(prompt, return_tensors="pt")
    input_ids = encodings["input_ids"].to(tigerscore_model.device)
    attention_mask = encodings["attention_mask"].to(tigerscore_model.device)
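
    # Default decoding settings; do_sample=True with top_k=1 restricts
    # sampling to the single most likely token, i.e. effectively greedy.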
    gen_params = {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "max_new_tokens": 512,
        "do_sample": True,
        "top_k": 1,
        "num_return_sequences": 1,
    }
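    # Caller-supplied kwargs override the defaults above.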
    gen_params.update(generate_kwargs)
    output = tigerscore_model.generate(**gen_params)
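    # Drop the prompt tokens and decode only the newly generated critique.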
    output = tigerscore_tokenizer.decode(output[0][len(input_ids[0]):], skip_special_tokens=True)
    return output
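
# Minimal usage example: score a German-to-English translation hypothesis.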
if __name__ == "__main__":
    load_tigerscore("7b")  # pick "13b" for the larger checkpoint
    task = "translation"
    input_context = "Der künftige EM-Cheforganisator Philipp Lahm soll laut Grindel im DFB-Präsidium mitarbeiten."
    generation_instruction = "Translate the following text from German to English."
    hypo_output = "According to Grindel, the future head of the European Championships, Philipp Lahm, is to participate in the DFB Presidency."
    output = generate(task, input_context, generation_instruction, hypo_output)
    print(output)
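    # The printed critique lists each detected error with its location, aspect,
    # explanation, severity ("Major"/"Minor"), and score reduction, per the
    # response format requested in FINETUNE_INPUT.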