Spaces:

sparse-generative-ai
/

open-moe-llm-leaderboard

Running

open-moe-llm-leaderboard/src/backend/huggingface_generate_until.py

future-xy

fix generation bugs

85e30d4 9 months ago

2.42 kB

	from typing import List, Literal, Optional, Tuple, Union
	import torch
	import transformers

	from lm_eval.api.registry import register_model

	from src.backend.hflm_with_measurement import HFLMWithMeasurement


	@register_model("hf-chat")
	class HFLMwithChatTemplate(HFLMWithMeasurement):
	    """``HFLMWithMeasurement`` variant that can wrap prompts in a chat template.

	    Registered with lm_eval under the model name ``"hf-chat"``. When
	    ``use_chat_template`` is true, every prompt handed to
	    :meth:`tok_batch_encode` is first rendered as a single-turn ``user``
	    message through ``self.tokenizer.apply_chat_template``.
	    """

	    def __init__(self, use_chat_template=True, **kwargs):
	        """Initialise the parent model and remember the chat-template switch.

	        Args:
	            use_chat_template: Whether ``tok_batch_encode`` should render
	                prompts through the tokenizer's chat template.
	            **kwargs: Forwarded unchanged to ``HFLMWithMeasurement.__init__``.
	        """
	        super().__init__(**kwargs)
	        self.use_chat_template = use_chat_template

	    def tok_batch_encode(
	        self,
	        strings: List[str],
	        padding_side: str = "left",
	        left_truncate_len: Optional[int] = None,
	        truncation: bool = False,
	    ) -> Tuple[torch.Tensor, torch.Tensor]:
	        """Encode a batch of prompts into padded tensors.

	        Unlike a per-string encode, this converts to tensors and pads
	        automatically (``padding="longest"``, ``return_tensors="pt"``).

	        Args:
	            strings: Prompts to encode.
	            padding_side: Padding side set on the tokenizer for the duration
	                of this call; the previous value is restored afterwards.
	            left_truncate_len: If truthy, keep only the last
	                ``left_truncate_len`` positions of ``input_ids`` and
	                ``attention_mask``.
	            truncation: Forwarded to the tokenizer call.

	        Returns:
	            ``(input_ids, attention_mask)`` taken from the tokenizer output.
	        """
	        if self.use_chat_template:
	            try:
	                template_kwargs = {"tokenize": False}
	                # Only dbrx-instruct checkpoints are rendered with the
	                # generation prompt appended.
	                if "dbrx-instruct" in self.model.name_or_path:
	                    template_kwargs["add_generation_prompt"] = True
	                # `strings` is rebound only after every prompt rendered
	                # successfully, so a failure leaves the raw prompts intact.
	                strings = [
	                    self.tokenizer.apply_chat_template(
	                        [{"role": "user", "content": str(prompt)}],
	                        **template_kwargs,
	                    )
	                    for prompt in strings
	                ]
	            except Exception:
	                # Best effort: fall back to the untemplated prompts. Catching
	                # Exception (not a bare except) lets KeyboardInterrupt and
	                # SystemExit propagate.
	                print(f"failed to update input string with chat template: {self._model}")

	        # Decide special-token handling before touching tokenizer state.
	        # For a model class that is neither causal nor seq2seq the argument
	        # is omitted, so the tokenizer's own default applies instead of an
	        # UnboundLocalError being raised.
	        tokenizer_kwargs = {}
	        if self.AUTO_MODEL_CLASS == transformers.AutoModelForCausalLM:
	            tokenizer_kwargs["add_special_tokens"] = False
	        elif self.AUTO_MODEL_CLASS == transformers.AutoModelForSeq2SeqLM:
	            tokenizer_kwargs["add_special_tokens"] = True

	        # Temporarily override the padding side; `finally` guarantees the
	        # tokenizer is restored even if encoding raises.
	        old_padding_side = self.tokenizer.padding_side
	        self.tokenizer.padding_side = padding_side
	        try:
	            encoding = self.tokenizer(
	                strings,
	                truncation=truncation,
	                padding="longest",
	                return_tensors="pt",
	                **tokenizer_kwargs,
	            )
	        finally:
	            self.tokenizer.padding_side = old_padding_side

	        input_ids = encoding["input_ids"]
	        attention_mask = encoding["attention_mask"]
	        if left_truncate_len:
	            # Keep the rightmost tokens (the end of each prompt).
	            input_ids = input_ids[:, -left_truncate_len:]
	            attention_mask = attention_mask[:, -left_truncate_len:]

	        return input_ids, attention_mask