Update Space (evaluate main: 9f0f888e)
- perplexity.py +7 -5
- requirements.txt +1 -1
perplexity.py
CHANGED
@@ -100,7 +100,9 @@ class Perplexity(evaluate.Metric):
             reference_urls=["https://huggingface.co/docs/transformers/perplexity"],
         )
 
-    def _compute(self, predictions, model_id, batch_size: int = 16, add_start_token: bool = True, device=None):
+    def _compute(
+        self, predictions, model_id, batch_size: int = 16, add_start_token: bool = True, device=None, max_length=None
+    ):
 
         if device is not None:
             assert device in ["gpu", "cpu", "cuda"], "device should be either gpu or cpu."
@@ -126,20 +128,20 @@ class Perplexity(evaluate.Metric):
             # assign one of the special tokens to also be the pad token
             tokenizer.add_special_tokens({"pad_token": existing_special_tokens[0]})
 
-        if add_start_token:
+        if add_start_token and max_length:
             # leave room for <BOS> token to be added:
             assert (
                 tokenizer.bos_token is not None
             ), "Input model must already have a BOS token if using add_start_token=True. Please use a different model, or set add_start_token=False"
-            max_tokenized_len = model.config.max_length - 1
+            max_tokenized_len = max_length - 1
         else:
-            max_tokenized_len = model.config.max_length
+            max_tokenized_len = max_length
 
         encodings = tokenizer(
             predictions,
             add_special_tokens=False,
             padding=True,
-            truncation=True,
+            truncation=True if max_tokenized_len else False,
             max_length=max_tokenized_len,
             return_tensors="pt",
             return_attention_mask=True,
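Taken together, the perplexity.py hunks add an optional max_length keyword to _compute: when it is supplied, inputs are truncated (to max_length - 1 tokens when add_start_token=True, leaving room for the BOS token); when it is omitted, max_tokenized_len stays None and no truncation is applied. A minimal usage sketch under that reading follows; the model_id, predictions, and max_length value are illustrative, and the result keys are taken from the metric's documented output.

# Sketch only: assumes the Space environment defined by requirements.txt below
# (the pinned evaluate commit plus torch and transformers).
import evaluate

perplexity = evaluate.load("perplexity", module_type="metric")

# Default path (unchanged behavior): no max_length, so inputs are not truncated.
results = perplexity.compute(model_id="gpt2", predictions=["lorem ipsum", "Happy Birthday!"])

# New path: pass max_length to enable truncation; with add_start_token=True the
# tokenizer keeps max_length - 1 tokens so the prepended BOS token still fits.
results = perplexity.compute(
    model_id="gpt2",
    predictions=["lorem ipsum", "Happy Birthday!"],
    add_start_token=True,
    max_length=32,
)
print(results["mean_perplexity"])  # per-input values are under "perplexities"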
requirements.txt
CHANGED
@@ -1,4 +1,4 @@
-git+https://github.com/huggingface/evaluate@
+git+https://github.com/huggingface/evaluate@9f0f888eb455bc0952f467b1cab47716e3f04e83
 torch
 torch
 transformers
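The requirements.txt change pins the Space's evaluate dependency to the same commit the Space was updated to (9f0f888e...), keeping the installed library and the metric script in sync. A quick, hedged way to confirm that a rebuilt environment actually exposes the new keyword is sketched below; the check is illustrative and not part of the Space itself.

# Sketch only: loads the metric and inspects the _compute signature changed above.
import inspect

import evaluate

perplexity = evaluate.load("perplexity", module_type="metric")
print("max_length" in inspect.signature(perplexity._compute).parameters)  # expected: True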