lvwerra (HF staff) committed
Commit: 0b4bc5a
Parent: 764f24b

Update Space (evaluate main: 9f0f888e)

Files changed (2):
  1. perplexity.py +7 -5
  2. requirements.txt +1 -1
perplexity.py CHANGED
@@ -100,7 +100,9 @@ class Perplexity(evaluate.Metric):
             reference_urls=["https://huggingface.co/docs/transformers/perplexity"],
         )
 
-    def _compute(self, predictions, model_id, batch_size: int = 16, add_start_token: bool = True, device=None):
+    def _compute(
+        self, predictions, model_id, batch_size: int = 16, add_start_token: bool = True, device=None, max_length=None
+    ):
 
         if device is not None:
             assert device in ["gpu", "cpu", "cuda"], "device should be either gpu or cpu."
@@ -126,20 +128,20 @@ class Perplexity(evaluate.Metric):
             # assign one of the special tokens to also be the pad token
             tokenizer.add_special_tokens({"pad_token": existing_special_tokens[0]})
 
-        if add_start_token:
+        if add_start_token and max_length:
             # leave room for <BOS> token to be added:
             assert (
                 tokenizer.bos_token is not None
             ), "Input model must already have a BOS token if using add_start_token=True. Please use a different model, or set add_start_token=False"
-            max_tokenized_len = model.config.max_length - 1
+            max_tokenized_len = max_length - 1
         else:
-            max_tokenized_len = model.config.max_length
+            max_tokenized_len = max_length
 
         encodings = tokenizer(
             predictions,
             add_special_tokens=False,
             padding=True,
-            truncation=True,
+            truncation=True if max_tokenized_len else False,
             max_length=max_tokenized_len,
             return_tensors="pt",
             return_attention_mask=True,
requirements.txt CHANGED
@@ -1,4 +1,4 @@
-git+https://github.com/huggingface/evaluate@960dc92ef6a86e3b11d7b7276b4960135a3129b9
+git+https://github.com/huggingface/evaluate@9f0f888eb455bc0952f467b1cab47716e3f04e83
 torch
 torch
 transformers