rajammanabrolu committed on
Commit
eeec945
1 Parent(s): cd5239e

Update tiktoken.py

Browse files
Files changed (1) hide show
  1. tiktoken.py +1 -18
tiktoken.py CHANGED
@@ -3,7 +3,6 @@
3
  from functools import lru_cache
4
  from typing import Any, Dict, List, Optional, Tuple
5
 
6
- import torch
7
  from transformers import PreTrainedTokenizer
8
 
9
  DEFAULT_SYSTEM_PROMPT = """You are a helpful, respectful and honest assistant. Always answer as helpfully as possible."""
@@ -200,8 +199,6 @@ class TiktokenTokenizerWrapper(PreTrainedTokenizer):
200
  '{% endif %}'
201
  '{% if (add_generation_prompt == true and loop.last) %}'
202
  "{{ '\n' + '<|im_start|>' + 'assistant' + '\n' }}"
203
- "{% elif (message['role'] == 'assistant') %}"
204
- '{{ eos_token }}'
205
  '{% endif %}'
206
  '{% endfor %}')
207
  template = template.replace(
@@ -358,19 +355,5 @@ class TiktokenTokenizerWrapper(PreTrainedTokenizer):
358
 
359
  return self.add_tokens(actual_new_tokens, special_tokens=True)
360
 
361
- def construct_logit_tensor(self, logprobs: Dict[str,
362
- float]) -> torch.Tensor:
363
- """Construct tensor of shape (vocab_size,) mapping words to logprobs.
364
 
365
- Args:
366
- logprobs (Dict[str, float]): Dictionary mapping tokens to log probabilities assigned to them by the model.
367
- """
368
- tensor = torch.tensor([min(logprobs.values()) - 1] * (self.vocab_size))
369
- for k in logprobs:
370
- encoding = self(k)['input_ids']
371
- idx = encoding[0]
372
- tensor[idx] = logprobs[k]
373
- return tensor
374
-
375
-
376
- TiktokenTokenizerWrapper.register_for_auto_class()
 
3
  from functools import lru_cache
4
  from typing import Any, Dict, List, Optional, Tuple
5
 
 
6
  from transformers import PreTrainedTokenizer
7
 
8
  DEFAULT_SYSTEM_PROMPT = """You are a helpful, respectful and honest assistant. Always answer as helpfully as possible."""
 
199
  '{% endif %}'
200
  '{% if (add_generation_prompt == true and loop.last) %}'
201
  "{{ '\n' + '<|im_start|>' + 'assistant' + '\n' }}"
 
 
202
  '{% endif %}'
203
  '{% endfor %}')
204
  template = template.replace(
 
355
 
356
  return self.add_tokens(actual_new_tokens, special_tokens=True)
357
 
 
 
 
358
 
359
+ TiktokenTokenizerWrapper.register_for_auto_class()