xzuyn committed on
Commit
39599fb
·
1 Parent(s): eb70853

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -6
app.py CHANGED
@@ -9,12 +9,12 @@ phi2_tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2")
9
  falcon_tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-7b")
10
 
11
  def tokenize(input_text):
12
- gpt2_tokens = gpt2_tokenizer(input_text)["input_ids"]
13
- gpt_neox_tokens = gpt_neox_tokenizer(input_text)["input_ids"]
14
- llama_tokens = llama_tokenizer(input_text)["input_ids"]
15
- yi_tokens = llama_tokenizer(input_text)["input_ids"]
16
- phi2_tokens = phi2_tokenizer(input_text)["input_ids"]
17
- falcon_tokens = falcon_tokenizer(input_text)["input_ids"]
18
 
19
  return f"GPT-2/GPT-J: {len(gpt2_tokens)}\nGPT-NeoX: {len(gpt_neox_tokens)}\nLLaMa: {len(llama_tokens)}\nPhi-2: {len(phi2_tokens)}\nFalcon: {len(falcon_tokens)}"
20
 
 
9
  falcon_tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-7b")
10
 
11
  def tokenize(input_text):
12
+ gpt2_tokens = gpt2_tokenizer(input_text, add_special_tokens=True)["input_ids"]
13
+ gpt_neox_tokens = gpt_neox_tokenizer(input_text, add_special_tokens=True)["input_ids"]
14
+ llama_tokens = llama_tokenizer(input_text, add_special_tokens=True)["input_ids"]
15
+ yi_tokens = llama_tokenizer(input_text, add_special_tokens=True)["input_ids"]
16
+ phi2_tokens = phi2_tokenizer(input_text, add_special_tokens=True)["input_ids"]
17
+ falcon_tokens = falcon_tokenizer(input_text, add_special_tokens=True)["input_ids"]
18
 
19
  return f"GPT-2/GPT-J: {len(gpt2_tokens)}\nGPT-NeoX: {len(gpt_neox_tokens)}\nLLaMa: {len(llama_tokens)}\nPhi-2: {len(phi2_tokens)}\nFalcon: {len(falcon_tokens)}"
20