xzuyn committed on
Commit f3369dd
1 Parent(s): bbc0512

Update app.py

Files changed (1)
  1. app.py +9 -21
app.py CHANGED
@@ -1,29 +1,17 @@
 from transformers import AutoTokenizer
 import gradio as gr
 
-def tokenize(input_text):
-    llama_tokens = len(llama_tokenizer(input_text, add_special_tokens=True)["input_ids"])
-    mistral_tokens = len(mistral_tokenizer(input_text, add_special_tokens=True)["input_ids"])
-    gpt2_tokens = len(gpt2_tokenizer(input_text, add_special_tokens=True)["input_ids"])
-    gpt_neox_tokens = len(gpt_neox_tokenizer(input_text, add_special_tokens=True)["input_ids"])
-    falcon_tokens = len(falcon_tokenizer(input_text, add_special_tokens=True)["input_ids"])
-    phi2_tokens = len(phi2_tokenizer(input_text, add_special_tokens=True)["input_ids"])
-    t5_tokens = len(t5_tokenizer(input_text, add_special_tokens=True)["input_ids"])
-
-    token_lengths = {
-        "LLaMa": llama_tokens,
-        "Mistral": mistral_tokens,
-        "GPT-2/GPT-J": gpt2_tokens,
-        "GPT-NeoX": gpt_neox_tokens,
-        "Falcon": falcon_tokens,
-        "Phi-2": phi2_tokens,
-        "T5": t5_tokens
-    }
 
-    sorted_tokens = sorted(token_lengths.items(), key=lambda x: x[1], reverse=True)
-    result = "\n".join([f"{name}: {length}" for name, length in sorted_tokens])
+def tokenize(input_text):
+    llama_tokens = llama_tokenizer(input_text, add_special_tokens=True)["input_ids"]
+    mistral_tokens = mistral_tokenizer(input_text, add_special_tokens=True)["input_ids"]
+    gpt2_tokens = gpt2_tokenizer(input_text, add_special_tokens=True)["input_ids"]
+    gpt_neox_tokens = gpt_neox_tokenizer(input_text, add_special_tokens=True)["input_ids"]
+    falcon_tokens = falcon_tokenizer(input_text, add_special_tokens=True)["input_ids"]
+    phi2_tokens = phi2_tokenizer(input_text, add_special_tokens=True)["input_ids"]
+    t5_tokens = t5_tokenizer(input_text, add_special_tokens=True)["input_ids"]
 
-    return result
+    return f"LLaMa: {len(llama_tokens)}\nMistral: {len(mistral_tokens)}\nGPT-2/GPT-J: {len(gpt2_tokens)}\nGPT-NeoX: {len(gpt_neox_tokens)}\nFalcon: {len(falcon_tokens)}\nPhi-2: {len(phi2_tokens)}\nT5: {len(t5_tokens)}"
 
 
 if __name__ == "__main__":
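
Behaviorally, the rewrite drops the intermediate dict and the sorted() pass, so the output now lists the models in a fixed order rather than descending by token count; the counts themselves are unchanged, since len() is simply deferred to the returned f-string.

The hunk's context stops at the if __name__ == "__main__": guard, so the setup that tokenize relies on is not shown in this commit. As a rough sketch only: the globals it references (llama_tokenizer, mistral_tokenizer, and so on) would presumably be created with AutoTokenizer.from_pretrained and the function wired to a Gradio interface along these lines. The checkpoint names below are illustrative guesses, not taken from the repo.

from transformers import AutoTokenizer
import gradio as gr

if __name__ == "__main__":
    # Hypothetical checkpoints: the commit does not show which repos the
    # app actually loads; these are stand-ins for each tokenizer family.
    llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
    mistral_tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
    gpt2_tokenizer = AutoTokenizer.from_pretrained("gpt2")
    gpt_neox_tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
    falcon_tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-7b")
    phi2_tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2")
    t5_tokenizer = AutoTokenizer.from_pretrained("t5-base")

    # Expose tokenize() (defined above) as a simple text-in, text-out app.
    gr.Interface(
        fn=tokenize,
        inputs=gr.Textbox(lines=5, label="Input Text"),
        outputs=gr.Textbox(lines=7, label="Token Counts"),
    ).launch()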