xzuyn committed
Commit b5dfd52
1 Parent(s): 2e317dd

Update app.py

Files changed (1)
  1. app.py +16 -15
app.py CHANGED
@@ -1,26 +1,27 @@
 from transformers import AutoTokenizer
 import gradio as gr
 
+
 gpt2_tokenizer = AutoTokenizer.from_pretrained("gpt2")
 gptj_tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6b")
 gpt_neox_tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
 llama_tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")
 
-tokenizers = {
-    "GPT-2": gpt2_tokenizer,
-    "GPT-J": gptj_tokenizer,
-    "GPT-NeoX": gpt_neox_tokenizer,
-    "LLaMa": llama_tokenizer
-}
+def tokenize(input_text):
+    gpt2_tokens = gpt2_tokenizer(input_text)["input_ids"]
+    gptj_tokens = gptj_tokenizer(input_text)["input_ids"]
+    gpt_neox_tokens = gpt_neox_tokenizer(input_text)["input_ids"]
+    llama_tokens = llama_tokenizer(input_text)["input_ids"]
 
-def tokenize(input_text, tokenizer_name):
-    tokenizer = tokenizers[tokenizer_name]
-    tokens = tokenizer(input_text)["input_ids"]
-    return f"Number of tokens for {tokenizer_name}: {len(tokens)}"
+
+
+    return f"""Number of tokens.
 
-textbox_input = gr.inputs.Textbox(lines=7)
-dropdown_tokenizer = gr.inputs.Dropdown(choices=list(tokenizers.keys()), default="LLaMa")
-output_text = "text"
+GPT-2: {len(gpt2_tokens)}
+GPT-J: {len(gptj_tokens)}
+GPT-NeoX: {len(gpt_neox_tokens)}
+LLaMa: {len(llama_tokens)}
+"""
 
-iface = gr.Interface(fn=tokenize, inputs=[textbox_input, dropdown_tokenizer], outputs=output_text)
-iface.launch()
+iface = gr.Interface(fn=tokenize, inputs=gr.inputs.Textbox(lines=7), outputs="text")
+iface.launch()
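
For reference, here is app.py as it stands after this commit, reconstructed from the diff above. Comments are added and the extra blank lines inside tokenize are collapsed for readability; the logic is otherwise exactly what the commit introduces.

from transformers import AutoTokenizer
import gradio as gr

# Load one tokenizer per model family being compared.
gpt2_tokenizer = AutoTokenizer.from_pretrained("gpt2")
gptj_tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6b")
gpt_neox_tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
llama_tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")

def tokenize(input_text):
    # Encode the same text with every tokenizer; "input_ids" holds
    # the token id sequence, so its length is the token count.
    gpt2_tokens = gpt2_tokenizer(input_text)["input_ids"]
    gptj_tokens = gptj_tokenizer(input_text)["input_ids"]
    gpt_neox_tokens = gpt_neox_tokenizer(input_text)["input_ids"]
    llama_tokens = llama_tokenizer(input_text)["input_ids"]

    # Report all four counts in one text output.
    return f"""Number of tokens.

GPT-2: {len(gpt2_tokens)}
GPT-J: {len(gptj_tokens)}
GPT-NeoX: {len(gpt_neox_tokens)}
LLaMa: {len(llama_tokens)}
"""

iface = gr.Interface(fn=tokenize, inputs=gr.inputs.Textbox(lines=7), outputs="text")
iface.launch()

The change trades the earlier tokenizer dropdown for a single function that reports all four counts at once, so one submission compares every tokenizer side by side. One caveat for anyone reusing this: gr.inputs.Textbox is Gradio's legacy 2.x input namespace, deprecated in Gradio 3 and removed in later releases; on a newer Gradio (an assumption about the reader's environment, not part of this commit) the equivalent would be passing gr.Textbox(lines=7) directly to inputs=.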