from transformers import AutoTokenizer
import gradio as gr

# Load one tokenizer per model family so the app can compare token counts.
gpt2_tokenizer = AutoTokenizer.from_pretrained("gpt2")
gptj_tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6b")
gpt_neox_tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
llama_tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")

tokenizers = {
    "GPT-2": gpt2_tokenizer,
    "GPT-J": gptj_tokenizer,
    "GPT-NeoX": gpt_neox_tokenizer,
    "LLaMa": llama_tokenizer,
}

def tokenize(input_text, tokenizer_name):
    # Encode the input with the selected tokenizer and report the token count.
    tokenizer = tokenizers[tokenizer_name]
    tokens = tokenizer(input_text)["input_ids"]
    return f"Number of tokens for {tokenizer_name}: {len(tokens)}"

# Gradio components; the legacy gr.inputs.* classes were removed in newer Gradio
# releases, so use gr.Textbox / gr.Dropdown (with value= instead of default=).
textbox_input = gr.Textbox(lines=7)
dropdown_tokenizer = gr.Dropdown(choices=list(tokenizers.keys()), value="LLaMa")
output_text = "text"

iface = gr.Interface(fn=tokenize, inputs=[textbox_input, dropdown_tokenizer], outputs=output_text)
iface.launch()