from transformers import AutoTokenizer
import gradio as gr

# Load one tokenizer per model family so their token counts can be compared.
gpt2_tokenizer = AutoTokenizer.from_pretrained("gpt2")
gptj_tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6b")
gpt_neox_tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
llama_tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")

tokenizers = {
    "GPT-2": gpt2_tokenizer,
    "GPT-J": gptj_tokenizer,
    "GPT-NeoX": gpt_neox_tokenizer,
    "LLaMa": llama_tokenizer,
}

def tokenize(input_text, tokenizer_name):
    # Encode the text with the selected tokenizer and report the token count.
    tokenizer = tokenizers[tokenizer_name]
    tokens = tokenizer(input_text)["input_ids"]
    return f"Number of tokens for {tokenizer_name}: {len(tokens)}"

# Input components: a multi-line textbox for the text to tokenize and a
# dropdown defaulting to the LLaMa tokenizer. In Gradio 3+, components live
# at the top level (gr.Textbox, gr.Dropdown) and the default is set via `value=`.
textbox_input = gr.Textbox(lines=7)
dropdown_tokenizer = gr.Dropdown(choices=list(tokenizers.keys()), value="LLaMa")

iface = gr.Interface(
    fn=tokenize,
    inputs=[textbox_input, dropdown_tokenizer],
    outputs="text",
)
iface.launch()
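
# Rough sanity check (hypothetical input and count): calling the handler
# directly, e.g. tokenize("Gradio is great", "GPT-2"), returns a string like
# "Number of tokens for GPT-2: 4". Counts will generally differ across the
# four tokenizers because each uses its own vocabulary and merge rules.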