import ast

import gradio as gr
from transformers import AutoTokenizer

model_path = "models/"

# Available models
MODELS = ["Meta-Llama-3.1-8B"]
def process_input(input_type, input_value, model_name):
    # Load the tokenizer for the selected model
    tokenizer = AutoTokenizer.from_pretrained(model_path + model_name)

    if input_type == "Text":
        character_count = len(input_value)
        # Tokenize the text
        token_ids = tokenizer.encode(input_value, add_special_tokens=True)
        tokens = tokenizer.convert_ids_to_tokens(token_ids)
        return len(tokens), character_count, tokens, token_ids
    elif input_type == "Token IDs":
        try:
            token_ids = ast.literal_eval(input_value)
            # Convert token IDs back to text
            text = tokenizer.decode(token_ids)
            return len(token_ids), len(text), text, input_value
        except (ValueError, SyntaxError):
            return "Error", "Error", "Invalid input. Please enter a Python-style list of integers, e.g. [1, 2, 3].", input_value
# Create Gradio interface
iface = gr.Interface(
    fn=process_input,
    inputs=[
        gr.Radio(["Text", "Token IDs"], label="Input Type", value="Text"),
        gr.Textbox(lines=5, label="Input"),
        gr.Dropdown(choices=MODELS, label="Select Model")
    ],
    outputs=[
        gr.Textbox(label="Token Count"),
        gr.Textbox(label="Character Count"),
        gr.Textbox(label="Tokens", lines=10),
        gr.Textbox(label="Token IDs", lines=5)
    ],
    title="LLM Tokenization - Convert Text to Tokens and Vice Versa!",
    description="Enter text or token IDs and select a model to see the results."
)
if __name__ == "__main__":
    iface.queue()
    iface.launch()