# Page residue captured along with this file ("Spaces: Sleeping"); not part of the program.
import gradio as gr | |
from transformers import AutoTokenizer | |
import ast | |
# Prefix that process_input prepends to the selected model name when loading its tokenizer.
model_path = "models/"
import gradio as gr | |
# Model names offered in the "Select Model" dropdown; each is appended to
# model_path to locate the tokenizer to load.
MODELS = [
    "Meta-Llama-3.1-8B",
]
def _parse_token_ids(raw):
    """Parse user-entered token IDs into a list of integers.

    Two input shapes are accepted:

    * a Python-style literal: ``[1, 2, 3]``, ``(1, 2, 3)``, ``1, 2, 3`` or a
      single integer such as ``42``;
    * plain integers separated by whitespace and/or commas: ``1 2 3``.

    Args:
        raw: The text typed by the user.

    Returns:
        A list of ``int`` token IDs (possibly empty for ``[]``).

    Raises:
        ValueError: If the text is empty or contains anything other than
            integers.
    """
    text = raw.strip() if isinstance(raw, str) else ""
    if not text:
        raise ValueError("no token IDs supplied")

    try:
        parsed = ast.literal_eval(text)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Not a valid Python literal (e.g. "1 2 3"); try the delimiter form.
        parsed = None

    if parsed is None:
        # int() raises ValueError for any non-integer piece.
        parsed = [int(piece) for piece in text.replace(",", " ").split()]

    # A lone integer is treated as a one-element sequence.
    if isinstance(parsed, int) and not isinstance(parsed, bool):
        parsed = [parsed]

    if not isinstance(parsed, (list, tuple)):
        raise ValueError("token IDs must be a sequence of integers")

    token_ids = list(parsed)
    # bool is a subclass of int, so it has to be rejected explicitly.
    if any(isinstance(item, bool) or not isinstance(item, int) for item in token_ids):
        raise ValueError("token IDs must all be integers")
    return token_ids


def process_input(input_type, input_value, model_name):
    """Convert text to tokens/token IDs, or token IDs back to text.

    Args:
        input_type: ``"Text"`` to tokenize ``input_value``, or ``"Token IDs"``
            to decode it.
        input_value: The raw text from the input box. For ``"Token IDs"`` it
            may be a list literal (``[1, 2, 3]``) or integers separated by
            spaces/commas.
        model_name: Name appended to ``model_path`` to locate the tokenizer.

    Returns:
        A 3-tuple matching the three output boxes:

        * ``"Text"``: ``("Total tokens: N", tokens, token_ids)``.
        * ``"Token IDs"``: ``("Total tokens: N", decoded_text, input_value)``.
        * any problem: ``("Error", message, "")``.
    """
    if not model_name:
        # Without a model name, model_path + model_name would raise TypeError.
        return "Error", "No model selected. Please select a model.", ""

    if input_type == "Token IDs":
        invalid = (
            "Error",
            "Invalid input. Please enter space-separated integers for Token IDs.",
            "",
        )
        # Validate before loading the tokenizer so bad input fails fast.
        try:
            token_ids = _parse_token_ids(input_value)
        except ValueError:
            return invalid

        tokenizer = AutoTokenizer.from_pretrained(model_path + model_name)
        try:
            text = tokenizer.decode(token_ids)
        except ValueError:
            # The original handler also covered ValueError raised by decode.
            return invalid
        return f"Total tokens: {len(token_ids)}", text, input_value

    if input_type == "Text":
        tokenizer = AutoTokenizer.from_pretrained(model_path + model_name)
        tokens = tokenizer.tokenize(input_value)
        # NOTE(review): encode() may add special tokens depending on the
        # tokenizer's configuration, so token_ids can be longer than tokens;
        # the reported count is len(tokens), as before. Confirm this is intended.
        token_ids = tokenizer.encode(input_value)
        return f"Total tokens: {len(tokens)}", tokens, token_ids

    # Previously this fell through and returned None, which does not fit the
    # three output components.
    return "Error", f"Unsupported input type: {input_type!r}.", ""
# Wire process_input into a Gradio form: three inputs (mode, text, model) are
# passed to the function in order, and its three return values fill the three
# output text boxes in order.
iface = gr.Interface(
    title="LLM Tokenization and Token ID Converter",
    description="Enter text or token IDs and select a model to see the conversion results.",
    fn=process_input,
    inputs=[
        gr.Radio(choices=["Text", "Token IDs"], value="Text", label="Input Type"),
        gr.Textbox(label="Input", lines=5),
        gr.Dropdown(label="Select Model", choices=MODELS),
    ],
    outputs=[
        gr.Textbox(label="Token Count"),
        gr.Textbox(lines=10, label="Tokens"),
        gr.Textbox(lines=5, label="Token IDS"),
    ],
)

# Start the web UI only when this file is executed directly, not on import.
if __name__ == "__main__":
    iface.launch()