# TokenizerViz / app.py
# Author: prasanna kumar
# Commit: "Final commit for llama based model" (c6a1e30)
import gradio as gr
from transformers import AutoTokenizer
import ast

# Directory that holds the locally stored model folders; the selected model
# name from the UI is appended to this path when loading a tokenizer.
model_path = "models/"

# Models selectable in the UI (folder names expected under ``model_path``).
MODELS = ["Meta-Llama-3.1-8B"]
def process_input(input_type, input_value, model_name):
    """Tokenize text or decode token IDs with the selected model's tokenizer.

    Parameters
    ----------
    input_type : str
        Either "Text" or "Token IDs" (value of the Radio component).
    input_value : str
        The text to tokenize, or token IDs given either as a Python list
        literal (e.g. "[1, 2, 3]") or as space/comma-separated integers.
    model_name : str
        Name of a model directory under ``model_path``.

    Returns
    -------
    tuple
        Four values matching the four Gradio output Textboxes:
        (token count, character count, tokens/decoded text, token IDs).
    """
    # Loaded per call so switching models in the dropdown takes effect.
    tokenizer = AutoTokenizer.from_pretrained(model_path + model_name)

    if input_type == "Text":
        character_count = len(input_value)
        # Tokenize the text, including the model's special tokens.
        token_ids = tokenizer.encode(input_value, add_special_tokens=True)
        tokens = tokenizer.convert_ids_to_tokens(token_ids)
        return len(tokens), character_count, tokens, token_ids

    if input_type == "Token IDs":
        try:
            try:
                # Python literal form, e.g. "[1, 2, 3]".
                token_ids = list(ast.literal_eval(input_value))
            except (ValueError, SyntaxError):
                # Fall back to space/comma-separated integers, e.g. "1 2 3".
                # (literal_eval raises SyntaxError for this form, which the
                # original code failed to catch.)
                token_ids = [int(tok) for tok in input_value.replace(",", " ").split()]
            # Convert the token IDs back to text.
            text = tokenizer.decode(token_ids)
        except (ValueError, SyntaxError, TypeError):
            # Return FOUR values so every output Textbox gets a result
            # (the interface declares four outputs).
            return ("Error", "Error",
                    "Invalid input. Please enter space-separated integers for Token IDs.",
                    "")
        # NOTE(review): "Character Count" mirrors the token count here, as in
        # the original implementation — confirm whether len(input_value) was
        # intended instead.
        return len(token_ids), len(token_ids), text, input_value
# Build the Gradio interface: three inputs (input type, raw text, model
# choice) mapped onto process_input's four return values.
iface = gr.Interface(
    fn=process_input,
    inputs=[
        gr.Radio(["Text", "Token IDs"], label="Input Type", value="Text"),
        gr.Textbox(lines=5, label="Input"),
        gr.Dropdown(choices=MODELS, label="Select Model"),
    ],
    outputs=[
        gr.Textbox(label="Token Count"),
        gr.Textbox(label="Character Count"),
        gr.Textbox(label="Tokens", lines=10),
        # Label casing fixed ("Token IDS" -> "Token IDs") to match the
        # input Radio choice and the rest of the UI text.
        gr.Textbox(label="Token IDs", lines=5),
    ],
    title="LLM Tokenization - Convert Text to tokens and vice versa!",
    description="Enter text or token IDs and select a model to see the results.",
)

if __name__ == "__main__":
    # Enable request queuing so slow tokenizer loads don't drop requests.
    iface.queue()
    iface.launch()