SEND IT!

Jordan Legg committed · Commit 002fb99 · Parent(s): a35c60b
README.md CHANGED

@@ -1,6 +1,7 @@
 ---
+python_version: 3.11.10
 title: DiffusionTokenizer
-emoji:
+emoji: 🔢
 colorFrom: purple
 colorTo: indigo
 sdk: gradio
@@ -8,7 +9,7 @@ sdk_version: 5.6.0
 app_file: app.py
 pinned: false
 license: creativeml-openrail-m
-short_description: Easily
+short_description: Easily visualize tokens for any diffusion model.
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
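Reassembled from the two hunks above, the Space's front matter after this commit should read roughly as follows; the `sdk_version: 5.6.0` line comes from the second hunk's header context and is assumed otherwise unchanged:

```yaml
---
python_version: 3.11.10
title: DiffusionTokenizer
emoji: 🔢
colorFrom: purple
colorTo: indigo
sdk: gradio
sdk_version: 5.6.0
app_file: app.py
pinned: false
license: creativeml-openrail-m
short_description: Easily visualize tokens for any diffusion model.
---
```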
app.py CHANGED

@@ -1,13 +1,11 @@
 import gradio as gr
 from transformers import T5TokenizerFast, CLIPTokenizer
 
+# Load the common tokenizers once
+t5_tokenizer = T5TokenizerFast.from_pretrained("google/t5-v1_1-xxl", legacy=False)
+clip_tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
 
 def count_tokens(text):
-
-    # Load the common tokenizers
-    t5_tokenizer = T5TokenizerFast.from_pretrained("google/t5-v1_1-xxl", legacy=False)
-    clip_tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
-
     # Get tokens and their IDs
     t5_tokens = t5_tokenizer.encode(text, return_tensors="pt", add_special_tokens=True)[0].tolist()
     clip_tokens = clip_tokenizer.encode(text, add_special_tokens=True)
@@ -51,9 +49,9 @@ def count_tokens(text):
     )
 
 # Create a Gradio interface with custom layout
-with gr.Blocks(title="
-    gr.Markdown("#
-    gr.Markdown("
+with gr.Blocks(title="DiffusionTokenizer") as iface:
+    gr.Markdown("# DiffusionTokenizer🔢")
+    gr.Markdown("A lightning fast visulization of the tokens used in diffusion models. Use it to understand how your prompt is tokenized.")
 
     with gr.Row():
         text_input = gr.Textbox(label="Diffusion Prompt", placeholder="Enter your prompt here...")
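For context, the first hunk's refactor amounts to this pattern: load both text-encoder tokenizers once at import time and reuse them on every call, rather than re-instantiating them inside `count_tokens`. The sketch below is a minimal standalone illustration, not the Space's code; the model IDs and `encode` calls come from the diff, while the `compare_token_counts` helper and the demo prompt are illustrative.

```python
from transformers import T5TokenizerFast, CLIPTokenizer

# Loaded once at import time, mirroring the new module-level lines in app.py
t5_tokenizer = T5TokenizerFast.from_pretrained("google/t5-v1_1-xxl", legacy=False)
clip_tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")


def compare_token_counts(text: str) -> dict:
    """Return how many tokens each text encoder sees for the same prompt (illustrative helper)."""
    t5_ids = t5_tokenizer.encode(text, add_special_tokens=True)
    clip_ids = clip_tokenizer.encode(text, add_special_tokens=True)
    return {
        "t5_tokens": len(t5_ids),
        "clip_tokens": len(clip_ids),  # CLIP text encoders typically cap out at 77 tokens
    }


if __name__ == "__main__":
    print(compare_token_counts("a photo of an astronaut riding a horse on mars"))
```

Loading at module level means the tokenizer files are downloaded and parsed once when the Space starts, instead of on every Gradio request, which is what makes the interface in the second hunk feel fast.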