Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -76,6 +76,15 @@ def stream_wp_token_ids():
|
|
76 |
for id in ids:
|
77 |
yield f":{next(color)}-background[{id}] "
|
78 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
|
80 |
### Tokenizer Descriptions ###
|
81 |
|
@@ -131,3 +140,5 @@ elif tokenizer == "Byte Pair Encoding (Open AI GPT-4o)":
|
|
131 |
if token_id == True:
|
132 |
color = itertools.cycle(colors)
|
133 |
st.write(stream_wp_token_ids)
|
|
|
|
|
|
76 |
for id in ids:
|
77 |
yield f":{next(color)}-background[{id}] "
|
78 |
|
79 |
+
def num_tokens(txt):
    """Return a summary string with the token count and fertility rate.

    Fertility rate ("f-rate") is tokens per whitespace-delimited word,
    i.e. len(ids) / number of words in *txt*.

    Parameters
    ----------
    txt : str
        The input text whose whitespace word count is used as denominator.

    Returns
    -------
    str
        ``'Token count N, f-rate R'`` on success, or ``''`` if the
        module-level token ids are unavailable.
    """
    words = white_space_tokenizer(txt)
    # Bug fix: the original read `len(n_words)` before n_words existed,
    # raising NameError on every call (silently eaten by the bare except).
    # Guard against division by zero when the input has no words.
    n_words = len(words) if len(words) else 1
    try:
        # NOTE(review): `ids` is assumed to be the module-level token-id
        # sequence produced by the currently selected tokenizer -- confirm
        # against the surrounding app.py, it is not defined in this view.
        return f'Token count {len(ids)}, f-rate {len(ids)/n_words}'
    except Exception:
        # Best-effort: if `ids` is missing or not sized, show nothing
        # rather than crash the Streamlit page.
        return ''
|
86 |
+
|
87 |
+
|
88 |
|
89 |
### Tokenizer Descriptions ###
|
90 |
|
|
|
140 |
if token_id == True:
|
141 |
color = itertools.cycle(colors)
|
142 |
st.write(stream_wp_token_ids)
|
143 |
+
|
144 |
+
st.write(num_tokens())
|