mponty committed on
Commit
1d7fcdc
·
verified ·
1 Parent(s): e71d3bd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -0
app.py CHANGED
@@ -76,6 +76,15 @@ def stream_wp_token_ids():
76
  for id in ids:
77
  yield f":{next(color)}-background[{id}] "
78
 
 
 
 
 
 
 
 
 
 
79
 
80
  ### Tokenizer Descriptions ###
81
 
@@ -131,3 +140,5 @@ elif tokenizer == "Byte Pair Encoding (Open AI GPT-4o)":
131
  if token_id == True:
132
  color = itertools.cycle(colors)
133
  st.write(stream_wp_token_ids)
 
 
 
76
  for id in ids:
77
  yield f":{next(color)}-background[{id}] "
78
 
79
+ def num_tokens(txt):
80
+ words = white_space_tokenizer(txt)
81
+ n_words = len(n_words) if len(n_words) else 1
82
+ try:
83
+ return f'Token count {len(ids)}, f-rate {len(ids)/n_words}'
84
+ except:
85
+ return ''
86
+
87
+
88
 
89
  ### Tokenizer Descriptions ###
90
 
 
140
  if token_id == True:
141
  color = itertools.cycle(colors)
142
  st.write(stream_wp_token_ids)
143
+
144
+ st.write(num_tokens())