yenniejun committed on
Commit
cb5b76e
1 Parent(s): ba38c3f
Files changed (1) hide show
  1. app.py +3 -4
app.py CHANGED
@@ -50,11 +50,10 @@ tokenizer_names_to_test = [
50
 
51
  with st.sidebar:
52
 
53
- st.header('All languages are NOT created (tokenized) equal!')
54
- link="This project compares the tokenization length for different languages. For some tokenizers, tokenizing a message in one language may result in 10-20x more tokens than a comparable message in another language (e.g. try English vs. Burmese). This is part of a larger project of measuring inequality in NLP. See the original article: [All languages are NOT created (tokenized) equal](https://www.artfish.ai/p/all-languages-are-not-created-tokenized) on [Art Fish Intelligence](https://www.artfish.ai/)."
55
  st.markdown(link)
56
-
57
- st.divider()
58
 
59
  st.subheader('Tokenizer')
60
  # TODO multi-select tokenizers
 
50
 
51
  with st.sidebar:
52
 
53
+ st.header('All languages are NOT created (tokenized) equal!')
54
+ link="This project compares the tokenization length for different languages. For some tokenizers, tokenizing a message in one language may result in 10-20x more tokens than a comparable message in another language (e.g. try English vs. Burmese). This is part of a larger project of measuring inequality in NLP. See the original article: [All languages are NOT created (tokenized) equal](https://www.artfish.ai/p/all-languages-are-not-created-tokenized) on [Art Fish Intelligence](https://www.artfish.ai/)."
55
  st.markdown(link)
56
+ st.divider()
 
57
 
58
  st.subheader('Tokenizer')
59
  # TODO multi-select tokenizers