Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -53,8 +53,8 @@ with st.sidebar:
|
|
53 |
st.header('All languages are NOT created (tokenized) equal!')
|
54 |
link="This project compares the tokenization length for different languages. For some tokenizers, tokenizing a message in one language may result in 10-20x more tokens than a comparable message in another language (e.g. try English vs. Burmese). This is part of a larger project of measuring inequality in NLP. See the original article: [All languages are NOT created (tokenized) equal](https://www.artfish.ai/p/all-languages-are-not-created-tokenized) on [Art Fish Intelligence](https://www.artfish.ai/)."
|
55 |
st.markdown(link)
|
56 |
-
st.divider()
|
57 |
|
|
|
58 |
st.subheader('Tokenizer')
|
59 |
# TODO multi-select tokenizers
|
60 |
tokenizer_name = st.sidebar.selectbox('Select tokenizer', options=tokenizer_names_to_test, label_visibility='collapsed')
|
|
|
53 |
st.header('All languages are NOT created (tokenized) equal!')
|
54 |
link="This project compares the tokenization length for different languages. For some tokenizers, tokenizing a message in one language may result in 10-20x more tokens than a comparable message in another language (e.g. try English vs. Burmese). This is part of a larger project of measuring inequality in NLP. See the original article: [All languages are NOT created (tokenized) equal](https://www.artfish.ai/p/all-languages-are-not-created-tokenized) on [Art Fish Intelligence](https://www.artfish.ai/)."
|
55 |
st.markdown(link)
|
|
|
56 |
|
57 |
+
st.header('Data Visualization')
|
58 |
st.subheader('Tokenizer')
|
59 |
# TODO multi-select tokenizers
|
60 |
tokenizer_name = st.sidebar.selectbox('Select tokenizer', options=tokenizer_names_to_test, label_visibility='collapsed')
|