vonewman committed on
Commit
56690ec
1 Parent(s): 7c44018

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -2
app.py CHANGED
@@ -70,7 +70,7 @@ def tag_sentence(text):
70
  predictions = predict_ner_labels(model, tokenizer, text)
71
  # Créez un DataFrame avec les colonnes "words" et "tags"
72
  df = pd.DataFrame({'words': text.split(), 'tags': predictions})
73
- df['tags'] = df['tags'].map(lambda x: f'background-color: lightblue' if x != 'O' else '')
74
  return df
75
 
76
  st.title("📘 Named Entity Recognition Wolof")
@@ -97,4 +97,24 @@ if submit_button:
97
  file_name="results.text", mime='text/plain', key='text')
98
  with c3:
99
  jsonbutton = st.download_button(label="📥 Download .json", data=convert_json(results),
100
- file_name="results.json", mime='application/json
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  predictions = predict_ner_labels(model, tokenizer, text)
71
  # Créez un DataFrame avec les colonnes "words" et "tags"
72
  df = pd.DataFrame({'words': text.split(), 'tags': predictions})
73
+ df['tags'] = df['tags'].map(lambda x: 'background-color: lightblue' if x != 'O' else '')
74
  return df
75
 
76
  st.title("📘 Named Entity Recognition Wolof")
 
97
  file_name="results.text", mime='text/plain', key='text')
98
  with c3:
99
  jsonbutton = st.download_button(label="📥 Download .json", data=convert_json(results),
100
+ file_name="results.json", mime='application/json', key='json')
101
+
102
+ st.header("")
103
+
104
+ c1, c2, c3 = st.columns([1, 3, 1])
105
+
106
+ with c2:
107
+ st.table(results.style.format(precision=2))
108
+
109
+ st.header("")
110
+ st.header("")
111
+ st.header("")
112
+ with st.expander("ℹ️ - About this app", expanded=True):
113
+ st.write(
114
+ """
115
+ - The **Named Entity Recognition Wolof** app is a tool that performs named entity recognition in Wolof.
116
+ - The available entities are: *corporation*, *location*, *person*, and *date*.
117
+ - The app uses the [XLMRoberta model](https://huggingface.co/xlm-roberta-base), fine-tuned on the [masakhaNER](https://huggingface.co/datasets/masakhane/masakhaner2) dataset.
118
+ - The model uses the **byte-level BPE tokenizer**. Each sentence is first tokenized.
119
+ """
120
+ )