Spaces:

vonewman
/

ner_app

Runtime error

App Files Files Community

vonewman committed on Oct 29, 2023

Commit

447a922

•

1 Parent(s): 66f3b3e

Delete app.py

Browse files

Files changed (1) hide show

app.py +0 -116

app.py DELETED Viewed

@@ -1,116 +0,0 @@
-import streamlit as st
-import pandas as pd
-import re
-import json
-import transformers
-import torch
-from transformers import AutoTokenizer, AutoModelForTokenClassification, Trainer
-# Page-level settings for the app: title and icon.
-st.set_page_config(page_icon="📘", page_title="Named Entity Recognition Wolof")
-def convert_df(df: pd.DataFrame):
-    """Serialize *df* to CSV text without the index column, encoded as UTF-8 bytes."""
-    csv_text = df.to_csv(index=False)
-    return csv_text.encode('utf-8')
-def convert_json(df: pd.DataFrame):
-    """Return *df* as a JSON string keyed by row index.
-
-    The pandas output (``orient="index"``) is parsed and re-dumped with the
-    standard ``json`` module, so the text uses ``json.dumps`` formatting.
-    """
-    return json.dumps(json.loads(df.to_json(orient="index")))
-@st.cache_resource
-def load_model():
-    """Load the fine-tuned Wolof NER model, a Trainer wrapping it, and its tokenizer.
-
-    The result is cached with ``st.cache_resource`` so the checkpoint is
-    loaded once instead of on every call; this function is called several
-    times per form submission.
-
-    Returns:
-        tuple: ``(trainer, model, tokenizer)``.
-    """
-    checkpoint = "vonewman/wolof-finetuned-ner"
-    model = AutoModelForTokenClassification.from_pretrained(checkpoint)
-    trainer = Trainer(model=model)
-    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
-    return trainer, model, tokenizer
-def align_word_ids(texts):
-    """Build a per-token mask that keeps the first sub-token of every word.
-
-    ``texts`` is tokenized to a fixed length of 218 tokens (padded or
-    truncated). Positions whose word id is ``None`` and sub-tokens that
-    continue the previous word are marked ``-100``; the first sub-token of
-    each word is marked ``1``.
-
-    Returns:
-        list[int]: one entry per token position, either ``1`` or ``-100``.
-    """
-    # The previous version read an undefined ``label_all_tokens`` name inside
-    # a bare ``except``, so continuation sub-tokens always ended up as -100.
-    # That effective behaviour is kept here, stated explicitly.
-    label_all_tokens = False
-    _, _, tokenizer = load_model()
-    tokenized_inputs = tokenizer(texts, padding='max_length', max_length=218, truncation=True)
-    label_ids = []
-    previous_word_idx = None
-    for word_idx in tokenized_inputs.word_ids():
-        if word_idx is None:
-            label_ids.append(-100)
-        elif word_idx != previous_word_idx:
-            label_ids.append(1)
-        else:
-            label_ids.append(1 if label_all_tokens else -100)
-        previous_word_idx = word_idx
-    return label_ids
-def predict_ner_labels(model, tokenizer, sentence):
-    """Predict NER tags for *sentence*.
-
-    The sentence is tokenized to a fixed length of 218 tokens, run through
-    *model*, and only the positions that ``align_word_ids`` marks as kept
-    (anything other than ``-100``) contribute a prediction.
-
-    Args:
-        model: token-classification model called on the encoded input.
-        tokenizer: tokenizer used to encode *sentence*.
-        sentence: raw input text.
-
-    Returns:
-        list[str]: tag names looked up in ``id2tag``, one per kept position.
-    """
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    model = model.to(device)
-    encoded = tokenizer(sentence, padding='max_length', max_length=218, truncation=True, return_tensors="pt")
-    input_ids = encoded['input_ids'].to(device)
-    attention_mask = encoded['attention_mask'].to(device)
-    # Integer mask with a leading batch dimension, matching the model output.
-    label_ids = torch.tensor(align_word_ids(sentence), device=device).unsqueeze(0)
-    # Inference only: no gradients are needed, so skip autograd bookkeeping.
-    with torch.no_grad():
-        outputs = model(input_ids, attention_mask, None)
-    logits_clean = outputs[0][label_ids != -100]
-    predictions = logits_clean.argmax(dim=1).tolist()
-    return [id2tag[i] for i in predictions]
-# Maps the model's class index to its NER tag name.
-id2tag = {
-    0: 'O',
-    1: 'B-LOC',
-    2: 'B-PER',
-    3: 'I-PER',
-    4: 'B-ORG',
-    5: 'I-DATE',
-    6: 'B-DATE',
-    7: 'I-ORG',
-    8: 'I-LOC',
-}
-def tag_sentence(text):
-    """Tag *text* and return a DataFrame with ``words`` and ``tags`` columns."""
-    _, model, tokenizer = load_model()
-    tags = predict_ner_labels(model, tokenizer, text)
-    # Build a DataFrame pairing each whitespace-separated word with its tag.
-    words = text.split()
-    return pd.DataFrame({'words': words, 'tags': tags})
-st.title("📘 Named Entity Recognition Wolof")
-# Input form: the code below only acts once the form has been submitted.
-with st.form(key='my_form'):
-    x1 = st.text_input(label='Enter a sentence:', max_chars=250)
-    submit_button = st.form_submit_button(label='🏷️ Create tags')
-if submit_button:
-    if re.sub(r'\s+', '', x1) == '':
-        # Nothing but whitespace was entered.
-        st.error('Please enter a non-empty sentence.')
-    elif re.match(r'\A\s*\w+\s*\Z', x1):
-        # Exactly one word was entered; single-word input is rejected.
-        st.error("Please enter a sentence with more than one word")
-    else:
-        st.markdown("### Tagged Sentence")
-        st.header("")
-        results = tag_sentence(x1)
-        # Three download buttons, centred by narrow outer padding columns.
-        _, csv_col, txt_col, json_col, _ = st.columns([0.75, 1.5, 1.5, 1.5, 0.75])
-        with csv_col:
-            st.download_button(label="📥 Download .csv", data=convert_df(results),
-                               file_name="results.csv", mime='text/csv', key='csv')
-        with txt_col:
-            # The .txt download carries the same CSV-formatted content.
-            st.download_button(label="📥 Download .txt", data=convert_df(results),
-                               file_name="results.txt", mime='text/plain', key='text')
-        with json_col:
-            st.download_button(label="📥 Download .json", data=convert_json(results),
-                               file_name="results.json", mime='application/json', key='json')
-        st.header("")
-        # Show the tagged words in the wide middle column.
-        _, table_col, _ = st.columns([1, 3, 1])
-        with table_col:
-            st.table(results[['words', 'tags']])
-# Three empty headers serve as vertical spacing above the "About" section.
-for _ in range(3):
-    st.header("")
-with st.expander("ℹ️ - About this app", expanded=True):
-    st.write(
-        """
--   The **Named Entity Recognition Wolof** app is a tool that performs named entity recognition in Wolof.
--   The available entities are: *corporation*, *location*, *person*, and *date*.
--   The app uses the [XLMRoberta model](https://huggingface.co/xlm-roberta-base), fine-tuned on the [masakhaNER](https://huggingface.co/datasets/masakhane/masakhaner2) dataset.
--   The model uses the **byte-level BPE tokenizer**. Each sentence is first tokenized.
-        """
-    )