"""Minimal Streamlit app: upload a text file and run named entity recognition."""

import streamlit as st
from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline

# Any token-classification checkpoint from the Hugging Face Hub works here;
# "dslim/bert-base-NER" is a commonly used, publicly available one.
model = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")
tokenizer = AutoTokenizer.from_pretrained("dslim/bert-base-NER")

# Build the pipeline under a distinct name so it does not shadow the imported
# `pipeline` function; aggregation_strategy="simple" merges subword tokens
# back into whole entities.
ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")
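# Each prediction from the aggregated "ner" pipeline is a dict with
# "entity_group", "word", "score", "start", and "end" keys.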


def traiter_document(document):
    # The pipeline handles tokenization itself, so pass it the raw text
    # rather than pre-tokenized input.
    predictions = ner_pipeline(document)
    return predictions


def charger_document(fichier):
    # st.file_uploader returns an UploadedFile (a bytes buffer), not a file
    # path, and it returns None until the user has picked a file.
    if fichier is None:
        return None
    return fichier.read().decode("utf-8")


st.title("Named Entity Recognition")

document = charger_document(st.file_uploader("Choose a document"))

# Only run the model once a file has actually been uploaded.
if document:
    predictions = traiter_document(document)

    for prediction in predictions:
        st.write(f"**Entity:** {prediction['entity_group']}")
        st.write(f"**Text:** {prediction['word']}")
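# Launch with: streamlit run app.py  (assuming this script is saved as app.py)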