Spaces:
Runtime error
Runtime error
# Gradio | |
import gradio as gr | |
# Hugging Face libraries | |
from transformers import pipeline | |
from transformers import AutoTokenizer | |
# Checkpoint name shared by the pipeline and the tokenizer created below.
model_checkpoint = "dbmdz/bert-large-cased-finetuned-conll03-english"

# Build the NER pipeline for that checkpoint, requesting the "simple"
# aggregation strategy.
ner_task = pipeline(
    task="ner",
    model=model_checkpoint,
    aggregation_strategy="simple",
)

# Load a tokenizer for the same checkpoint; it is used on its own to show
# the tokens of the input sentence.
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
# Sample sentences, passed to the interface as its examples.
sentence1 = "Herbert Akroyd Stuart patented the first diesel engine, 1890"
# Adjacent string literals are joined by the parser, so this long sentence can
# be wrapped without a backslash inside the literal (which would embed any
# indentation of the continuation line into the text).
sentence2 = (
    "May 10 A delegation tells Leopold III his return would be "
    "illtimed, 1945"
)
sentence3 = "Fri May 10 Fred Astaire (Frederick Austerlitz) born in Omaha, Nebraska, 1899"
sentence4 = "Fri May 10 Germany invades Low Countries, 1940"
sentence5 = "Fri May 10 Nazi bookburning, 1933"
sentence6 = "Fri May 10 Confederate Memorial Day in South Carolina"
sentence7 = "Fri May 10 Mothers Day in Guatemala"
sentence8 = "Fri May 10 Dave Mason is born in Worcester, England, 1945"
# Gradio interface | |
def _format_entities(entities, min_score=0.8):
    """Render the confident NER predictions as a plain-text report.

    Args:
        entities: Iterable of prediction dicts as returned by the NER
            pipeline; each one is read through its ``score``,
            ``entity_group`` and ``word`` keys.
        min_score: Predictions whose score is below this value are skipped.

    Returns:
        One numbered section per kept prediction (numbering starts at 1 for
        intuitive reading), followed by a line with the number of kept
        predictions.
    """
    sections = []
    for entity in entities:
        # Only report predictions the model is reasonably sure about.
        if entity['score'] < min_score:
            continue
        number = len(sections) + 1
        sections.append(
            f"Number: {number} \n"
            f"Entity: {entity['entity_group']}\n"
            f"Word group: {entity['word']}\n"
            f"Score: {entity['score']}\n"
            # The raw prediction dict is appended as well.
            f"{entity}\n\n"
        )
    kept_count = len(sections)
    sections.append(f"Number of predicted entities: {kept_count}\n\n")
    # Join once instead of growing a string with += on every iteration.
    return "".join(sections)


def predict(sentence):
    """Tokenize *sentence* and report the entities predicted for it.

    Args:
        sentence: Text to analyse.

    Returns:
        A ``(tokens, report)`` pair: the token strings the tokenizer
        produced for the sentence, and the formatted text report of the
        predicted entities.
    """
    # Get the tokens from the tokenizer
    token_pieces = tokenizer(sentence).tokens()
    # Get the prediction of ner from the model
    return token_pieces, _format_entities(ner_task(sentence))
# Description passed to the interface; it mixes a Markdown heading with HTML.
# Its lines are kept at column 0 so that no indentation ends up in the text.
_DESCRIPTION = f"""
## Using model {model_checkpoint} to predict entities type
<p style="font-size: 1.2rem;">Notes: </p>
<ul style="font-size: 1.2rem; list-style-type:square">
<li> The examples are from the calendar utility in Linux.
<li> The model cannot recognize date and time.
<li> It can recognize PER (person), LOC (location), ORG (organization) and MISC (miscellaneous)
entities.
</ul>
"""

# Main Gradio interface: one input text area, two read-only output text areas
# (the tokens and the entity report returned by predict).
demo = gr.Interface(
    fn=predict,
    inputs=[
        gr.TextArea(
            label="Place your sentence here",
            lines=10,
            show_copy_button=True,
        ),
    ],
    outputs=[
        gr.TextArea(
            label="Tokens input to the model",
            interactive=False,
            lines=10,
            show_copy_button=True,
        ),
        gr.TextArea(
            label="Prediction of entities",
            interactive=False,
            lines=10,
            show_copy_button=True,
        ),
    ],
    # Each example is a one-element list because the interface has one input.
    examples=[
        [sentence1],
        [sentence2],
        [sentence3],
        [sentence4],
        [sentence5],
        [sentence6],
        [sentence7],
        [sentence8],
    ],
    title="NER (Named Entities Recognition)",
    description=_DESCRIPTION,
)

demo.launch()