liewchooichin's picture
get only score at least 0.8
7ab4587 verified
# Gradio
import gradio as gr
# Hugging Face libraries
from transformers import pipeline
from transformers import AutoTokenizer
# Checkpoint name shared by the NER pipeline and the tokenizer
model_checkpoint = "dbmdz/bert-large-cased-finetuned-conll03-english"

# NER pipeline; with aggregation_strategy="simple" each prediction is
# reported as a grouped entity (read later through `entity_group`/`word`)
ner_task = pipeline(
    task="ner",
    model=model_checkpoint,
    aggregation_strategy="simple",
)

# Stand-alone tokenizer for the same checkpoint, used to show the tokens
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
# Example inputs offered in the UI (calendar-style entries)
sentence1 = "Herbert Akroyd Stuart patented the first diesel engine, 1890"
sentence2 = (
    "May 10 A delegation tells Leopold III his return would be "
    "illtimed, 1945"
)
sentence3 = (
    "Fri May 10 Fred Astaire (Frederick Austerlitz) born in Omaha, "
    "Nebraska, 1899"
)
sentence4 = "Fri May 10 Germany invades Low Countries, 1940"
sentence5 = "Fri May 10 Nazi bookburning, 1933"
sentence6 = "Fri May 10 Confederate Memorial Day in South Carolina"
sentence7 = "Fri May 10 Mothers Day in Guatemala"
sentence8 = "Fri May 10 Dave Mason is born in Worcester, England, 1945"
# Prediction callback used by the Gradio interface
def predict(sentence, min_score=0.8):
    """
    Tokenize the sentence and predict its named entities.

    Args:
        sentence: Text to analyse.
        min_score: Entities whose score is below this value are left out
            of the report. Defaults to 0.8.

    Returns:
        A tuple ``(token_pieces, formatted_ner)``: the tokens the
        tokenizer produced for ``sentence``, and a text report with one
        section per kept entity followed by the number of kept entities.
    """
    # Get the tokens from the tokenizer
    token_pieces = tokenizer(sentence).tokens()

    # One text section per entity that passes the score threshold
    sections = []
    for result in ner_task(sentence):
        # Only keep the result where score is at least min_score
        if result['score'] < min_score:
            continue
        # Number the kept entities from 1 for intuitive reading
        number = len(sections) + 1
        sections.append(
            f"Number: {number} \n"
            f"Entity: {result['entity_group']}\n"
            f"Word group: {result['word']}\n"
            f"Score: {result['score']}\n"
            # Raw prediction dict, appended for reference
            f"{result}\n\n"
        )

    summary = f"Number of predicted entities: {len(sections)}\n\n"
    return token_pieces, "".join(sections) + summary
# Main Gradio interface: one text input, two read-only text outputs
# (the tokens and the entity report returned by `predict`)
demo = gr.Interface(
    fn=predict,
    inputs=[
        gr.TextArea(label="Place your sentence here", lines=10,
                    show_copy_button=True),
    ],
    outputs=[
        gr.TextArea(label="Tokens input to the model", interactive=False,
                    lines=10, show_copy_button=True),
        gr.TextArea(label="Prediction of entities", interactive=False,
                    lines=10, show_copy_button=True),
    ],
    # Each example is a one-element list because there is a single input
    examples=[[sentence1], [sentence2], [sentence3], [sentence4],
              [sentence5], [sentence6], [sentence7], [sentence8]],
    title="NER (Named Entities Recognition)",
    # The lines of this string are kept at column 0 on purpose: the text
    # is passed to Gradio verbatim, so any indentation would become part
    # of the description.
    description=f"""
## Using model {model_checkpoint} to predict entities type
<p style="font-size: 1.2rem;">Notes: </p>
<ul style="font-size: 1.2rem; list-style-type:square">
<li> The examples are from the calendar utility in Linux.
<li> The model cannot recognize date and time.
<li> It can recognize PER (person), LOC (location), ORG (organization) and MISC (miscellaneous)
entities.
</ul>
""",
)
demo.launch()