# Import necessary modules from the transformers library
from transformers import pipeline
from transformers import AutoModelForTokenClassification, AutoTokenizer

# Define the model name to be used for token classification. We use the Impresso NER model,
# which can be found at "https://huggingface.co/impresso-project/ner-stacked-bert-multilingual"
MODEL_NAME = "impresso-project/ner-stacked-bert-multilingual"

# Load the tokenizer corresponding to the specified model name
ner_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Build the NER pipeline; trust_remote_code is required because the model ships its own pipeline code
ner_pipeline = pipeline(
    "generic-ner",
    model=MODEL_NAME,
    tokenizer=ner_tokenizer,
    trust_remote_code=True,
    device="cpu",
)

sentences = [
    """In the year 1789, King Louis XVI, ruler of France, convened the Estates-General at the Palace of Versailles, where Marie Antoinette, the Queen of France, alongside Maximilien Robespierre, a leading member of the National Assembly, debated with Jean-Jacques Rousseau, the famous philosopher, and Charles de Talleyrand, the Bishop of Autun, regarding the future of the French monarchy. At the same time, across the Atlantic in Philadelphia, George Washington, the first President of the United States, and Thomas Jefferson, the nation's Secretary of State, were drafting policies for the newly established American government following the signing of the Constitution."""
]

print(sentences[0])


# Helper function to print entities one per row
def print_nicely(entities):
    for entity in entities:
        print(
            f"Entity: {entity['entity']} | Confidence: {entity['score']:.2f}% | "
            f"Text: {entity['word'].strip()} | Start: {entity['start']} | End: {entity['end']}"
        )


# Run the pipeline on each sentence and print the recognized entities
for sentence in sentences:
    results = ner_pipeline(sentence)
    # The pipeline returns entities grouped by key (coarse and fine levels); print each group
    for key in results.keys():
        print_nicely(results[key])