# Import necessary modules from the transformers library
from transformers import pipeline
from transformers import AutoModelForTokenClassification, AutoTokenizer

# Define the model name to be used for token classification. We use the Impresso NER model,
# which can be found at "https://huggingface.co/impresso-project/ner-stacked-bert-multilingual"
MODEL_NAME = "impresso-project/ner-stacked-bert-multilingual"

# Load the tokenizer corresponding to the specified model name
ner_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Build the NER pipeline; trust_remote_code is required because the model ships its own pipeline code
ner_pipeline = pipeline(
    "generic-ner",
    model=MODEL_NAME,
    tokenizer=ner_tokenizer,
    trust_remote_code=True,
    device="cpu",
)

sentences = [
    """In the year 1789, King Louis XVI, ruler of France, convened the Estates-General at the Palace of Versailles, where Marie Antoinette, the Queen of France, alongside Maximilien Robespierre, a leading member of the National Assembly, debated with Jean-Jacques Rousseau, the famous philosopher, and Charles de Talleyrand, the Bishop of Autun, regarding the future of the French monarchy. At the same time, across the Atlantic in Philadelphia, George Washington, the first President of the United States, and Thomas Jefferson, the nation's Secretary of State, were drafting policies for the newly established American government following the signing of the Constitution."""
]

print(sentences[0])


# Helper function to print entities one per row
def print_nicely(entities):
    for entity in entities:
        print(
            f"Entity: {entity['entity']} | Confidence: {entity['score']:.2f}% | "
            f"Text: {entity['word'].strip()} | Start: {entity['start']} | End: {entity['end']}"
        )


# Run the pipeline on each sentence and print the recognized entities
for sentence in sentences:
    results = ner_pipeline(sentence)
    # The pipeline returns entities grouped by key (coarse and fine levels); print each group
    for key in results.keys():
        print_nicely(results[key])