from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer


class AnonymizationPipeline:
    """Replace entities detected by a token-classification model with placeholders.

    The model and tokenizer are loaded once in the constructor and wrapped in
    a Hugging Face ``"ner"`` pipeline; :meth:`anonymize` then substitutes each
    detected span with ``[<label>]``.

    Attributes are plain instance attributes: ``model``, ``tokenizer`` and
    ``ner_pipeline``.
    """

    # Checkpoint that was previously hard-coded; kept as the default/fallback
    # so existing behaviour is preserved when no usable name is supplied.
    DEFAULT_MODEL_NAME = "JonathanEGP/Beto_Ner"

    def __init__(self, model_name=DEFAULT_MODEL_NAME):
        """Load the model and tokenizer and build the NER pipeline.

        Args:
            model_name: Identifier passed to ``from_pretrained`` for both the
                model and the tokenizer. ``None`` or an empty string falls
                back to ``DEFAULT_MODEL_NAME``.
        """
        # Previously this argument was accepted but silently ignored.
        resolved_name = model_name or self.DEFAULT_MODEL_NAME
        self.model = AutoModelForTokenClassification.from_pretrained(resolved_name)
        self.tokenizer = AutoTokenizer.from_pretrained(resolved_name)
        self.ner_pipeline = pipeline(
            "ner", model=self.model, tokenizer=self.tokenizer
        )

    def anonymize(self, text):
        """Return ``text`` with every detected entity replaced by ``[<label>]``.

        Args:
            text: The string to anonymize.

        Returns:
            A new string in which each span reported by ``self.ner_pipeline``
            is replaced by its ``entity`` label wrapped in square brackets.
            The text is returned unchanged when no entity is reported.

        Raises:
            ValueError: If an entity has no ``start``/``end`` character
                offsets, since it cannot then be located in ``text``.
        """
        # NOTE(review): the pipeline is built without an aggregation strategy,
        # so presumably each tagged token is replaced on its own (labels such
        # as B-/I- tags appear as-is) - confirm this is the intended output.
        entities = self.ner_pipeline(text)

        # Slicing with None offsets would not fail: text[:None] is the whole
        # string, so the original text would leak into the result. Fail loudly.
        if any(
            item.get("start") is None or item.get("end") is None
            for item in entities
        ):
            raise ValueError(
                "NER pipeline returned entities without character offsets; "
                "a tokenizer that provides offset mappings is required."
            )

        # Replace from the end of the string backwards so that offsets of the
        # entities still to be processed are not shifted by earlier edits.
        for item in sorted(entities, key=lambda found: found["end"], reverse=True):
            placeholder = f"[{item['entity']}]"
            text = text[:item["start"]] + placeholder + text[item["end"]:]
        return text