# Anonimizador_Ner / Anonimizador_Ner.txt
# Uploaded by JonathanEGP ("Upload Anonimizador_Ner.txt", commit 54741b9, verified); 816 bytes.
from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer
class AnonymizationPipeline:
    """Replace the entities found by a token-classification model with tags.

    Each span reported by the NER pipeline is substituted with its label in
    square brackets, e.g. ``"Juan vive en Madrid."`` becomes
    ``"[B-PER] vive en [B-LOC]."`` when the pipeline reports those two spans.
    """

    # Checkpoint the class has always loaded; used when no name is supplied.
    DEFAULT_MODEL_NAME = "JonathanEGP/Beto_Ner"

    def __init__(self, model_name=DEFAULT_MODEL_NAME, *, aggregation_strategy=None):
        """Load the model and tokenizer and build the NER pipeline.

        Args:
            model_name: Identifier or path handed to ``from_pretrained`` for
                both the model and the tokenizer. A falsy value falls back to
                ``DEFAULT_MODEL_NAME``.
            aggregation_strategy: Optional value forwarded to ``pipeline`` as
                its ``aggregation_strategy`` argument. When ``None`` (the
                default) the argument is not passed at all, so the pipeline
                is built exactly as before.
        """
        model_name = model_name or self.DEFAULT_MODEL_NAME
        self.model_name = model_name
        self.model = AutoModelForTokenClassification.from_pretrained(model_name)
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

        # Only forward the option when the caller asked for it, so the default
        # construction path stays identical to the original call.
        pipeline_kwargs = {}
        if aggregation_strategy is not None:
            pipeline_kwargs["aggregation_strategy"] = aggregation_strategy
        self.ner_pipeline = pipeline(
            "ner",
            model=self.model,
            tokenizer=self.tokenizer,
            **pipeline_kwargs,
        )

    def anonymize(self, text):
        """Return ``text`` with every detected entity replaced by ``[LABEL]``.

        Args:
            text: The string to anonymize.

        Returns:
            A new string in which each span reported by ``self.ner_pipeline``
            is replaced by its label in square brackets. The label is read
            from ``entity_group`` when that key is present, otherwise from
            ``entity``.

        Raises:
            ValueError: If an entity has no ``start``/``end`` offsets. Without
                them the span cannot be located, and returning the text
                untouched would leave the entity in the output.
        """
        # Copy so the list handed back by the pipeline is never mutated.
        entities = list(self.ner_pipeline(text))

        # Validate everything up front: never return a half-anonymized text.
        for entity in entities:
            if entity.get("start") is None or entity.get("end") is None:
                raise ValueError(
                    "NER pipeline returned an entity without character "
                    "offsets; cannot anonymize the text safely"
                )

        # Replace from right to left so earlier offsets stay valid while the
        # string changes length.
        entities.sort(key=lambda item: item["end"], reverse=True)
        for entity in entities:
            if "entity_group" in entity:
                label = entity["entity_group"]
            else:
                label = entity["entity"]
            text = text[:entity["start"]] + f"[{label}]" + text[entity["end"]:]
        return text