JonathanEGP committed on
Commit
54741b9
·
verified ·
1 Parent(s): c8c9cbd

Upload Anonimizador_Ner.txt

Browse files
Files changed (1) hide show
  1. Anonimizador_Ner.txt +20 -0
Anonimizador_Ner.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer
2
+
3
class AnonymizationPipeline:
    """Replace named entities found by a token-classification model with tags.

    The model and tokenizer are loaded once at construction time and wrapped
    in a ``transformers`` ``"ner"`` pipeline; :meth:`anonymize` then rewrites
    a text by substituting every detected span with ``[<label>]``.
    """

    # Checkpoint that was previously hard-coded; kept as the fallback so
    # existing behaviour is preserved when no usable name is supplied.
    DEFAULT_MODEL = "JonathanEGP/Beto_Ner"

    def __init__(self, model_name=DEFAULT_MODEL):
        """Load the model/tokenizer pair and build the NER pipeline.

        Args:
            model_name: Identifier or path passed to ``from_pretrained`` for
                both the model and the tokenizer. A falsy value (``None`` or
                ``""``) falls back to ``DEFAULT_MODEL``.
        """
        # The argument used to be ignored in favour of a hard-coded name.
        model_name = model_name or self.DEFAULT_MODEL
        self.model = AutoModelForTokenClassification.from_pretrained(model_name)
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.ner_pipeline = pipeline("ner", model=self.model, tokenizer=self.tokenizer)

    def anonymize(self, text):
        """Return ``text`` with each detected entity replaced by ``[label]``.

        Args:
            text: The string to anonymize.

        Returns:
            The rewritten string. Empty input is returned unchanged without
            invoking the pipeline.
        """
        if not text:
            return text

        # Keep only entities that carry usable character offsets.
        # NOTE(review): offsets are presumably None when the tokenizer cannot
        # provide them (e.g. a non-fast tokenizer) - confirm against the
        # transformers documentation.
        located = [
            entity
            for entity in self.ner_pipeline(text)
            if entity.get("start") is not None and entity.get("end") is not None
        ]

        # Work from the end of the string backwards so that a replacement
        # never shifts the offsets of spans that are still to be processed.
        # ``sorted`` is used to leave the pipeline's own list untouched.
        boundary = len(text)
        for entity in sorted(located, key=lambda item: item["end"], reverse=True):
            start = entity["start"]
            end = entity["end"]
            if end > boundary:
                # Overlaps a span that has already been replaced; slicing
                # with these offsets would corrupt the rewritten text.
                continue
            # Aggregated pipeline output uses "entity_group" instead of
            # "entity"; accept either.
            if "entity_group" in entity:
                entity_type = entity["entity_group"]
            else:
                entity_type = entity["entity"]
            text = text[:start] + f"[{entity_type}]" + text[end:]
            boundary = start

        return text