from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer

class AnonymizationPipeline:
    """Replace named entities in a text with ``[TYPE]`` placeholders.

    A token-classification model and its tokenizer are loaded once in the
    constructor and wrapped in a ``transformers`` "ner" pipeline that is
    reused by every :meth:`anonymize` call.
    """

    # Checkpoint used when the caller does not name one.
    DEFAULT_MODEL = "JonathanEGP/Beto_Ner"

    def __init__(self, model_name=None):
        """Load the model and tokenizer and build the NER pipeline.

        Args:
            model_name: Identifier or path handed to ``from_pretrained`` for
                both the model and the tokenizer. Falls back to
                ``DEFAULT_MODEL`` when omitted or empty.
        """
        model_name = model_name or self.DEFAULT_MODEL
        self.model = AutoModelForTokenClassification.from_pretrained(model_name)
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.ner_pipeline = pipeline("ner", model=self.model, tokenizer=self.tokenizer)

    def anonymize(self, text: str) -> str:
        """Return ``text`` with every detected entity replaced by ``[TYPE]``.

        Token-level predictions that belong to the same entity are collapsed
        into one placeholder, and a leading ``B-``/``I-`` tag prefix is
        dropped, so ``"Juan Pérez"`` becomes ``"[PER]"`` rather than one
        placeholder per token.

        Args:
            text: The text to anonymize.

        Returns:
            The anonymized text; ``text`` itself when it is empty or no
            entity is reported.

        Raises:
            TypeError: If a reported entity carries no character offsets.
        """
        if not text:
            return text

        spans = self._merge_entities(text, self.ner_pipeline(text))

        # Substitute from right to left so that the offsets of the spans
        # still to be processed are not shifted by earlier replacements.
        for start, end, label in reversed(spans):
            text = f"{text[:start]}[{label}]{text[end:]}"
        return text

    @staticmethod
    def _split_label(raw_label):
        """Split a label such as ``"B-PER"`` into ``("B", "PER")``.

        Labels without a ``B-``/``I-`` prefix are returned as ``("", label)``.
        """
        prefix, sep, entity_type = raw_label.partition("-")
        if sep and entity_type and prefix in ("B", "I"):
            return prefix, entity_type
        return "", raw_label

    @classmethod
    def _merge_entities(cls, text, entities):
        """Collapse pipeline predictions into ordered, non-overlapping spans.

        Returns a list of ``(start, end, entity_type)`` tuples sorted by
        ``start``.
        """
        tokens = []
        for entity in entities:
            start, end = entity["start"], entity["end"]
            if start is None or end is None:
                raise TypeError(
                    "NER entity has no character offsets; cannot locate it in the text"
                )
            # Aggregated pipeline output uses "entity_group"; raw token-level
            # output uses "entity".
            raw_label = entity["entity_group"] if "entity_group" in entity else entity["entity"]
            prefix, entity_type = cls._split_label(raw_label)
            tokens.append((start, end, prefix, entity_type))
        tokens.sort(key=lambda tok: (tok[0], tok[1]))

        merged = []
        for start, end, prefix, entity_type in tokens:
            if merged:
                prev_start, prev_end, prev_type = merged[-1]
                # Touching pieces are parts of the same word; otherwise the
                # piece continues the entity only if it is not tagged as a
                # new beginning and nothing but whitespace separates them.
                touching = start <= prev_end
                continues = touching or (
                    prefix != "B" and not text[prev_end:start].strip()
                )
                if entity_type == prev_type and continues:
                    merged[-1] = (prev_start, max(prev_end, end), prev_type)
                    continue
                if start < prev_end:
                    # Overlap with a different type: keep the earlier span
                    # intact so the replacements cannot corrupt each other.
                    start = prev_end
                    if start >= end:
                        continue
            merged.append((start, end, entity_type))
        return merged