Spaces:
Runtime error
Runtime error
File size: 471 Bytes
154ca7b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 |
from transformers import BertTokenizer
# Module-level tokenizer built from the pretrained 'bert-base-uncased'
# checkpoint; it is created once, when this module is imported.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# Parameter preparation.
# NOTE(review): MAX_SENT_LENGTH is not used in the visible code; presumably the
# maximum sentence length (in tokens) for padding/truncation -- confirm at call sites.
MAX_SENT_LENGTH = 128
# Vocabulary id of the tokenizer's padding token.
PAD_TOKEN_ID = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)
def normalize_v2(text: str, entity: str) -> str:
    """Lowercase *text* and mask every occurrence of *entity* in it.

    Both arguments are lowercased before matching, so the match is
    case-insensitive. Matching is plain substring matching (no word
    boundaries), so an entity can also match inside a longer word.

    Args:
        text: The sentence to normalize.
        entity: The entity mention to replace with the tokenizer's mask token.

    Returns:
        The lowercased text with each occurrence of the lowercased entity
        replaced by ``tokenizer.mask_token``. The mask token itself is inserted
        as-is (it is not lowercased). If the entity is empty or does not occur
        in the text, the lowercased text is returned unchanged.
    """
    lowered_text = text.lower()
    lowered_entity = entity.lower()
    # An empty string is a substring of every string, and
    # ``str.replace("", mask)`` would insert the mask token between every
    # character, so an empty entity is treated the same as "not found".
    if not lowered_entity or lowered_entity not in lowered_text:
        return lowered_text
    # TODO: not sure if this will be decoded by BERT.
    return lowered_text.replace(lowered_entity, tokenizer.mask_token)