File size: 1,367 Bytes
8fbe3ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
from transformers import AutoTokenizer, AutoModelForMaskedLM
import torch

# Load the tokenizer and the masked-language model from one checkpoint identifier,
# so the two can never drift apart.
MODEL_NAME = "jfernandez/cebfil-roberta"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForMaskedLM.from_pretrained(MODEL_NAME)

# Define a function to generate responses
def generate_response(text, top_k=5):
  """Extend ``text`` by one word predicted by the masked language model.

  A mask token is appended to ``text``, the module-level ``model`` scores
  that position, and one of the ``top_k`` highest-scoring candidates is
  picked uniformly at random to fill it.

  Args:
    text: Prompt string to extend.
    top_k: Number of highest-scoring candidates to sample from. Defaults
      to 5, the value that was previously hard-coded.

  Returns:
    ``text`` followed by a single space and the chosen candidate. The
    result varies between calls because the candidate is drawn randomly.
  """
  # Kept local: the file-level imports do not bring ``random`` into scope.
  import random

  # Use the tokenizer's own mask string instead of a hard-coded "<mask>",
  # so the prompt stays correct if the checkpoint uses a different token.
  prompt = f"{text} {tokenizer.mask_token}"
  inputs = tokenizer(prompt, return_tensors="pt")

  # Inference only: skip autograd bookkeeping for the forward pass.
  with torch.no_grad():
    logits = model(**inputs).logits

  # The slot appended above is the last mask in the sequence; any mask
  # tokens already present in the caller's text are deliberately ignored.
  mask_positions = (inputs["input_ids"][0] == tokenizer.mask_token_id).nonzero(as_tuple=True)[0]
  mask_position = int(mask_positions[-1])

  # Flatten to a plain list of ints: handing a 2-D tensor of ids to the
  # tokenizer fails when it tries to convert each row to a single int.
  candidate_ids = torch.topk(logits[0, mask_position], k=top_k).indices.tolist()

  # decode() yields readable text, whereas raw vocabulary pieces may carry
  # sub-word marker characters; strip() drops any leading space it adds.
  candidates = [tokenizer.decode([token_id]).strip() for token_id in candidate_ids]

  chosen = random.choice(candidates)
  # Append to the original text rather than str.replace on the prompt, so
  # mask strings inside the caller's own text are never rewritten.
  return f"{text} {chosen}"

# Smoke-test the function on a few sample prompts, printing each completion.
for sample_prompt in ("Komosta ka", "Unsa imong pangalan", "Salamat sa"):
  print(generate_response(sample_prompt))