adenhaus
/

mt5-large-stata

Text2Text Generation

Model card Files Files and versions Community

adenhaus committed on Feb 21, 2024

Commit

d6feb95

·

verified ·

1 Parent(s): e721459

Update README.md

Files changed (1) hide show

README.md +6 -6

README.md CHANGED Viewed

@@ -36,9 +36,10 @@ It achieves an RMSE loss of 0.32 on the dev split, and a Pearson correlation of
 from transformers import MT5ForConditionalGeneration, MT5Tokenizer
 import torch
-model_path = 'adenhaus/mt5-large-stata'
 tokenizer = MT5Tokenizer.from_pretrained(model_path)
 model = MT5ForConditionalGeneration.from_pretrained(model_path)
 class RegressionLogitsProcessor(torch.nn.Module):
     def __init__(self, extra_token_id):
@@ -53,8 +54,6 @@ def preprocess_inference_input(input_text):
     input_encoded = tokenizer(input_text, return_tensors='pt')
     return input_encoded
-unused_token = "<extra_id_1>"
 def sigmoid(x):
     return 1 / (1 + torch.exp(-x))
@@ -74,10 +73,11 @@ def do_regression(input_str):
   # Extract the logit
   unused_token_id = tokenizer.get_vocab()[unused_token]
   regression_logit = output_sequences.scores[0][0][unused_token_id]
   regression_score = sigmoid(regression_logit).item()
   return regression_score
-print(do_regression("Vaccination Coverage by Province | Percent of children age 12-23 months who received all basic vaccinations | (Angola, 31) (Cabinda, 38) (Zaire, 38) (Uige, 15) (Bengo, 24) (Cuanza Norte, 30) (Luanda, 50) (Malanje, 38) (Lunda Norte, 21) (Cuanza Sul, 19) (Lunda Sul, 21) (Benguela, 26) (Huambo, 26) (Bié, 10) (Moxico, 10) (Namibe, 30) (Huíla, 23) (Cunene, 40) (Cuando Cubango, 8) [output] Three in ten children age 12-23 months received all basic vaccinations—one dose each of BCG and measles and three doses each of DPT-containing vaccine and polio."))
 ```

 from transformers import MT5ForConditionalGeneration, MT5Tokenizer
 import torch
+model_path = 'adenhaus/mt5-small-stata'
 tokenizer = MT5Tokenizer.from_pretrained(model_path)
 model = MT5ForConditionalGeneration.from_pretrained(model_path)
+unused_token = "<extra_id_1>"
 class RegressionLogitsProcessor(torch.nn.Module):
     def __init__(self, extra_token_id):
     input_encoded = tokenizer(input_text, return_tensors='pt')
     return input_encoded
 def sigmoid(x):
     return 1 / (1 + torch.exp(-x))
   # Extract the logit
   unused_token_id = tokenizer.get_vocab()[unused_token]
   regression_logit = output_sequences.scores[0][0][unused_token_id]
   regression_score = sigmoid(regression_logit).item()
   return regression_score
+source_table = "Vaccination Coverage by Province | Percent of children age 12-23 months who received all basic vaccinations | (Angola, 31) (Cabinda, 38) (Zaire, 38) (Uige, 15) (Bengo, 24) (Cuanza Norte, 30) (Luanda, 50) (Malanje, 38) (Lunda Norte, 21) (Cuanza Sul, 19) (Lunda Sul, 21) (Benguela, 26) (Huambo, 26) (Bié, 10) (Moxico, 10) (Namibe, 30) (Huíla, 23) (Cunene, 40) (Cuando Cubango, 8)"
+output = "Three in ten children age 12-23 months received all basic vaccinations—one dose each of BCG and measles and three doses each of DPT-containing vaccine and polio."
+print(do_regression(source_table + " [output] " + output))
 ```