File size: 1,914 Bytes
7f4d207 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
#!/usr/bin/env python3
import os
from transformers import AutoTokenizer, GPT2Tokenizer
from megatron.initialize import initialize_megatron
from metaseq import checkpoint_utils
import torch
path = "./model"

# Megatron's args only have to be initialized with *something* here,
# => the values need not match the "correct" architecture for this checkpoint
initialize_megatron(
    args_defaults={
        "micro_batch_size": 1,
        "num_layers": 12,
        "hidden_size": 768,
        "num_attention_heads": 12,
        "max_position_embeddings": 2048,
        "encoder_seq_length": 2048,
    }
)

# Both tokenizer files are looked up inside `path`.
vocab_file, merges_file = (
    os.path.join(path, filename)
    for filename in ("gpt2-vocab.json", "gpt2-merges.txt")
)

# Build the tokenizer from the raw vocab/merges files and save it back
# into `path` via `save_pretrained`.
tokenizer = GPT2Tokenizer(vocab_file, merges_file)
tokenizer.save_pretrained(path)

# Load "restored.pt" from `path`, pointing the loader at the same
# vocab/merges files the tokenizer was built from.
checkpoint = checkpoint_utils.load_model_ensemble_and_task(
    [os.path.join(path, "restored.pt")],
    arg_overrides={
        "vocab_filename": vocab_file,
        "merges_filename": merges_file,
    },
)

# checkpoint[0][0] is presumably the first model of the loaded ensemble
# (TODO confirm); switch it to eval mode, move it to the GPU, cast to fp16.
model = checkpoint[0][0].eval().cuda().half()
# forward passes
def single_batch_forward_logits(prompts):
    """Run one forward pass over ``prompts`` and return the resulting logits.

    Args:
        prompts: Text handed unchanged to the module-level ``tokenizer``
            with ``return_tensors="pt"``. A single string gives a batch of
            one row. A list of strings is accepted as long as the tokenizer
            can stack the encodings into one tensor; no padding is
            requested here, so presumably they must tokenize to the same
            length (TODO confirm).

    Returns:
        The first element of the output of the module-level ``model`` for
        the whole batch, computed under ``torch.no_grad()``.
    """
    input_ids = tokenizer(prompts, return_tensors="pt").input_ids

    # Prepend token id 0 to every row. Building the column from the batch
    # size (instead of a fixed 1x1 tensor) keeps the concatenation valid for
    # any number of rows; for a single prompt it is exactly [[0]].
    # NOTE(review): id 0 is presumably the start-of-sequence token this
    # checkpoint expects -- confirm against the model's dictionary.
    start_column = input_ids.new_zeros((input_ids.shape[0], 1))
    input_ids = torch.cat([start_column, input_ids], dim=-1)

    input_ids = input_ids.cuda()
    with torch.no_grad():
        logits = model(input_ids)[0]
    return logits
prompts = [
    "Today is a beautiful day and I want to",
    "In the city of",
    "Paris is the capital of France and",
    "Computers and mobile phones have taken",
]

# Greedy next-token demo: for every prompt, print the single token that gets
# the highest score at the last sequence position.
print("Next word generation")
for prompt in prompts:
    print("-------------")
    print(f"Prompt: {prompt}...\n")
    logits = single_batch_forward_logits(prompt)
    # First batch row, last position -> index of the highest-scoring entry.
    pred_next_token = logits[0, -1].argmax(-1)
    # "Ġ" is presumably the tokenizer's marker for a leading space (TODO
    # confirm); it is stripped so only the bare word is printed.
    next_token = tokenizer.convert_ids_to_tokens([pred_next_token])[0].replace("Ġ", "")
    print(f"Next word: {next_token}")
    print("-------------")
|