patrickvonplaten committed on
Commit
7a9344a
·
1 Parent(s): f5c87bd
config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_dropout": 0.0,
3
+ "activation_function": "relu",
4
+ "architectures": [
5
+ "OPTModel"
6
+ ],
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 0,
9
+ "d_model": 4096,
10
+ "decoder_layernorm": false,
11
+ "decoder_start_token_id": 2,
12
+ "dropout": 0.1,
13
+ "eos_token_id": 2,
14
+ "ffn_dim": 16384,
15
+ "init_std": 0.02,
16
+ "layerdrop": 0.0,
17
+ "max_position_embeddings": 2048,
18
+ "model_type": "opt",
19
+ "num_attention_heads": 32,
20
+ "num_hidden_layers": 32,
21
+ "output_projection": true,
22
+ "pad_token_id": 1,
23
+ "scale_embedding": false,
24
+ "share_input_output_embed": true,
25
+ "torch_dtype": "float16",
26
+ "transformers_version": "4.19.0.dev0",
27
+ "use_cache": false,
28
+ "vocab_size": 50272,
29
+ "word_embed_proj_dim": 4096
30
+ }
model/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}}
model/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"errors": "replace", "unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "tokenizer_class": "GPT2Tokenizer"}
model/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
run.sh CHANGED
@@ -1,2 +1,2 @@
1
  #!/usr/bin/env bash
2
- CUDA_VISIBLE_DEVICES="0" torchrun run_model.py --pipeline-model-parallel-size 1 --tensor-model-parallel-size 1
 
1
  #!/usr/bin/env bash
2
+ CUDA_VISIBLE_DEVICES="0,3" torchrun run_model.py --pipeline-model-parallel-size 1 --tensor-model-parallel-size 1
run_model.py CHANGED
@@ -1,9 +1,9 @@
1
  #!/usr/bin/env python3
2
- #!/usr/bin/env python3
3
  import os
4
  from transformers import AutoTokenizer, GPT2Tokenizer
5
  from megatron.initialize import initialize_megatron
6
  from metaseq import checkpoint_utils
 
7
  import torch
8
 
9
  path = "./model"
@@ -34,32 +34,56 @@ checkpoint = checkpoint_utils.load_model_ensemble_and_task(
34
  )
35
 
36
  model = checkpoint[0][0].eval()
37
- model = model.cuda().half()
 
 
38
 
39
 
40
  # forward passes
41
  def single_batch_forward_logits(prompts):
42
  input_ids = tokenizer(prompts, return_tensors="pt").input_ids
43
  input_ids = torch.cat([torch.tensor([[0]]), input_ids], dim=-1)
44
- input_ids = input_ids.cuda()
45
  with torch.no_grad():
46
  logits = model(input_ids)[0]
47
  return logits
48
 
 
 
 
 
 
 
 
 
 
49
  prompts = [
50
- "Today is a beautiful day and I want to",
51
- "In the city of",
52
- "Paris is the capital of France and",
53
- "Computers and mobile phones have taken",
54
  ]
55
 
 
 
 
 
 
56
  print("Next word generation")
57
  for prompt in prompts:
58
  print("-------------")
59
  print(f"Prompt: {prompt}...\n")
60
- logits = single_batch_forward_logits(prompt)
 
 
 
 
 
 
61
  pred_next_token = torch.argmax(logits[0, -1], -1)
62
  next_token = tokenizer.convert_ids_to_tokens([pred_next_token])
63
  next_token = next_token[0].replace("Ġ", "")
64
  print(f"Next word: {next_token}")
65
  print("-------------")
 
 
 
1
  #!/usr/bin/env python3
 
2
  import os
3
  from transformers import AutoTokenizer, GPT2Tokenizer
4
  from megatron.initialize import initialize_megatron
5
  from metaseq import checkpoint_utils
6
+ from transformers import OPTForCausalLM
7
  import torch
8
 
9
  path = "./model"
 
34
  )
35
 
36
  model = checkpoint[0][0].eval()
37
+ model = model.to("cuda:0").half()
38
+
39
+ hf_model = OPTForCausalLM.from_pretrained("../opt-6.7b").to("cuda:1").half()
40
 
41
 
42
  # forward passes
43
  def single_batch_forward_logits(prompts):
44
  input_ids = tokenizer(prompts, return_tensors="pt").input_ids
45
  input_ids = torch.cat([torch.tensor([[0]]), input_ids], dim=-1)
46
+ input_ids = input_ids.to("cuda:0")
47
  with torch.no_grad():
48
  logits = model(input_ids)[0]
49
  return logits
50
 
51
+ # forward hf
52
def forward_hf(prompts):
    """Run the Hugging Face `hf_model` on `prompts` and return its first output.

    Mirrors `single_batch_forward_logits`: the prompt is tokenized with the
    module-level `tokenizer`, token id 0 is prepended (presumably the BOS
    token; `bos_token_id` is 0 in the accompanying config.json), and the ids
    are moved to "cuda:1", the device `hf_model` was placed on. The forward
    pass runs under `torch.no_grad()`.

    The prepended tensor has shape (1, 1), so only a single prompt per call is
    supported.
    """
    encoded = tokenizer(prompts, return_tensors="pt")
    bos_column = torch.tensor([[0]])
    token_ids = torch.cat([bos_column, encoded.input_ids], dim=-1)
    token_ids = token_ids.to("cuda:1")
    with torch.no_grad():
        return hf_model(token_ids)[0]
59
+
60
  prompts = [
61
+ "Today is a beautiful day and I want to",
62
+ "In the city of",
63
+ "Paris is the capital of France and",
64
+ "Computers and mobile phones have taken",
65
  ]
66
 
67
# NOTE: debugging override - this reassignment restricts the run to a single
# prompt. Delete it to evaluate every prompt in the list defined just before.
prompts = [
    "Today is a beautiful day and I want to",
]


def _print_next_word(logits):
    """Print the greedy next-token prediction from the last position of `logits`."""
    pred_next_token = torch.argmax(logits[0, -1], -1)
    next_token = tokenizer.convert_ids_to_tokens([pred_next_token.item()])
    # "Ġ" is stripped from the printed token; presumably the tokenizer's
    # leading-space marker (tokenizer_config.json names GPT2Tokenizer).
    next_token = next_token[0].replace("Ġ", "")
    print(f"Next word: {next_token}")
    print("-------------")


print("Next word generation")
for prompt in prompts:
    print("-------------")
    print(f"Prompt: {prompt}...\n")

    # Prediction from the metaseq checkpoint (cuda:0).
    logits_fsq = single_batch_forward_logits(prompt)
    _print_next_word(logits_fsq)

    # Prediction from the Hugging Face port (cuda:1).
    logits = forward_hf(prompt)
    _print_next_word(logits)

    # The two models live on different GPUs, so compare on the CPU. The
    # original computed torch.allclose once after the loop and discarded the
    # result; report it for every prompt so a mismatch is actually visible.
    fsq_cpu = logits_fsq.cpu()
    hf_cpu = logits.cpu()
    logits_match = torch.allclose(fsq_cpu, hf_cpu, atol=1e-3)
    max_abs_diff = (fsq_cpu.float() - hf_cpu.float()).abs().max().item()
    print(f"Logits match (atol=1e-3): {logits_match} (max abs diff: {max_abs_diff:.6f})")