shunxing1234 committed · Commit b7b756d · 1 Parent(s): 7034db9

Update README.md

Files changed (1): README.md (+39 -37)
README.md CHANGED
@@ -73,41 +73,40 @@ with torch.no_grad():
 Inference using [NBCE](https://github.com/bojone/NBCE/tree/main)
 
 ```python
-import json
+import json, jsonlines
 import torch
 from transformers import AutoTokenizer
-from transformers import AutoModelForCausalLM
+from transformers import AquilaForCausalLM
 from transformers import TopPLogitsWarper, LogitsProcessorList
-import pdb
+
+from cyg_conversation import default_conversation
+
+def preprocess(text, question="回答:"):
+    # one prompt with the bare question, then one prompt per 1024-character
+    # chunk of the context, each chunk followed by the same question
+    contexts = []
+    conv = default_conversation.copy()
+    conv.append_message(conv.roles[0], question)
+    conv.append_message(conv.roles[1], None)
+    contexts.append(conv.get_prompt())
+    for pos in range(0, len(text), 1024):
+        conv1 = default_conversation.copy()
+        conv1.append_message(conv1.roles[0], text[pos:min(pos + 1024, len(text))] + question)
+        conv1.append_message(conv1.roles[1], None)
+        contexts.append(conv1.get_prompt())
+    print('Context length distribution:', [len(t) for t in contexts])
+    print('Total context length:', sum(len(t) for t in contexts))
+    return contexts
 
 # load tokenizer
+model_path = "checkpoints/hf_weight"
 tokenizer = AutoTokenizer.from_pretrained(model_path)
 tokenizer.padding_side = 'left'
 tokenizer.pad_token = tokenizer.unk_token
 
 # load Aquila model
-model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16)
+model = AquilaForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16)
 device = torch.device('cuda')
 model.to(device)
-# load example Context
-from cyg_conversation import default_conversation
-
-conv = default_conversation.copy()
-contexts = json.load(open('code_text_2.json'))
-
-question = "请解释这段程序的功能:"
-batch = []
-conv.append_message(conv.roles[0], question)
-conv.append_message(conv.roles[1], None)
-batch.append(conv.get_prompt())
-# concat context and question
-for ci, context in enumerate(contexts):
-    conv1 = default_conversation.copy()
-    conv1.append_message(conv.roles[0], context + question)
-    conv1.append_message(conv1.roles[1], None)
-    batch.append(conv1.get_prompt())
-print('Context length distribution:', [len(text) for text in batch])
-print('Total context length:', sum([len(text) for text in batch]))
 
 # Top-P
 processors = LogitsProcessorList()
@@ -115,17 +114,17 @@ processors.append(TopPLogitsWarper(0.95))
 
 # Copied from https://github.com/bojone/NBCE/blob/main/test.py#L51-L106
 @torch.inference_mode()
-def generate(max_tokens):
-    """Naive Bayes-based Context Extension example code
+def generate(max_tokens, batch):
+    """Naive Bayes-based Context Extension demo code
     """
     inputs = tokenizer(batch, padding='longest', return_tensors='pt').to(device)
     input_ids = inputs.input_ids
     attention_mask = inputs.attention_mask
 
-    print('input_ids', input_ids.shape)
+    # print('input_ids', input_ids.shape)
     past_key_values = None
     n = input_ids.shape[0]
 
     for i in range(max_tokens):
         # model output
         outputs = model(input_ids=input_ids,
@@ -135,7 +134,7 @@ def generate(max_tokens):
                         past_key_values=past_key_values
                         )
         past_key_values = outputs.past_key_values
 
         # ===== NBCE core code starts =====
         beta, eta = 0.25, 0.1
         logits = outputs.logits[:, -1]
@@ -150,27 +149,30 @@ def generate(max_tokens):
         logits_merged = (1 + beta) * logits_max - beta * logits_uncond
         logits = torch.where(logits_uncond > -100, logits_merged, logits_max)
         # ===== NBCE core code ends =====
 
         # Build a distribution and sample:
         # tau = 1 is standard random sampling; tau -> 0 approaches greedy search.
         # For simplicity, top-k and top-p truncation are not implemented here.
         tau = 0.01
         probas = torch.nn.functional.softmax(logits[None] / tau, dim=-1)
         next_tokens = torch.multinomial(probas, num_samples=1).squeeze(1)
         if next_tokens[0] == tokenizer.eos_token_id:
             break
 
         ret = tokenizer.batch_decode(next_tokens)
         print(ret[0], flush=True, end='')
 
         # prepare for next iteration
         input_ids = next_tokens.unsqueeze(-1).tile(n, 1)
         attention_mask = torch.cat([attention_mask, torch.ones(n, 1, dtype=torch.long, device=device)], dim=-1)
 
 
 if __name__ == '__main__':
-    generate(1000)
+    with open("/data2/gaokao_chinese_dataset.jsonl", 'r') as f:
+        for item in jsonlines.Reader(f):
+            batch = preprocess(item['prompt'], question=item['question'])
+            generate(10, batch)
 ```
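The last two hunks skip over the unchanged heart of the NBCE block (old lines 142–149: normalizing the per-context logits and selecting the lowest-entropy context). For orientation, here is a minimal sketch of that pooling step as it appears in the linked NBCE `test.py`; the standalone function, its name, and the `k_prev` argument are illustrative, not part of this commit:

```python
import torch

def nbce_merge(logits, input_ids, processors, k_prev=None, beta=0.25, eta=0.1):
    """One NBCE pooling step. Row 0 of `logits` comes from the context-free
    prompt; rows 1..n come from the context chunks (sketch, not committed code)."""
    # normalize each row to log-probabilities, then apply top-p truncation
    logits = logits - logits.logsumexp(dim=-1, keepdim=True)
    logits = processors(input_ids, logits)
    # choose the most confident context (lowest entropy); eta is a small
    # stickiness bonus toward the context chosen at the previous step
    entropy = -(logits.exp() * logits.clip(-100, 0)).sum(dim=-1)
    if k_prev is not None:
        entropy[k_prev] -= eta
    k = (entropy[1:].argmin() + 1).item()
    logits_max = logits[k]
    logits_uncond = logits[0]
    # naive Bayes combination: amplify context evidence, discount the prior
    merged = (1 + beta) * logits_max - beta * logits_uncond
    merged = torch.where(logits_uncond > -100, merged, logits_max)
    return merged, k
```

`generate` then samples from the merged distribution, so at each step every context chunk contributes only through the single most confident prediction.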
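A side note on the kept line `tokenizer.padding_side = 'left'`: with a decoder-only model, left padding keeps the last position of every row a real token, which the `logits[:, -1]` read inside `generate` relies on. A tiny illustration (prompt strings are arbitrary; `model_path` as above):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained(model_path)
tok.pad_token = tok.unk_token
tok.padding_side = 'left'
enc = tok(["a short prompt", "a much longer prompt than the first one"],
          padding='longest', return_tensors='pt')
# pad tokens now sit on the left, so enc.input_ids[:, -1] is the last real
# token of every row, and outputs.logits[:, -1] is a valid next-token
# distribution for the whole batch
print(enc.input_ids[:, -1])
```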
 
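The committed `__main__` reads a machine-specific path (`/data2/gaokao_chinese_dataset.jsonl`). To smoke-test the script without that dataset, something like the following works; the file name and question are hypothetical:

```python
if __name__ == '__main__':
    # hypothetical standalone run: any long document plus a single question
    long_text = open('some_long_document.txt', encoding='utf-8').read()
    batch = preprocess(long_text, question="请概括这段文字的主要内容:")  # "Summarize this passage:"
    generate(200, batch)
```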
  ## License
 