use fastchat conversations template (#578)
Browse files
* use fastchat conversations template
* require fastchat (fschat) pip install
* handle roles dynamically from conversation
* tweak fastchat conversation with a monkeypatch to get individual turns
* fix up so it works with multiple conversation styles, and don't strip the turns
* fix sharegpt fixture now that we're using a more correct tokenization
* use a new prompter and support fastchat conversation type
* use sharegpt from prompt strategies now
* update docs, add chatml template
* add a newline after im_end token
* ensure we correctly set system message
* update per PR feedback to handle deprecated sharegpt types
* don't add duplicate wandb req
* make sharegpt fields configurable from yml
* llama2 fixes
* don't fail fatally when turns are improper
- README.md +4 -3
- requirements.txt +1 -0
- src/axolotl/monkeypatch/fastchat_conversation_turns.py +174 -0
- src/axolotl/prompt_strategies/{sharegpt_simple.py → sharegpt.py} +28 -5
- src/axolotl/prompt_strategies/sharegpt_jokes.py +2 -2
- src/axolotl/prompt_tokenizers.py +17 -7
- src/axolotl/prompters.py +51 -80
- src/axolotl/utils/config.py +19 -0
- src/axolotl/utils/data.py +0 -11
- src/axolotl/utils/tokenization.py +1 -0
- tests/fixtures/conversation.tokenized.json +1 -1
- tests/test_prompt_tokenizers.py +3 -3
- tests/test_validation.py +23 -0
README.md
CHANGED
@@ -180,7 +180,7 @@ Have dataset(s) in one of the following format (JSONL recommended):
|
|
180 |
```json
|
181 |
{"instruction": "...", "input": "...", "output": "..."}
|
182 |
```
|
183 |
-
- `sharegpt
|
184 |
```json
|
185 |
{"conversations": [{"from": "...", "value": "..."}]}
|
186 |
```
|
@@ -269,11 +269,11 @@ Have dataset(s) in one of the following format (JSONL recommended):
|
|
269 |
```json
|
270 |
{"prompt": "...", "generation": "..."}
|
271 |
```
|
272 |
-
- `
|
273 |
```json
|
274 |
{"conversations": [{"role": "...", "value": "..."}]}
|
275 |
```
|
276 |
-
- `
|
277 |
```json
|
278 |
{"conversations": [{"from": "...", "value": "..."}]}
|
279 |
```
|
@@ -443,6 +443,7 @@ datasets:
|
|
443 |
data_files: # Optional[str] path to source data files
|
444 |
shards: # Optional[int] number of shards to split data into
|
445 |
name: # Optional[str] name of dataset configuration to load
|
|
|
446 |
|
447 |
# custom user prompt
|
448 |
- path: repo
|
|
|
180 |
```json
|
181 |
{"instruction": "...", "input": "...", "output": "..."}
|
182 |
```
|
183 |
+
- `sharegpt`: conversations where `from` is `human`/`gpt`
|
184 |
```json
|
185 |
{"conversations": [{"from": "...", "value": "..."}]}
|
186 |
```
|
|
|
269 |
```json
|
270 |
{"prompt": "...", "generation": "..."}
|
271 |
```
|
272 |
+
- `sharegpt.load_role`: conversations where `role` is used instead of `from`
|
273 |
```json
|
274 |
{"conversations": [{"role": "...", "value": "..."}]}
|
275 |
```
|
276 |
+
- `sharegpt.load_guanaco`: conversations where `from` is `prompter`/`assistant` instead of default sharegpt
|
277 |
```json
|
278 |
{"conversations": [{"from": "...", "value": "..."}]}
|
279 |
```
|
|
|
443 |
data_files: # Optional[str] path to source data files
|
444 |
shards: # Optional[int] number of shards to split data into
|
445 |
name: # Optional[str] name of dataset configuration to load
|
446 |
+
conversation: # Optional[str] fastchat conversation type, only used with type: sharegpt
|
447 |
|
448 |
# custom user prompt
|
449 |
- path: repo
|
requirements.txt
CHANGED
@@ -31,3 +31,4 @@ scipy
|
|
31 |
scikit-learn==1.2.2
|
32 |
pynvml
|
33 |
art
|
|
|
|
31 |
scikit-learn==1.2.2
|
32 |
pynvml
|
33 |
art
|
34 |
+
fschat==0.2.29
|
src/axolotl/monkeypatch/fastchat_conversation_turns.py
ADDED
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
monkeypatch to add a get_turns method
|
3 |
+
"""
|
4 |
+
|
5 |
+
import logging
|
6 |
+
from typing import Generator, Tuple
|
7 |
+
|
8 |
+
from fastchat.conversation import SeparatorStyle
|
9 |
+
|
10 |
+
LOG = logging.getLogger("axolotl.monkeypatch.fastchat_conversation_turns")
|
11 |
+
|
12 |
+
|
13 |
+
def get_prompt(self) -> str:
    """Return the full prompt built from the turns yielded by ``get_turns``.

    Every turn is a ``(role, message)`` pair of strings; the prompt is the
    role text immediately followed by the message text, for all turns in
    the order they are yielded.
    """
    # Join once instead of growing a string with ``+=`` on every turn.
    return "".join(role + msg for role, msg in self.get_turns())
|
18 |
+
|
19 |
+
|
20 |
+
def get_turns(  # pylint: disable=too-many-return-statements
    self,
) -> Generator[Tuple[str, str], None, None]:
    """Yield the conversation as ``(role, message)`` string pairs.

    The first pair normally carries the formatted system prompt with an
    empty role string (some separator styles skip it when there is no
    system text). Every following pair is one entry of ``self.messages``,
    formatted according to ``self.sep_style``. Concatenating
    ``role + message`` over all pairs gives the full prompt (see
    ``get_prompt``).

    Raises:
        ValueError: if ``self.sep_style`` is not one of the handled styles.
    """
    system_prompt = self.system_template.format(system_message=self.system_message)
    if self.sep_style == SeparatorStyle.ADD_COLON_SINGLE:
        yield "", system_prompt + self.sep
        for role, message in self.messages:
            if message:
                yield role + ": ", message + self.sep
            else:
                yield role + ":", ""
        return
    if self.sep_style == SeparatorStyle.ADD_COLON_TWO:
        seps = [self.sep, self.sep2]
        yield "", system_prompt + seps[0]
        for i, (role, message) in enumerate(self.messages):
            if message:
                yield role + ": ", message + seps[i % 2]
            else:
                yield role + ":", ""
        return
    if self.sep_style == SeparatorStyle.ADD_COLON_SPACE_SINGLE:
        yield "", system_prompt + self.sep
        for role, message in self.messages:
            if message:
                yield role + ": ", message + self.sep
            else:
                yield role + ": ", ""  # must be end with a space
        return
    if self.sep_style == SeparatorStyle.ADD_NEW_LINE_SINGLE:
        yield "", "" if system_prompt == "" else system_prompt + self.sep
        for role, message in self.messages:
            if message:
                yield role + "\n", message + self.sep
            else:
                yield role + "\n", ""
        return
    if self.sep_style == SeparatorStyle.NO_COLON_SINGLE:
        yield "", system_prompt
        for role, message in self.messages:
            if message:
                yield role, message + self.sep
            else:
                yield role, ""
        return
    if self.sep_style == SeparatorStyle.NO_COLON_TWO:
        seps = [self.sep, self.sep2]
        yield "", system_prompt
        for i, (role, message) in enumerate(self.messages):
            if message:
                yield role, message + seps[i % 2]
            else:
                yield role, ""
        return
    if self.sep_style == SeparatorStyle.RWKV:
        yield "", system_prompt
        for role, message in self.messages:
            if message:
                yield role + ": ", message.replace("\r\n", "\n").replace(
                    "\n\n", "\n"
                ) + "\n\n"
            else:
                yield role + ":", ""
        return
    if self.sep_style == SeparatorStyle.LLAMA2:
        seps = [self.sep, self.sep2]
        preamble = system_prompt if self.system_message else "[INST] "
        # The preamble already opens the first instruction, so a non-empty
        # first message is appended to it rather than emitted with its own
        # role tag. Previously the first message was skipped entirely and
        # the separator parity of all later turns was shifted by one.
        start = 0
        if self.messages and self.messages[0][1]:
            preamble += self.messages[0][1] + " "
            start = 1
        yield "", preamble
        # ``start=start`` keeps ``i`` equal to the index in ``self.messages``
        # so ``seps[i % 2]`` picks the separator for the real turn position.
        for i, (role, message) in enumerate(self.messages[start:], start=start):
            if message:
                yield role + " ", message + seps[i % 2]
            else:
                yield role, ""
        return
    if self.sep_style == SeparatorStyle.CHATGLM:
        # source: https://huggingface.co/THUDM/chatglm-6b/blob/1d240ba371910e9282298d4592532d7f0f3e9f3e/modeling_chatglm.py#L1302-L1308
        # source2: https://huggingface.co/THUDM/chatglm2-6b/blob/e186c891cf64310ac66ef10a87e6635fa6c2a579/modeling_chatglm.py#L926
        round_add_n = 1 if self.name == "chatglm2" else 0
        if system_prompt:
            yield "", system_prompt + self.sep

        for i, (role, message) in enumerate(self.messages):
            # a "[Round N]" header precedes every even-indexed message
            if i % 2 == 0:
                yield "", f"[Round {i//2 + round_add_n}]{self.sep}"

            if message:
                yield f"{role}:", f"{message}{self.sep}"
            else:
                yield f"{role}:", ""
        return
    if self.sep_style == SeparatorStyle.CHATML:
        yield "", "" if system_prompt == "" else system_prompt + self.sep + "\n"
        for role, message in self.messages:
            if message:
                yield role + "\n", message + self.sep + "\n"
            else:
                yield role + "\n", ""
        return
    if self.sep_style == SeparatorStyle.CHATINTERN:
        # source: https://huggingface.co/internlm/internlm-chat-7b-8k/blob/bd546fa984b4b0b86958f56bf37f94aa75ab8831/modeling_internlm.py#L771
        seps = [self.sep, self.sep2]
        yield "", system_prompt
        for i, (role, message) in enumerate(self.messages):
            prefix = "<s>" if i % 2 == 0 else ""
            if message:
                yield prefix + role + ":", message + seps[i % 2] + "\n"
            else:
                yield role + ":", ""
        return
    if self.sep_style == SeparatorStyle.DOLLY:
        seps = [self.sep, self.sep2]
        yield "", system_prompt
        for i, (role, message) in enumerate(self.messages):
            if message:
                suffix = "\n\n" if i % 2 == 1 else ""
                yield role + ":\n", message + seps[i % 2] + suffix
            else:
                yield role + ":\n", ""
        return
    if self.sep_style == SeparatorStyle.PHOENIX:
        yield "", system_prompt
        for role, message in self.messages:
            if message:
                yield role + ": ", "<s>" + message + "</s>"
            else:
                yield role + ": " + "<s>", ""
        return
    if self.sep_style == SeparatorStyle.ROBIN:
        yield "", system_prompt + self.sep
        for role, message in self.messages:
            if message:
                yield role + ":\n", message + self.sep
            else:
                yield role + ":\n", ""
        return
    if self.sep_style == SeparatorStyle.FALCON_CHAT:
        if self.system_message:
            yield "", system_prompt + self.sep
        for role, message in self.messages:
            if message:
                yield role + ": ", message + self.sep
            else:
                yield role + ":", ""
    else:
        raise ValueError(f"Invalid style: {self.sep_style}")
|
168 |
+
|
169 |
+
|
170 |
+
def add_get_turns_to_conversation():
    """Monkeypatch fastchat's ``Conversation`` class.

    Attaches this module's ``get_turns`` as a new method and replaces
    ``get_prompt`` with the turn-based implementation defined here.
    """
    import fastchat.conversation

    conversation_cls = fastchat.conversation.Conversation
    for patched in (get_turns, get_prompt):
        # each function is installed under its own name
        setattr(conversation_cls, patched.__name__, patched)
|
src/axolotl/prompt_strategies/{sharegpt_simple.py → sharegpt.py}
RENAMED
@@ -1,12 +1,35 @@
|
|
1 |
"""Module containing the SimpleShareGPTPromptTokenizingStrategy class"""
|
|
|
|
|
|
|
2 |
|
3 |
from axolotl.prompt_tokenizers import ShareGPTPromptTokenizingStrategy
|
4 |
-
from axolotl.prompters import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
|
7 |
-
def load(tokenizer, cfg):
|
|
|
|
|
|
|
|
|
|
|
8 |
return SimpleShareGPTPromptTokenizingStrategy(
|
9 |
-
|
|
|
|
|
|
|
|
|
10 |
tokenizer,
|
11 |
cfg.train_on_inputs,
|
12 |
cfg.sequence_len,
|
@@ -15,7 +38,7 @@ def load(tokenizer, cfg):
|
|
15 |
|
16 |
def load_role(tokenizer, cfg):
|
17 |
return SimpleRoleShareGPTPromptTokenizingStrategy(
|
18 |
-
|
19 |
tokenizer,
|
20 |
cfg.train_on_inputs,
|
21 |
cfg.sequence_len,
|
@@ -24,7 +47,7 @@ def load_role(tokenizer, cfg):
|
|
24 |
|
25 |
def load_guanaco(tokenizer, cfg):
|
26 |
return GuanacoShareGPTPromptTokenizingStrategy(
|
27 |
-
|
28 |
tokenizer,
|
29 |
cfg.train_on_inputs,
|
30 |
cfg.sequence_len,
|
|
|
1 |
"""Module containing the SimpleShareGPTPromptTokenizingStrategy class"""
|
2 |
+
from typing import Any, Dict, Optional
|
3 |
+
|
4 |
+
from fastchat.conversation import Conversation, SeparatorStyle, register_conv_template
|
5 |
|
6 |
from axolotl.prompt_tokenizers import ShareGPTPromptTokenizingStrategy
|
7 |
+
from axolotl.prompters import ShareGPTPrompterV2
|
8 |
+
|
9 |
+
# Register a ChatML conversation template with fastchat.
# The separator carries no trailing newline: turn rendering for the CHATML
# separator style already appends "\n" after every separator, so including
# one here would produce a blank line after each <|im_end|>.
register_conv_template(
    Conversation(
        name="chatml",
        system_template="<|im_start|>system\n{system_message}",
        system_message="You are a helpful assistant.",
        roles=["<|im_start|>user", "<|im_start|>assistant"],
        sep_style=SeparatorStyle.CHATML,
        sep="<|im_end|>",
    )
)
|
19 |
|
20 |
|
21 |
+
def load(tokenizer, cfg, ds_cfg: Optional[Dict[str, Any]] = None):
|
22 |
+
conversation = (
|
23 |
+
ds_cfg["conversation"] if ds_cfg and "conversation" in ds_cfg else None
|
24 |
+
)
|
25 |
+
field_human = ds_cfg["field_human"] if ds_cfg and "field_human" in ds_cfg else None
|
26 |
+
field_model = ds_cfg["field_model"] if ds_cfg and "field_model" in ds_cfg else None
|
27 |
return SimpleShareGPTPromptTokenizingStrategy(
|
28 |
+
ShareGPTPrompterV2(
|
29 |
+
conversation=conversation,
|
30 |
+
role_key_model=field_model,
|
31 |
+
role_key_human=field_human,
|
32 |
+
),
|
33 |
tokenizer,
|
34 |
cfg.train_on_inputs,
|
35 |
cfg.sequence_len,
|
|
|
38 |
|
39 |
def load_role(tokenizer, cfg):
|
40 |
return SimpleRoleShareGPTPromptTokenizingStrategy(
|
41 |
+
ShareGPTPrompterV2(),
|
42 |
tokenizer,
|
43 |
cfg.train_on_inputs,
|
44 |
cfg.sequence_len,
|
|
|
47 |
|
48 |
def load_guanaco(tokenizer, cfg):
|
49 |
return GuanacoShareGPTPromptTokenizingStrategy(
|
50 |
+
ShareGPTPrompterV2(),
|
51 |
tokenizer,
|
52 |
cfg.train_on_inputs,
|
53 |
cfg.sequence_len,
|
src/axolotl/prompt_strategies/sharegpt_jokes.py
CHANGED
@@ -1,11 +1,11 @@
|
|
1 |
"""Module for Jokes prompts using sharegpt style """
|
2 |
from axolotl.prompt_tokenizers import ShareGPTPromptTokenizingStrategy
|
3 |
-
from axolotl.prompters import
|
4 |
|
5 |
|
6 |
def load(tokenizer, cfg):
|
7 |
return SimpleJokesShareGPTPromptTokenizingStrategy(
|
8 |
-
|
9 |
tokenizer,
|
10 |
cfg.train_on_inputs,
|
11 |
cfg.sequence_len,
|
|
|
1 |
"""Module for Jokes prompts using sharegpt style """
|
2 |
from axolotl.prompt_tokenizers import ShareGPTPromptTokenizingStrategy
|
3 |
+
from axolotl.prompters import ShareGPTPrompterV2
|
4 |
|
5 |
|
6 |
def load(tokenizer, cfg):
|
7 |
return SimpleJokesShareGPTPromptTokenizingStrategy(
|
8 |
+
ShareGPTPrompterV2(),
|
9 |
tokenizer,
|
10 |
cfg.train_on_inputs,
|
11 |
cfg.sequence_len,
|
src/axolotl/prompt_tokenizers.py
CHANGED
@@ -6,8 +6,12 @@ import functools
|
|
6 |
import logging
|
7 |
from typing import Dict, List, Tuple, Union
|
8 |
|
|
|
9 |
from transformers import BatchEncoding, PreTrainedTokenizer
|
10 |
|
|
|
|
|
|
|
11 |
from axolotl.prompters import IGNORE_TOKEN_ID
|
12 |
|
13 |
LOG = logging.getLogger("axolotl")
|
@@ -18,6 +22,8 @@ LLAMA_DEFAULT_EOS_TOKEN = "</s>" # nosec
|
|
18 |
LLAMA_DEFAULT_BOS_TOKEN = "<s>" # nosec
|
19 |
LLAMA_DEFAULT_UNK_TOKEN = "<unk>" # nosec
|
20 |
|
|
|
|
|
21 |
|
22 |
class InvalidDataException(Exception):
|
23 |
"""
|
@@ -352,18 +358,21 @@ class ShareGPTPromptTokenizingStrategy(PromptTokenizingStrategy):
|
|
352 |
result, current_len = tokenize_prompt_default()
|
353 |
user_token = self._get_user_token()
|
354 |
assistant_token = self._get_assistant_token()
|
|
|
|
|
|
|
355 |
try:
|
356 |
for _, part in enumerate(
|
357 |
self.prompter.build_prompt(self.get_conversation_thread(prompt))
|
358 |
):
|
359 |
if isinstance(part, tuple):
|
360 |
-
if
|
361 |
turn = part[0] + part[1] if not user_token else part[1]
|
362 |
# this is still the user query, we should
|
363 |
if not part[1].strip():
|
364 |
LOG.warning(f"user turn has empty text: {prompt}")
|
365 |
res = self._tokenize(
|
366 |
-
turn
|
367 |
add_eos_token=False,
|
368 |
strip_bos_token=True,
|
369 |
)
|
@@ -371,14 +380,14 @@ class ShareGPTPromptTokenizingStrategy(PromptTokenizingStrategy):
|
|
371 |
res["input_ids"] = [user_token, *res["input_ids"]]
|
372 |
# everything from this is masked out from the labels
|
373 |
labels = [IGNORE_TOKEN_ID] * len(res["input_ids"])
|
374 |
-
elif part[0]
|
375 |
# TODO label assistant token/tokens w/ IGNORE_TOKEN_ID
|
376 |
turn = part[0] + part[1] if not assistant_token else part[1]
|
377 |
# this should be the assistant response, should end with an eos token
|
378 |
if not part[1].strip():
|
379 |
LOG.warning(f"assistant turn has empty text: {prompt}")
|
380 |
res = self._tokenize(
|
381 |
-
turn
|
382 |
add_eos_token=True,
|
383 |
strip_bos_token=True,
|
384 |
)
|
@@ -389,16 +398,17 @@ class ShareGPTPromptTokenizingStrategy(PromptTokenizingStrategy):
|
|
389 |
]
|
390 |
# not masked out from labels
|
391 |
labels = copy.deepcopy(res["input_ids"])
|
392 |
-
elif part[0] == "
|
393 |
-
|
394 |
# this is only ever the first part, should include the bos token and the user query
|
395 |
res = self._tokenize(
|
396 |
-
|
397 |
)
|
398 |
# everything from this is masked out from the labels
|
399 |
labels = [IGNORE_TOKEN_ID] * len(res["input_ids"])
|
400 |
else:
|
401 |
LOG.warning(f"unhandled role: {part[0]}")
|
|
|
402 |
|
403 |
# pylint: disable=duplicate-code
|
404 |
result, current_len = parse_tokenized_to_result(
|
|
|
6 |
import logging
|
7 |
from typing import Dict, List, Tuple, Union
|
8 |
|
9 |
+
from fastchat.conversation import Conversation
|
10 |
from transformers import BatchEncoding, PreTrainedTokenizer
|
11 |
|
12 |
+
from axolotl.monkeypatch.fastchat_conversation_turns import (
|
13 |
+
add_get_turns_to_conversation,
|
14 |
+
)
|
15 |
from axolotl.prompters import IGNORE_TOKEN_ID
|
16 |
|
17 |
LOG = logging.getLogger("axolotl")
|
|
|
22 |
LLAMA_DEFAULT_BOS_TOKEN = "<s>" # nosec
|
23 |
LLAMA_DEFAULT_UNK_TOKEN = "<unk>" # nosec
|
24 |
|
25 |
+
add_get_turns_to_conversation()
|
26 |
+
|
27 |
|
28 |
class InvalidDataException(Exception):
|
29 |
"""
|
|
|
358 |
result, current_len = tokenize_prompt_default()
|
359 |
user_token = self._get_user_token()
|
360 |
assistant_token = self._get_assistant_token()
|
361 |
+
conversation: Conversation = (
|
362 |
+
self.prompter._conversation # pylint: disable=protected-access
|
363 |
+
)
|
364 |
try:
|
365 |
for _, part in enumerate(
|
366 |
self.prompter.build_prompt(self.get_conversation_thread(prompt))
|
367 |
):
|
368 |
if isinstance(part, tuple):
|
369 |
+
if conversation.roles[0] in part[0]:
|
370 |
turn = part[0] + part[1] if not user_token else part[1]
|
371 |
# this is still the user query, we should
|
372 |
if not part[1].strip():
|
373 |
LOG.warning(f"user turn has empty text: {prompt}")
|
374 |
res = self._tokenize(
|
375 |
+
turn,
|
376 |
add_eos_token=False,
|
377 |
strip_bos_token=True,
|
378 |
)
|
|
|
380 |
res["input_ids"] = [user_token, *res["input_ids"]]
|
381 |
# everything from this is masked out from the labels
|
382 |
labels = [IGNORE_TOKEN_ID] * len(res["input_ids"])
|
383 |
+
elif conversation.roles[1] in part[0]:
|
384 |
# TODO label assistant token/tokens w/ IGNORE_TOKEN_ID
|
385 |
turn = part[0] + part[1] if not assistant_token else part[1]
|
386 |
# this should be the assistant response, should end with an eos token
|
387 |
if not part[1].strip():
|
388 |
LOG.warning(f"assistant turn has empty text: {prompt}")
|
389 |
res = self._tokenize(
|
390 |
+
turn,
|
391 |
add_eos_token=True,
|
392 |
strip_bos_token=True,
|
393 |
)
|
|
|
398 |
]
|
399 |
# not masked out from labels
|
400 |
labels = copy.deepcopy(res["input_ids"])
|
401 |
+
elif part[0] == "":
|
402 |
+
turn = part[1]
|
403 |
# this is only ever the first part, should include the bos token and the user query
|
404 |
res = self._tokenize(
|
405 |
+
turn, add_eos_token=False, strip_bos_token=False
|
406 |
)
|
407 |
# everything from this is masked out from the labels
|
408 |
labels = [IGNORE_TOKEN_ID] * len(res["input_ids"])
|
409 |
else:
|
410 |
LOG.warning(f"unhandled role: {part[0]}")
|
411 |
+
continue
|
412 |
|
413 |
# pylint: disable=duplicate-code
|
414 |
result, current_len = parse_tokenized_to_result(
|
src/axolotl/prompters.py
CHANGED
@@ -1,9 +1,10 @@
|
|
1 |
"""Module containing prompters"""
|
2 |
|
3 |
-
import dataclasses
|
4 |
import logging
|
5 |
-
from enum import Enum
|
6 |
-
from typing import Generator,
|
|
|
|
|
7 |
|
8 |
LOG = logging.getLogger("axolotl")
|
9 |
IGNORE_TOKEN_ID = -100
|
@@ -214,53 +215,6 @@ class ReflectAlpacaPrompter:
|
|
214 |
yield res
|
215 |
|
216 |
|
217 |
-
class SeparatorStyle(Enum):
|
218 |
-
"""Different separator style."""
|
219 |
-
|
220 |
-
SINGLE = auto()
|
221 |
-
TWO = auto()
|
222 |
-
DOLLY = auto()
|
223 |
-
|
224 |
-
|
225 |
-
# TODO clean this 💩 up
|
226 |
-
@dataclasses.dataclass
|
227 |
-
class Conversation:
|
228 |
-
"""A class that keeps all conversation history."""
|
229 |
-
|
230 |
-
system: str
|
231 |
-
roles: List[str]
|
232 |
-
messages: List[List[str]]
|
233 |
-
offset: int
|
234 |
-
sep_style: SeparatorStyle = SeparatorStyle.SINGLE
|
235 |
-
sep: str = "###"
|
236 |
-
sep2: Optional[str] = None
|
237 |
-
|
238 |
-
def get_prompt(self) -> Generator[Tuple[str, str], None, None]:
|
239 |
-
# seps = [self.sep, self.sep2]
|
240 |
-
preamble = self.system + self.sep
|
241 |
-
yield ("SYSTEM:", preamble)
|
242 |
-
for _, (role, message) in enumerate(self.messages):
|
243 |
-
if message:
|
244 |
-
yield (role + ":", " " + message)
|
245 |
-
else:
|
246 |
-
LOG.warning(f"role with empty message: {role}")
|
247 |
-
yield (role + ":", "")
|
248 |
-
|
249 |
-
def copy(self):
|
250 |
-
return Conversation(
|
251 |
-
system=self.system,
|
252 |
-
roles=self.roles,
|
253 |
-
messages=[[x, y] for x, y in self.messages],
|
254 |
-
offset=self.offset,
|
255 |
-
sep_style=self.sep_style,
|
256 |
-
sep=self.sep,
|
257 |
-
sep2=self.sep2,
|
258 |
-
)
|
259 |
-
|
260 |
-
def append_message(self, role, message):
|
261 |
-
self.messages.append([role, message])
|
262 |
-
|
263 |
-
|
264 |
SHAREGPT_ASSERTION_FAILED_ROLE = (
|
265 |
"Role did not alternate between turns (gpt and human). Please check your data."
|
266 |
)
|
@@ -271,28 +225,27 @@ class ShareGPTPrompter: # pylint: disable=too-few-public-methods
|
|
271 |
A prompter that generates prompts for the ShareGPT
|
272 |
"""
|
273 |
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
)
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
)
|
296 |
|
297 |
def build_prompt(self, source) -> Generator[str, None, None]:
|
298 |
if len(source) < 2:
|
@@ -306,17 +259,14 @@ class ShareGPTPrompter: # pylint: disable=too-few-public-methods
|
|
306 |
|
307 |
# Add the conversation system prompt if provided, otherwise use the default one
|
308 |
if source[0]["from"] == "system":
|
309 |
-
conv.
|
310 |
source.pop(0)
|
311 |
|
312 |
-
roles = {
|
313 |
|
314 |
try:
|
315 |
# Apply prompt templates
|
316 |
-
if
|
317 |
-
source[0]["from"] not in roles
|
318 |
-
or roles[source[0]["from"]] != conv.roles[0]
|
319 |
-
):
|
320 |
# Skip the first one if it is not from human
|
321 |
source = source[1:]
|
322 |
except IndexError as err:
|
@@ -326,8 +276,29 @@ class ShareGPTPrompter: # pylint: disable=too-few-public-methods
|
|
326 |
conv.messages = []
|
327 |
for j, sentence in enumerate(source):
|
328 |
role = roles[sentence["from"]]
|
329 |
-
|
|
|
330 |
conv.append_message(role, sentence["value"])
|
331 |
|
332 |
-
for part in conv.
|
|
|
|
|
333 |
yield part
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
"""Module containing prompters"""
|
2 |
|
|
|
3 |
import logging
|
4 |
+
from enum import Enum
|
5 |
+
from typing import Generator, Optional, Union
|
6 |
+
|
7 |
+
from fastchat.conversation import Conversation, get_conv_template
|
8 |
|
9 |
LOG = logging.getLogger("axolotl")
|
10 |
IGNORE_TOKEN_ID = -100
|
|
|
215 |
yield res
|
216 |
|
217 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
218 |
SHAREGPT_ASSERTION_FAILED_ROLE = (
|
219 |
"Role did not alternate between turns (gpt and human). Please check your data."
|
220 |
)
|
|
|
225 |
A prompter that generates prompts for the ShareGPT
|
226 |
"""
|
227 |
|
228 |
+
role_key_human = "human"
|
229 |
+
role_key_model = "gpt"
|
230 |
+
|
231 |
+
def __init__(
    self,
    prompt_style=None,  # pylint: disable=unused-argument
    conversation: Optional[Union[str, Conversation]] = None,
    role_key_human: Optional[str] = None,
    role_key_model: Optional[str] = None,
):
    """Set up the conversation template and the dataset role keys.

    Args:
        prompt_style: accepted but not used by this method.
        conversation: a ``Conversation`` instance to use directly, or the
            name of a template to look up with ``get_conv_template``;
            when empty, the ``"vicuna_v1.1"`` template is used.
        role_key_human: overrides the class-level ``role_key_human``
            when truthy.
        role_key_model: overrides the class-level ``role_key_model``
            when truthy.
    """
    if not conversation:
        template = get_conv_template("vicuna_v1.1")
    elif isinstance(conversation, Conversation):
        template = conversation
    else:
        template = get_conv_template(conversation)
    self._conversation = template

    # only shadow the class-level defaults when an override was supplied
    overrides = (
        ("role_key_human", role_key_human),
        ("role_key_model", role_key_model),
    )
    for attr_name, override in overrides:
        if override:
            setattr(self, attr_name, override)
|
|
|
249 |
|
250 |
def build_prompt(self, source) -> Generator[str, None, None]:
|
251 |
if len(source) < 2:
|
|
|
259 |
|
260 |
# Add the conversation system prompt if provided, otherwise use the default one
|
261 |
if source[0]["from"] == "system":
|
262 |
+
conv.set_system_message(source[0]["value"])
|
263 |
source.pop(0)
|
264 |
|
265 |
+
roles = {self.role_key_human: conv.roles[0], self.role_key_model: conv.roles[1]}
|
266 |
|
267 |
try:
|
268 |
# Apply prompt templates
|
269 |
+
if source[0]["from"] not in roles:
|
|
|
|
|
|
|
270 |
# Skip the first one if it is not from human
|
271 |
source = source[1:]
|
272 |
except IndexError as err:
|
|
|
276 |
conv.messages = []
|
277 |
for j, sentence in enumerate(source):
|
278 |
role = roles[sentence["from"]]
|
279 |
+
if role != conv.roles[j % 2]:
|
280 |
+
LOG.warning(f"{SHAREGPT_ASSERTION_FAILED_ROLE}: {sentence}")
|
281 |
conv.append_message(role, sentence["value"])
|
282 |
|
283 |
+
for part in conv.get_turns():
|
284 |
+
if part[0] and not part[1]:
|
285 |
+
LOG.warning(f"role with empty message: {part[0]}")
|
286 |
yield part
|
287 |
+
|
288 |
+
|
289 |
+
class ShareGPTPrompterV2(ShareGPTPrompter):
    """
    A V2 prompter that generates prompts for the ShareGPT

    Its constructor takes only the conversation template and the two role
    keys, and forwards them to ``ShareGPTPrompter``; the base class's
    ``prompt_style`` parameter is not exposed.
    """

    def __init__(
        self,
        conversation: Optional[Union[str, Conversation]] = None,
        role_key_human: Optional[str] = None,
        role_key_model: Optional[str] = None,
    ):
        # forwarded by keyword so the base class's leading ``prompt_style``
        # parameter keeps its default
        forwarded = {
            "conversation": conversation,
            "role_key_human": role_key_human,
            "role_key_model": role_key_model,
        }
        super().__init__(**forwarded)
|
src/axolotl/utils/config.py
CHANGED
@@ -278,6 +278,25 @@ def validate_config(cfg):
|
|
278 |
"`model_type: MixFormerSequentialForCausalLM` required for sample_packing"
|
279 |
)
|
280 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
281 |
# TODO
|
282 |
# MPT 7b
|
283 |
# https://github.com/facebookresearch/bitsandbytes/issues/25
|
|
|
278 |
"`model_type: MixFormerSequentialForCausalLM` required for sample_packing"
|
279 |
)
|
280 |
|
281 |
+
if cfg.datasets:
|
282 |
+
for idx, ds_cfg in enumerate(cfg.datasets):
|
283 |
+
if ds_cfg.type == "sharegpt:chat":
|
284 |
+
LOG.warning(
|
285 |
+
PendingDeprecationWarning(
|
286 |
+
"`type: sharegpt:chat` will soon be deprecated. simply use `type: sharegpt` instead."
|
287 |
+
)
|
288 |
+
)
|
289 |
+
cfg.datasets[idx].type = "sharegpt"
|
290 |
+
if "sharegpt_simple" in ds_cfg.type:
|
291 |
+
LOG.warning(
|
292 |
+
PendingDeprecationWarning(
|
293 |
+
"`type: sharegpt_simple` will soon be deprecated. simply use `type: sharegpt` instead."
|
294 |
+
)
|
295 |
+
)
|
296 |
+
cfg.datasets[idx].type = cfg.datasets[idx].type.replace(
|
297 |
+
"sharegpt_simple", "sharegpt"
|
298 |
+
)
|
299 |
+
|
300 |
# TODO
|
301 |
# MPT 7b
|
302 |
# https://github.com/facebookresearch/bitsandbytes/issues/25
|
src/axolotl/utils/data.py
CHANGED
@@ -25,7 +25,6 @@ from axolotl.prompt_tokenizers import (
|
|
25 |
GPTeacherPromptTokenizingStrategy,
|
26 |
JeopardyPromptTokenizingStrategy,
|
27 |
OpenAssistantPromptTokenizingStrategy,
|
28 |
-
ShareGPTPromptTokenizingStrategy,
|
29 |
SummarizeTLDRPromptTokenizingStrategy,
|
30 |
)
|
31 |
from axolotl.prompters import (
|
@@ -35,7 +34,6 @@ from axolotl.prompters import (
|
|
35 |
MultipleChoiceConcisePrompter,
|
36 |
MultipleChoiceExplainPrompter,
|
37 |
ReflectAlpacaPrompter,
|
38 |
-
ShareGPTPrompter,
|
39 |
SummarizeTLDRPrompter,
|
40 |
)
|
41 |
from axolotl.utils.dict import DictDefault
|
@@ -320,15 +318,6 @@ def load_tokenized_prepared_datasets(
|
|
320 |
)
|
321 |
ds_wrapper = TokenizedPromptDataset(ds_strategy, ds)
|
322 |
datasets.append(ds_wrapper)
|
323 |
-
elif d_base_type == "sharegpt":
|
324 |
-
ds_strategy = ShareGPTPromptTokenizingStrategy(
|
325 |
-
ShareGPTPrompter(d_prompt_style),
|
326 |
-
tokenizer,
|
327 |
-
cfg.train_on_inputs,
|
328 |
-
cfg.sequence_len,
|
329 |
-
)
|
330 |
-
ds_wrapper = TokenizedPromptDataset(ds_strategy, ds)
|
331 |
-
datasets.append(ds_wrapper)
|
332 |
else:
|
333 |
suffix = ""
|
334 |
if ":load_" in d.type:
|
|
|
25 |
GPTeacherPromptTokenizingStrategy,
|
26 |
JeopardyPromptTokenizingStrategy,
|
27 |
OpenAssistantPromptTokenizingStrategy,
|
|
|
28 |
SummarizeTLDRPromptTokenizingStrategy,
|
29 |
)
|
30 |
from axolotl.prompters import (
|
|
|
34 |
MultipleChoiceConcisePrompter,
|
35 |
MultipleChoiceExplainPrompter,
|
36 |
ReflectAlpacaPrompter,
|
|
|
37 |
SummarizeTLDRPrompter,
|
38 |
)
|
39 |
from axolotl.utils.dict import DictDefault
|
|
|
318 |
)
|
319 |
ds_wrapper = TokenizedPromptDataset(ds_strategy, ds)
|
320 |
datasets.append(ds_wrapper)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
321 |
else:
|
322 |
suffix = ""
|
323 |
if ":load_" in d.type:
|
src/axolotl/utils/tokenization.py
CHANGED
@@ -33,5 +33,6 @@ def check_example_labels(example, tokenizer, text_only=False):
|
|
33 |
|
34 |
LOG.info(" ".join(colored_tokens))
|
35 |
LOG.info("\n\n\n")
|
|
|
36 |
|
37 |
return " ".join(colored_tokens)
|
|
|
33 |
|
34 |
LOG.info(" ".join(colored_tokens))
|
35 |
LOG.info("\n\n\n")
|
36 |
+
print(" ".join(colored_tokens))
|
37 |
|
38 |
return " ".join(colored_tokens)
|
tests/fixtures/conversation.tokenized.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"input_ids": [1, 319, 13563, 1546, 263, 12758, 1404, 322, 385, 23116, 21082, 20255, 29889, 450, 20255, 4076, 8444, 29892, 13173, 29892, 322, 1248, 568, 6089, 304, 278, 1404, 29915, 29879, 5155, 29889, 3148, 1001, 29901, 920, 1033, 474, 2334, 263, 29086, 705, 11356, 5687, 393, 3667, 4637, 21531, 20159, 304, 4505, 1045, 3163, 29973, 319, 1799, 9047, 13566, 29901, 739, 29915, 29879, 1950, 304, 671, 21531, 20159, 304, 4505, 341, 1475, 705, 11356, 29892, 541, 372, 723, 5517, 367, 3755, 4280, 29889, 838, 5869, 293, 20159, 338, 263, 5443, 310, 23964, 393, 11898, 278, 4426, 310, 8162, 393, 526, 21634, 1090, 9126, 316, 689, 800, 29892, 1316, 408, 16116, 292, 322, 289, 2548, 29889, 512, 278, 3030, 310, 341, 1475, 705, 11356, 29892, 445, 1033, 367, 1304, 304, 27599, 278, 3829, 310, 278, 3748, 7613, 322, 8161, 278, 14354, 310, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 2398, 29892, 16049, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29889, 739, 29915, 29879, 451, 1554, 393, 508, 367, 10824, 297, 263, 2560, 1234, 29889, 2, 3148, 1001, 29901, 1033, 366, 2367, 592, 278, 330, 391, 310, 920, 372, 1033, 2466, 29973, 319, 1799, 9047, 13566, 29901, 18585, 29889, 450, 6996, 2969, 723, 367, 304, 671, 21531, 20159, 304, 27599, 278, 3829, 310, 278, 341, 1475, 705, 11356, 3748, 7613, 322, 8161, 278, 14354, 310, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 910, 1033, 25135, 15783, 278, 3748, 7613, 408, 263, 25002, 2913, 322, 773, 25002, 15939, 1934, 1316, 408, 3632, 327, 2270, 6471, 322, 350, 9890, 3694, 304, 10115, 2472, 1048, 278, 4423, 310, 29086, 373, 278, 7613, 29889, 2398, 29892, 408, 306, 5276, 1434, 29892, 16049, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29892, 577, 372, 29915, 29879, 451, 1554, 393, 508, 367, 10824, 297, 263, 2560, 1234, 29889, 2, 3148, 1001, 29901, 2367, 592, 263, 2702, 1342, 319, 1799, 9047, 13566, 29901, 1094, 263, 2702, 1342, 
29892, 7755, 591, 505, 263, 341, 1475, 705, 11356, 3748, 7613, 411, 278, 1494, 5285, 29901, 13, 28956, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 28956, 13, 797, 445, 1206, 29892, 278, 3748, 7613, 508, 367, 9875, 408, 263, 25002, 2913, 29892, 988, 1269, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29889, 450, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 5293, 21531, 20159, 29892, 591, 508, 27599, 278, 3829, 310, 445, 2913, 322, 8161, 278, 14354, 310, 278, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 13, 13, 2831, 2777, 29892, 697, 982, 304, 437, 445, 338, 304, 10272, 278, 3632, 327, 2270, 6471, 310, 278, 2913, 29889, 15089, 327, 2270, 6471, 526, 263, 982, 310, 20766, 278, 26532, 297, 263, 2913, 29892, 322, 896, 508, 367, 1304, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 341, 1475, 705, 11356, 7613, 29889, 512, 278, 1206, 310, 278, 7613, 2038, 29892, 591, 508, 10272, 278, 937, 3632, 327, 2270, 2318, 310, 278, 2913, 29892, 607, 16612, 278, 1353, 322, 376, 12181, 29908, 310, 278, 26532, 297, 278, 2913, 29889, 512, 445, 1206, 29892, 278, 937, 3632, 327, 2270, 2318, 723, 2649, 502, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29889, 13, 13, 29933, 1463, 373, 445, 2472, 29892, 591, 508, 10115, 393, 278, 29086, 297, 278, 3748, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29892, 408, 4318, 2400, 29901, 13, 28956, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 29871, 29896, 29871, 341, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 
29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 341, 259, 29896, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 28956, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1342, 310, 920, 21531, 20159, 1033, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 526, 1784, 916, 1950, 13501, 29892, 322, 278, 2702, 4902, 310, 920, 304, 2334, 963, 723, 8839, 373, 278, 2702, 3748, 7613, 322, 278, 25002, 15939, 1934, 1641, 1304, 29889, 1094, 306, 5276, 1434, 29892, 16049, 263, 1650, 763, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29889, 2, 3148, 1001, 29901, 10241, 474, 471, 4856, 411, 263, 6483, 8004, 310, 1716, 29892, 1033, 366, 5649, 278, 1021, 6964, 304, 592, 411, 393, 11833, 29973, 319, 1799, 9047, 13566, 29901, 18585, 29889, 17090, 366, 505, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29892, 1244, 338, 920, 366, 1795, 671, 21531, 20159, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29901, 13, 13, 29896, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 7806, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 13, 29906, 29889, 4803, 25002, 15939, 1934, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1670, 526, 1784, 1950, 25002, 15939, 1934, 393, 1033, 367, 1304, 363, 445, 29892, 1316, 408, 3632, 327, 2270, 6471, 29892, 350, 9890, 3694, 29892, 470, 28152, 3632, 3002, 29889, 13, 29941, 29889, 4803, 278, 2472, 10115, 1127, 515, 278, 25002, 15939, 1934, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1152, 1342, 29892, 565, 278, 937, 3632, 327, 2270, 2318, 310, 278, 2913, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 
769, 591, 508, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29946, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 508, 671, 445, 2472, 304, 4772, 963, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 773, 21531, 20159, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 3748, 7613, 322, 278, 25002, 15939, 1934, 1641, 1304, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 21531, 20159, 508, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 29889, 2, 3148, 1001, 29901, 920, 723, 474, 437, 372, 411, 28152, 3632, 3002, 470, 738, 310, 278, 916, 25002, 15939, 1934, 29973, 319, 1799, 9047, 13566, 29901, 1763, 671, 28152, 3632, 3002, 470, 738, 310, 278, 916, 25002, 15939, 1934, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29892, 366, 723, 1101, 278, 1021, 6996, 6576, 408, 5439, 2038, 29892, 541, 411, 777, 12651, 297, 278, 2702, 4902, 29889, 13, 13, 2831, 2777, 29892, 1244, 338, 920, 366, 1795, 671, 28152, 3632, 3002, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29901, 13, 13, 29896, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 7806, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 13, 29906, 29889, 11796, 29872, 278, 28152, 3632, 3002, 310, 278, 2913, 29889, 9034, 9696, 3632, 3002, 338, 263, 982, 310, 20766, 278, 26532, 297, 263, 2913, 975, 931, 29892, 322, 372, 508, 367, 1304, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 341, 1475, 705, 11356, 7613, 29889, 13, 29941, 29889, 4803, 278, 2472, 515, 278, 28152, 3632, 3002, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 
1152, 1342, 29892, 565, 278, 28152, 3632, 3002, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 591, 508, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29946, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 508, 671, 445, 2472, 304, 4772, 963, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 773, 28152, 3632, 3002, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 3748, 7613, 322, 278, 2702, 4902, 310, 278, 28152, 3632, 3002, 16287, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 28152, 3632, 3002, 508, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 29889, 2, 3148, 1001, 29901, 920, 1033, 474, 2334, 445, 297, 3017, 29973, 319, 1799, 9047, 13566, 29901, 1763, 2334, 263, 341, 1475, 705, 11356, 899, 369, 393, 3913, 21531, 20159, 297, 5132, 29892, 366, 723, 817, 304, 437, 278, 1494, 29901, 13, 13, 29896, 29889, 16052, 263, 5132, 3577, 363, 21531, 20159, 29889, 1670, 526, 3196, 3987, 3625, 29892, 1316, 408, 402, 566, 2918, 29892, 360, 291, 952, 375, 29892, 470, 4560, 7354, 29899, 29873, 1388, 29889, 4525, 9741, 3867, 14009, 322, 848, 12286, 363, 20602, 25002, 15939, 1934, 29892, 1316, 408, 3632, 327, 2270, 6471, 29892, 350, 9890, 3694, 29892, 470, 28152, 3632, 3002, 29889, 13, 29906, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 910, 1033, 367, 2309, 773, 263, 848, 3829, 4944, 491, 278, 21531, 20159, 3577, 29892, 1316, 408, 263, 3053, 506, 616, 4280, 470, 263, 478, 2035, 29367, 29899, 29934, 4512, 4280, 29889, 7806, 6862, 373, 278, 7613, 723, 3928, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 723, 367, 9875, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29889, 13, 29941, 29889, 11796, 29872, 278, 25002, 15939, 
1934, 310, 278, 2913, 29889, 910, 1033, 367, 2309, 773, 14009, 4944, 491, 278, 21531, 20159, 3577, 29889, 1152, 1342, 29892, 366, 1033, 10272, 278, 28152, 3632, 3002, 310, 278, 2913, 773, 278, 28152, 3632, 3002, 5687, 29889, 13, 29946, 29889, 4803, 278, 2472, 515, 278, 25002, 15939, 1934, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 910, 1033, 367, 2309, 773, 263, 10296, 310, 19475, 24481, 322, 8720, 29889, 1152, 1342, 29892, 565, 278, 28152, 3632, 3002, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 366, 1033, 671, 263, 8424, 310, 775, 304, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29945, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 1033, 671, 263, 8424, 310, 775, 304, 6336, 2828, 373, 278, 9109, 25256, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 16049, 263, 341, 1475, 705, 11356, 899, 369, 393, 3913, 21531, 20159, 297, 5132, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 4902, 310, 278, 5314, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 304, 679, 4687, 411, 1316, 263, 2060, 29889, 2], "attention_masklabels": [-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 319, 1799, 9047, 13566, 29901, 739, 29915, 29879, 1950, 304, 671, 21531, 20159, 304, 4505, 341, 1475, 705, 11356, 29892, 541, 372, 723, 5517, 367, 3755, 4280, 29889, 838, 5869, 293, 20159, 338, 263, 5443, 310, 23964, 393, 11898, 278, 4426, 310, 8162, 393, 526, 21634, 1090, 9126, 316, 689, 800, 29892, 1316, 408, 16116, 292, 322, 289, 2548, 29889, 
512, 278, 3030, 310, 341, 1475, 705, 11356, 29892, 445, 1033, 367, 1304, 304, 27599, 278, 3829, 310, 278, 3748, 7613, 322, 8161, 278, 14354, 310, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 2398, 29892, 16049, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29889, 739, 29915, 29879, 451, 1554, 393, 508, 367, 10824, 297, 263, 2560, 1234, 29889, 2, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 319, 1799, 9047, 13566, 29901, 18585, 29889, 450, 6996, 2969, 723, 367, 304, 671, 21531, 20159, 304, 27599, 278, 3829, 310, 278, 341, 1475, 705, 11356, 3748, 7613, 322, 8161, 278, 14354, 310, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 910, 1033, 25135, 15783, 278, 3748, 7613, 408, 263, 25002, 2913, 322, 773, 25002, 15939, 1934, 1316, 408, 3632, 327, 2270, 6471, 322, 350, 9890, 3694, 304, 10115, 2472, 1048, 278, 4423, 310, 29086, 373, 278, 7613, 29889, 2398, 29892, 408, 306, 5276, 1434, 29892, 16049, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29892, 577, 372, 29915, 29879, 451, 1554, 393, 508, 367, 10824, 297, 263, 2560, 1234, 29889, 2, -100, -100, -100, -100, -100, -100, -100, -100, 319, 1799, 9047, 13566, 29901, 1094, 263, 2702, 1342, 29892, 7755, 591, 505, 263, 341, 1475, 705, 11356, 3748, 7613, 411, 278, 1494, 5285, 29901, 13, 28956, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 28956, 13, 797, 445, 1206, 29892, 278, 3748, 7613, 508, 367, 9875, 408, 263, 25002, 2913, 29892, 988, 1269, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29889, 450, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 
5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 5293, 21531, 20159, 29892, 591, 508, 27599, 278, 3829, 310, 445, 2913, 322, 8161, 278, 14354, 310, 278, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 13, 13, 2831, 2777, 29892, 697, 982, 304, 437, 445, 338, 304, 10272, 278, 3632, 327, 2270, 6471, 310, 278, 2913, 29889, 15089, 327, 2270, 6471, 526, 263, 982, 310, 20766, 278, 26532, 297, 263, 2913, 29892, 322, 896, 508, 367, 1304, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 341, 1475, 705, 11356, 7613, 29889, 512, 278, 1206, 310, 278, 7613, 2038, 29892, 591, 508, 10272, 278, 937, 3632, 327, 2270, 2318, 310, 278, 2913, 29892, 607, 16612, 278, 1353, 322, 376, 12181, 29908, 310, 278, 26532, 297, 278, 2913, 29889, 512, 445, 1206, 29892, 278, 937, 3632, 327, 2270, 2318, 723, 2649, 502, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29889, 13, 13, 29933, 1463, 373, 445, 2472, 29892, 591, 508, 10115, 393, 278, 29086, 297, 278, 3748, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29892, 408, 4318, 2400, 29901, 13, 28956, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 29871, 29896, 29871, 341, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 341, 259, 29896, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 28956, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1342, 310, 920, 21531, 20159, 1033, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 526, 1784, 916, 1950, 13501, 29892, 322, 278, 2702, 4902, 310, 920, 304, 2334, 963, 723, 8839, 373, 278, 2702, 3748, 7613, 322, 278, 25002, 15939, 1934, 1641, 1304, 29889, 1094, 306, 5276, 1434, 29892, 16049, 263, 1650, 763, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29889, 2, -100, -100, -100, -100, -100, -100, -100, 
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 319, 1799, 9047, 13566, 29901, 18585, 29889, 17090, 366, 505, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29892, 1244, 338, 920, 366, 1795, 671, 21531, 20159, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29901, 13, 13, 29896, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 7806, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 13, 29906, 29889, 4803, 25002, 15939, 1934, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1670, 526, 1784, 1950, 25002, 15939, 1934, 393, 1033, 367, 1304, 363, 445, 29892, 1316, 408, 3632, 327, 2270, 6471, 29892, 350, 9890, 3694, 29892, 470, 28152, 3632, 3002, 29889, 13, 29941, 29889, 4803, 278, 2472, 10115, 1127, 515, 278, 25002, 15939, 1934, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1152, 1342, 29892, 565, 278, 937, 3632, 327, 2270, 2318, 310, 278, 2913, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 591, 508, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29946, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 508, 671, 445, 2472, 304, 4772, 963, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 773, 21531, 20159, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 3748, 7613, 322, 278, 25002, 15939, 1934, 1641, 1304, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 21531, 20159, 508, 367, 1304, 304, 4505, 341, 1475, 705, 
11356, 29889, 2, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 319, 1799, 9047, 13566, 29901, 1763, 671, 28152, 3632, 3002, 470, 738, 310, 278, 916, 25002, 15939, 1934, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29892, 366, 723, 1101, 278, 1021, 6996, 6576, 408, 5439, 2038, 29892, 541, 411, 777, 12651, 297, 278, 2702, 4902, 29889, 13, 13, 2831, 2777, 29892, 1244, 338, 920, 366, 1795, 671, 28152, 3632, 3002, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29901, 13, 13, 29896, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 7806, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 13, 29906, 29889, 11796, 29872, 278, 28152, 3632, 3002, 310, 278, 2913, 29889, 9034, 9696, 3632, 3002, 338, 263, 982, 310, 20766, 278, 26532, 297, 263, 2913, 975, 931, 29892, 322, 372, 508, 367, 1304, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 341, 1475, 705, 11356, 7613, 29889, 13, 29941, 29889, 4803, 278, 2472, 515, 278, 28152, 3632, 3002, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1152, 1342, 29892, 565, 278, 28152, 3632, 3002, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 591, 508, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29946, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 508, 671, 445, 2472, 304, 4772, 963, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 773, 28152, 3632, 3002, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 3748, 7613, 322, 278, 2702, 
4902, 310, 278, 28152, 3632, 3002, 16287, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 28152, 3632, 3002, 508, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 29889, 2, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 319, 1799, 9047, 13566, 29901, 1763, 2334, 263, 341, 1475, 705, 11356, 899, 369, 393, 3913, 21531, 20159, 297, 5132, 29892, 366, 723, 817, 304, 437, 278, 1494, 29901, 13, 13, 29896, 29889, 16052, 263, 5132, 3577, 363, 21531, 20159, 29889, 1670, 526, 3196, 3987, 3625, 29892, 1316, 408, 402, 566, 2918, 29892, 360, 291, 952, 375, 29892, 470, 4560, 7354, 29899, 29873, 1388, 29889, 4525, 9741, 3867, 14009, 322, 848, 12286, 363, 20602, 25002, 15939, 1934, 29892, 1316, 408, 3632, 327, 2270, 6471, 29892, 350, 9890, 3694, 29892, 470, 28152, 3632, 3002, 29889, 13, 29906, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 910, 1033, 367, 2309, 773, 263, 848, 3829, 4944, 491, 278, 21531, 20159, 3577, 29892, 1316, 408, 263, 3053, 506, 616, 4280, 470, 263, 478, 2035, 29367, 29899, 29934, 4512, 4280, 29889, 7806, 6862, 373, 278, 7613, 723, 3928, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 723, 367, 9875, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29889, 13, 29941, 29889, 11796, 29872, 278, 25002, 15939, 1934, 310, 278, 2913, 29889, 910, 1033, 367, 2309, 773, 14009, 4944, 491, 278, 21531, 20159, 3577, 29889, 1152, 1342, 29892, 366, 1033, 10272, 278, 28152, 3632, 3002, 310, 278, 2913, 773, 278, 28152, 3632, 3002, 5687, 29889, 13, 29946, 29889, 4803, 278, 2472, 515, 278, 25002, 15939, 1934, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 910, 1033, 367, 2309, 773, 263, 10296, 310, 19475, 24481, 322, 8720, 29889, 1152, 1342, 29892, 565, 278, 28152, 3632, 3002, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 366, 1033, 671, 263, 8424, 310, 775, 304, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 
310, 278, 7613, 29889, 13, 29945, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 1033, 671, 263, 8424, 310, 775, 304, 6336, 2828, 373, 278, 9109, 25256, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 16049, 263, 341, 1475, 705, 11356, 899, 369, 393, 3913, 21531, 20159, 297, 5132, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 4902, 310, 278, 5314, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 304, 679, 4687, 411, 1316, 263, 2060, 29889, 2]}
|
|
|
1 |
+
{"input_ids": [1, 319, 13563, 1546, 263, 12758, 1404, 322, 385, 23116, 21082, 20255, 29889, 450, 20255, 4076, 8444, 29892, 13173, 29892, 322, 1248, 568, 6089, 304, 278, 1404, 29915, 29879, 5155, 29889, 29871, 3148, 1001, 29901, 920, 1033, 474, 2334, 263, 29086, 705, 11356, 5687, 393, 3667, 4637, 21531, 20159, 304, 4505, 1045, 3163, 29973, 29871, 319, 1799, 9047, 13566, 29901, 739, 29915, 29879, 1950, 304, 671, 21531, 20159, 304, 4505, 341, 1475, 705, 11356, 29892, 541, 372, 723, 5517, 367, 3755, 4280, 29889, 838, 5869, 293, 20159, 338, 263, 5443, 310, 23964, 393, 11898, 278, 4426, 310, 8162, 393, 526, 21634, 1090, 9126, 316, 689, 800, 29892, 1316, 408, 16116, 292, 322, 289, 2548, 29889, 512, 278, 3030, 310, 341, 1475, 705, 11356, 29892, 445, 1033, 367, 1304, 304, 27599, 278, 3829, 310, 278, 3748, 7613, 322, 8161, 278, 14354, 310, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 2398, 29892, 16049, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29889, 739, 29915, 29879, 451, 1554, 393, 508, 367, 10824, 297, 263, 2560, 1234, 21106, 29879, 29958, 2, 3148, 1001, 29901, 1033, 366, 2367, 592, 278, 330, 391, 310, 920, 372, 1033, 2466, 29973, 29871, 319, 1799, 9047, 13566, 29901, 18585, 29889, 450, 6996, 2969, 723, 367, 304, 671, 21531, 20159, 304, 27599, 278, 3829, 310, 278, 341, 1475, 705, 11356, 3748, 7613, 322, 8161, 278, 14354, 310, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 910, 1033, 25135, 15783, 278, 3748, 7613, 408, 263, 25002, 2913, 322, 773, 25002, 15939, 1934, 1316, 408, 3632, 327, 2270, 6471, 322, 350, 9890, 3694, 304, 10115, 2472, 1048, 278, 4423, 310, 29086, 373, 278, 7613, 29889, 2398, 29892, 408, 306, 5276, 1434, 29892, 16049, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29892, 577, 372, 29915, 29879, 451, 1554, 393, 508, 367, 10824, 297, 263, 2560, 1234, 21106, 29879, 29958, 2, 3148, 1001, 29901, 2367, 592, 263, 2702, 1342, 29871, 
319, 1799, 9047, 13566, 29901, 1094, 263, 2702, 1342, 29892, 7755, 591, 505, 263, 341, 1475, 705, 11356, 3748, 7613, 411, 278, 1494, 5285, 29901, 13, 28956, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 28956, 13, 797, 445, 1206, 29892, 278, 3748, 7613, 508, 367, 9875, 408, 263, 25002, 2913, 29892, 988, 1269, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29889, 450, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 5293, 21531, 20159, 29892, 591, 508, 27599, 278, 3829, 310, 445, 2913, 322, 8161, 278, 14354, 310, 278, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 13, 13, 2831, 2777, 29892, 697, 982, 304, 437, 445, 338, 304, 10272, 278, 3632, 327, 2270, 6471, 310, 278, 2913, 29889, 15089, 327, 2270, 6471, 526, 263, 982, 310, 20766, 278, 26532, 297, 263, 2913, 29892, 322, 896, 508, 367, 1304, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 341, 1475, 705, 11356, 7613, 29889, 512, 278, 1206, 310, 278, 7613, 2038, 29892, 591, 508, 10272, 278, 937, 3632, 327, 2270, 2318, 310, 278, 2913, 29892, 607, 16612, 278, 1353, 322, 376, 12181, 29908, 310, 278, 26532, 297, 278, 2913, 29889, 512, 445, 1206, 29892, 278, 937, 3632, 327, 2270, 2318, 723, 2649, 502, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29889, 13, 13, 29933, 1463, 373, 445, 2472, 29892, 591, 508, 10115, 393, 278, 29086, 297, 278, 3748, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29892, 408, 4318, 2400, 29901, 13, 28956, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 29871, 29896, 29871, 341, 29871, 1577, 29871, 1577, 259, 
29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 341, 259, 29896, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 28956, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1342, 310, 920, 21531, 20159, 1033, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 526, 1784, 916, 1950, 13501, 29892, 322, 278, 2702, 4902, 310, 920, 304, 2334, 963, 723, 8839, 373, 278, 2702, 3748, 7613, 322, 278, 25002, 15939, 1934, 1641, 1304, 29889, 1094, 306, 5276, 1434, 29892, 16049, 263, 1650, 763, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 21106, 29879, 29958, 2, 3148, 1001, 29901, 10241, 474, 471, 4856, 411, 263, 6483, 8004, 310, 1716, 29892, 1033, 366, 5649, 278, 1021, 6964, 304, 592, 411, 393, 11833, 29973, 29871, 319, 1799, 9047, 13566, 29901, 18585, 29889, 17090, 366, 505, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29892, 1244, 338, 920, 366, 1795, 671, 21531, 20159, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29901, 13, 13, 29896, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 7806, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 13, 29906, 29889, 4803, 25002, 15939, 1934, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1670, 526, 1784, 1950, 25002, 15939, 1934, 393, 1033, 367, 1304, 363, 445, 29892, 1316, 408, 3632, 327, 2270, 6471, 29892, 350, 9890, 3694, 29892, 470, 28152, 3632, 3002, 29889, 13, 29941, 29889, 4803, 278, 2472, 10115, 1127, 515, 278, 25002, 15939, 1934, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1152, 1342, 29892, 565, 278, 937, 3632, 327, 2270, 2318, 310, 278, 2913, 14088, 393, 727, 526, 
3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 591, 508, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29946, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 508, 671, 445, 2472, 304, 4772, 963, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 773, 21531, 20159, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 3748, 7613, 322, 278, 25002, 15939, 1934, 1641, 1304, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 21531, 20159, 508, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 21106, 29879, 29958, 2, 3148, 1001, 29901, 920, 723, 474, 437, 372, 411, 28152, 3632, 3002, 470, 738, 310, 278, 916, 25002, 15939, 1934, 29973, 29871, 319, 1799, 9047, 13566, 29901, 1763, 671, 28152, 3632, 3002, 470, 738, 310, 278, 916, 25002, 15939, 1934, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29892, 366, 723, 1101, 278, 1021, 6996, 6576, 408, 5439, 2038, 29892, 541, 411, 777, 12651, 297, 278, 2702, 4902, 29889, 13, 13, 2831, 2777, 29892, 1244, 338, 920, 366, 1795, 671, 28152, 3632, 3002, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29901, 13, 13, 29896, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 7806, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 13, 29906, 29889, 11796, 29872, 278, 28152, 3632, 3002, 310, 278, 2913, 29889, 9034, 9696, 3632, 3002, 338, 263, 982, 310, 20766, 278, 26532, 297, 263, 2913, 975, 931, 29892, 322, 372, 508, 367, 1304, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 341, 1475, 705, 11356, 7613, 29889, 13, 29941, 29889, 4803, 278, 2472, 
515, 278, 28152, 3632, 3002, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1152, 1342, 29892, 565, 278, 28152, 3632, 3002, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 591, 508, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29946, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 508, 671, 445, 2472, 304, 4772, 963, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 773, 28152, 3632, 3002, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 3748, 7613, 322, 278, 2702, 4902, 310, 278, 28152, 3632, 3002, 16287, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 28152, 3632, 3002, 508, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 21106, 29879, 29958, 2, 3148, 1001, 29901, 920, 1033, 474, 2334, 445, 297, 3017, 29973, 29871, 319, 1799, 9047, 13566, 29901, 1763, 2334, 263, 341, 1475, 705, 11356, 899, 369, 393, 3913, 21531, 20159, 297, 5132, 29892, 366, 723, 817, 304, 437, 278, 1494, 29901, 13, 13, 29896, 29889, 16052, 263, 5132, 3577, 363, 21531, 20159, 29889, 1670, 526, 3196, 3987, 3625, 29892, 1316, 408, 402, 566, 2918, 29892, 360, 291, 952, 375, 29892, 470, 4560, 7354, 29899, 29873, 1388, 29889, 4525, 9741, 3867, 14009, 322, 848, 12286, 363, 20602, 25002, 15939, 1934, 29892, 1316, 408, 3632, 327, 2270, 6471, 29892, 350, 9890, 3694, 29892, 470, 28152, 3632, 3002, 29889, 13, 29906, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 910, 1033, 367, 2309, 773, 263, 848, 3829, 4944, 491, 278, 21531, 20159, 3577, 29892, 1316, 408, 263, 3053, 506, 616, 4280, 470, 263, 478, 2035, 29367, 29899, 29934, 4512, 4280, 29889, 7806, 6862, 373, 278, 7613, 723, 3928, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 723, 
367, 9875, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29889, 13, 29941, 29889, 11796, 29872, 278, 25002, 15939, 1934, 310, 278, 2913, 29889, 910, 1033, 367, 2309, 773, 14009, 4944, 491, 278, 21531, 20159, 3577, 29889, 1152, 1342, 29892, 366, 1033, 10272, 278, 28152, 3632, 3002, 310, 278, 2913, 773, 278, 28152, 3632, 3002, 5687, 29889, 13, 29946, 29889, 4803, 278, 2472, 515, 278, 25002, 15939, 1934, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 910, 1033, 367, 2309, 773, 263, 10296, 310, 19475, 24481, 322, 8720, 29889, 1152, 1342, 29892, 565, 278, 28152, 3632, 3002, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 366, 1033, 671, 263, 8424, 310, 775, 304, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29945, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 1033, 671, 263, 8424, 310, 775, 304, 6336, 2828, 373, 278, 9109, 25256, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 16049, 263, 341, 1475, 705, 11356, 899, 369, 393, 3913, 21531, 20159, 297, 5132, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 4902, 310, 278, 5314, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 304, 679, 4687, 411, 1316, 263, 2060, 21106, 29879, 29958, 2], "attention_masklabels": [-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 319, 1799, 9047, 13566, 29901, 739, 29915, 29879, 1950, 304, 671, 21531, 20159, 304, 4505, 341, 1475, 705, 11356, 29892, 541, 372, 723, 5517, 367, 3755, 4280, 29889, 838, 5869, 293, 20159, 338, 263, 5443, 310, 
23964, 393, 11898, 278, 4426, 310, 8162, 393, 526, 21634, 1090, 9126, 316, 689, 800, 29892, 1316, 408, 16116, 292, 322, 289, 2548, 29889, 512, 278, 3030, 310, 341, 1475, 705, 11356, 29892, 445, 1033, 367, 1304, 304, 27599, 278, 3829, 310, 278, 3748, 7613, 322, 8161, 278, 14354, 310, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 2398, 29892, 16049, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29889, 739, 29915, 29879, 451, 1554, 393, 508, 367, 10824, 297, 263, 2560, 1234, 21106, 29879, 29958, 2, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 319, 1799, 9047, 13566, 29901, 18585, 29889, 450, 6996, 2969, 723, 367, 304, 671, 21531, 20159, 304, 27599, 278, 3829, 310, 278, 341, 1475, 705, 11356, 3748, 7613, 322, 8161, 278, 14354, 310, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 910, 1033, 25135, 15783, 278, 3748, 7613, 408, 263, 25002, 2913, 322, 773, 25002, 15939, 1934, 1316, 408, 3632, 327, 2270, 6471, 322, 350, 9890, 3694, 304, 10115, 2472, 1048, 278, 4423, 310, 29086, 373, 278, 7613, 29889, 2398, 29892, 408, 306, 5276, 1434, 29892, 16049, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29892, 577, 372, 29915, 29879, 451, 1554, 393, 508, 367, 10824, 297, 263, 2560, 1234, 21106, 29879, 29958, 2, -100, -100, -100, -100, -100, -100, -100, -100, -100, 319, 1799, 9047, 13566, 29901, 1094, 263, 2702, 1342, 29892, 7755, 591, 505, 263, 341, 1475, 705, 11356, 3748, 7613, 411, 278, 1494, 5285, 29901, 13, 28956, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 28956, 13, 797, 445, 1206, 29892, 278, 3748, 7613, 508, 
367, 9875, 408, 263, 25002, 2913, 29892, 988, 1269, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29889, 450, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 5293, 21531, 20159, 29892, 591, 508, 27599, 278, 3829, 310, 445, 2913, 322, 8161, 278, 14354, 310, 278, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 13, 13, 2831, 2777, 29892, 697, 982, 304, 437, 445, 338, 304, 10272, 278, 3632, 327, 2270, 6471, 310, 278, 2913, 29889, 15089, 327, 2270, 6471, 526, 263, 982, 310, 20766, 278, 26532, 297, 263, 2913, 29892, 322, 896, 508, 367, 1304, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 341, 1475, 705, 11356, 7613, 29889, 512, 278, 1206, 310, 278, 7613, 2038, 29892, 591, 508, 10272, 278, 937, 3632, 327, 2270, 2318, 310, 278, 2913, 29892, 607, 16612, 278, 1353, 322, 376, 12181, 29908, 310, 278, 26532, 297, 278, 2913, 29889, 512, 445, 1206, 29892, 278, 937, 3632, 327, 2270, 2318, 723, 2649, 502, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29889, 13, 13, 29933, 1463, 373, 445, 2472, 29892, 591, 508, 10115, 393, 278, 29086, 297, 278, 3748, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29892, 408, 4318, 2400, 29901, 13, 28956, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 29871, 29896, 29871, 341, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 341, 259, 29896, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 28956, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1342, 310, 920, 21531, 20159, 1033, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 526, 1784, 916, 1950, 13501, 29892, 322, 278, 2702, 4902, 310, 920, 304, 2334, 963, 723, 8839, 373, 278, 2702, 3748, 7613, 322, 278, 25002, 15939, 1934, 1641, 1304, 29889, 1094, 306, 5276, 1434, 
29892, 16049, 263, 1650, 763, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 21106, 29879, 29958, 2, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 319, 1799, 9047, 13566, 29901, 18585, 29889, 17090, 366, 505, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29892, 1244, 338, 920, 366, 1795, 671, 21531, 20159, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29901, 13, 13, 29896, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 7806, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 13, 29906, 29889, 4803, 25002, 15939, 1934, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1670, 526, 1784, 1950, 25002, 15939, 1934, 393, 1033, 367, 1304, 363, 445, 29892, 1316, 408, 3632, 327, 2270, 6471, 29892, 350, 9890, 3694, 29892, 470, 28152, 3632, 3002, 29889, 13, 29941, 29889, 4803, 278, 2472, 10115, 1127, 515, 278, 25002, 15939, 1934, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1152, 1342, 29892, 565, 278, 937, 3632, 327, 2270, 2318, 310, 278, 2913, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 591, 508, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29946, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 508, 671, 445, 2472, 304, 4772, 963, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 773, 21531, 20159, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 
29892, 8679, 373, 278, 2702, 3748, 7613, 322, 278, 25002, 15939, 1934, 1641, 1304, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 21531, 20159, 508, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 21106, 29879, 29958, 2, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 319, 1799, 9047, 13566, 29901, 1763, 671, 28152, 3632, 3002, 470, 738, 310, 278, 916, 25002, 15939, 1934, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29892, 366, 723, 1101, 278, 1021, 6996, 6576, 408, 5439, 2038, 29892, 541, 411, 777, 12651, 297, 278, 2702, 4902, 29889, 13, 13, 2831, 2777, 29892, 1244, 338, 920, 366, 1795, 671, 28152, 3632, 3002, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29901, 13, 13, 29896, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 7806, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 13, 29906, 29889, 11796, 29872, 278, 28152, 3632, 3002, 310, 278, 2913, 29889, 9034, 9696, 3632, 3002, 338, 263, 982, 310, 20766, 278, 26532, 297, 263, 2913, 975, 931, 29892, 322, 372, 508, 367, 1304, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 341, 1475, 705, 11356, 7613, 29889, 13, 29941, 29889, 4803, 278, 2472, 515, 278, 28152, 3632, 3002, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1152, 1342, 29892, 565, 278, 28152, 3632, 3002, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 591, 508, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29946, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 508, 671, 445, 2472, 304, 4772, 963, 322, 2821, 278, 7613, 29889, 
13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 773, 28152, 3632, 3002, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 3748, 7613, 322, 278, 2702, 4902, 310, 278, 28152, 3632, 3002, 16287, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 28152, 3632, 3002, 508, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 21106, 29879, 29958, 2, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 319, 1799, 9047, 13566, 29901, 1763, 2334, 263, 341, 1475, 705, 11356, 899, 369, 393, 3913, 21531, 20159, 297, 5132, 29892, 366, 723, 817, 304, 437, 278, 1494, 29901, 13, 13, 29896, 29889, 16052, 263, 5132, 3577, 363, 21531, 20159, 29889, 1670, 526, 3196, 3987, 3625, 29892, 1316, 408, 402, 566, 2918, 29892, 360, 291, 952, 375, 29892, 470, 4560, 7354, 29899, 29873, 1388, 29889, 4525, 9741, 3867, 14009, 322, 848, 12286, 363, 20602, 25002, 15939, 1934, 29892, 1316, 408, 3632, 327, 2270, 6471, 29892, 350, 9890, 3694, 29892, 470, 28152, 3632, 3002, 29889, 13, 29906, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 910, 1033, 367, 2309, 773, 263, 848, 3829, 4944, 491, 278, 21531, 20159, 3577, 29892, 1316, 408, 263, 3053, 506, 616, 4280, 470, 263, 478, 2035, 29367, 29899, 29934, 4512, 4280, 29889, 7806, 6862, 373, 278, 7613, 723, 3928, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 723, 367, 9875, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29889, 13, 29941, 29889, 11796, 29872, 278, 25002, 15939, 1934, 310, 278, 2913, 29889, 910, 1033, 367, 2309, 773, 14009, 4944, 491, 278, 21531, 20159, 3577, 29889, 1152, 1342, 29892, 366, 1033, 10272, 278, 28152, 3632, 3002, 310, 278, 2913, 773, 278, 28152, 3632, 3002, 5687, 29889, 13, 29946, 29889, 4803, 278, 2472, 515, 278, 25002, 15939, 1934, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 910, 1033, 367, 2309, 773, 263, 10296, 310, 19475, 24481, 322, 8720, 29889, 
1152, 1342, 29892, 565, 278, 28152, 3632, 3002, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 366, 1033, 671, 263, 8424, 310, 775, 304, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29945, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 1033, 671, 263, 8424, 310, 775, 304, 6336, 2828, 373, 278, 9109, 25256, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 16049, 263, 341, 1475, 705, 11356, 899, 369, 393, 3913, 21531, 20159, 297, 5132, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 4902, 310, 278, 5314, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 304, 679, 4687, 411, 1316, 263, 2060, 21106, 29879, 29958, 2]}
|
tests/test_prompt_tokenizers.py
CHANGED
@@ -21,7 +21,7 @@ from axolotl.prompt_tokenizers import (
|
|
21 |
AlpacaPromptTokenizingStrategy,
|
22 |
ShareGPTPromptTokenizingStrategy,
|
23 |
)
|
24 |
-
from axolotl.prompters import AlpacaPrompter, PromptStyle, ShareGPTPrompter
|
25 |
|
26 |
LOG = logging.getLogger("axolotl")
|
27 |
|
@@ -60,7 +60,7 @@ class TestPromptTokenizationStrategies(unittest.TestCase):
|
|
60 |
) as fin:
|
61 |
data = fin.read()
|
62 |
tokenized_conversation = json.loads(data)
|
63 |
-
prompter =
|
64 |
strat = ShareGPTPromptTokenizingStrategy(
|
65 |
prompter,
|
66 |
self.tokenizer,
|
@@ -79,7 +79,7 @@ class TestPromptTokenizationStrategies(unittest.TestCase):
|
|
79 |
) as fin:
|
80 |
data = fin.read()
|
81 |
conversation = json.loads(data)
|
82 |
-
prompter =
|
83 |
strat = ShareGPTPromptTokenizingStrategy(
|
84 |
prompter,
|
85 |
self.tokenizer,
|
|
|
21 |
AlpacaPromptTokenizingStrategy,
|
22 |
ShareGPTPromptTokenizingStrategy,
|
23 |
)
|
24 |
+
from axolotl.prompters import AlpacaPrompter, PromptStyle, ShareGPTPrompterV2
|
25 |
|
26 |
LOG = logging.getLogger("axolotl")
|
27 |
|
|
|
60 |
) as fin:
|
61 |
data = fin.read()
|
62 |
tokenized_conversation = json.loads(data)
|
63 |
+
prompter = ShareGPTPrompterV2()
|
64 |
strat = ShareGPTPromptTokenizingStrategy(
|
65 |
prompter,
|
66 |
self.tokenizer,
|
|
|
79 |
) as fin:
|
80 |
data = fin.read()
|
81 |
conversation = json.loads(data)
|
82 |
+
prompter = ShareGPTPrompterV2()
|
83 |
strat = ShareGPTPromptTokenizingStrategy(
|
84 |
prompter,
|
85 |
self.tokenizer,
|
tests/test_validation.py
CHANGED
@@ -374,3 +374,26 @@ class ValidationTest(unittest.TestCase):
|
|
374 |
)
|
375 |
|
376 |
validate_config(cfg)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
374 |
)
|
375 |
|
376 |
validate_config(cfg)
|
377 |
+
|
378 |
+
def test_sharegpt_deprecation(self):
|
379 |
+
cfg = DictDefault(
|
380 |
+
{"datasets": [{"path": "lorem/ipsum", "type": "sharegpt:chat"}]}
|
381 |
+
)
|
382 |
+
with self._caplog.at_level(logging.WARNING):
|
383 |
+
validate_config(cfg)
|
384 |
+
assert any(
|
385 |
+
"`type: sharegpt:chat` will soon be deprecated." in record.message
|
386 |
+
for record in self._caplog.records
|
387 |
+
)
|
388 |
+
assert cfg.datasets[0].type == "sharegpt"
|
389 |
+
|
390 |
+
cfg = DictDefault(
|
391 |
+
{"datasets": [{"path": "lorem/ipsum", "type": "sharegpt_simple:load_role"}]}
|
392 |
+
)
|
393 |
+
with self._caplog.at_level(logging.WARNING):
|
394 |
+
validate_config(cfg)
|
395 |
+
assert any(
|
396 |
+
"`type: sharegpt_simple` will soon be deprecated." in record.message
|
397 |
+
for record in self._caplog.records
|
398 |
+
)
|
399 |
+
assert cfg.datasets[0].type == "sharegpt:load_role"
|