winglian committed on
Commit
badda37
·
unverified ·
1 Parent(s): a01b998

make sure to register the base chatml template even if no system message is provided (#1207)

Browse files
.github/workflows/tests.yml CHANGED
@@ -106,3 +106,7 @@ jobs:
106
  - name: GPU Unit Tests monkeypatched w docker image
107
  run: |
108
  docker run --privileged --gpus "all" --env WANDB_DISABLED=true --rm ${{ steps.metadata.outputs.tags }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }} pytest /workspace/axolotl/tests/e2e/patched/
 
 
 
 
 
106
  - name: GPU Unit Tests monkeypatched w docker image
107
  run: |
108
  docker run --privileged --gpus "all" --env WANDB_DISABLED=true --rm ${{ steps.metadata.outputs.tags }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }} pytest /workspace/axolotl/tests/e2e/patched/
109
+ - name: Prune image from docker
110
+ if: github.ref != 'refs/heads/main'
111
+ run: |
112
+ docker rmi -f ${{ steps.metadata.outputs.tags }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}
src/axolotl/cli/preprocess.py CHANGED
@@ -40,6 +40,8 @@ def do_cli(config: Path = Path("examples/"), **kwargs):
40
  f"ChatML set. Adding default system message: {parsed_cfg.default_system_message}"
41
  )
42
  register_chatml_template(parsed_cfg.default_system_message)
 
 
43
 
44
  if not parsed_cfg.dataset_prepared_path:
45
  msg = (
 
40
  f"ChatML set. Adding default system message: {parsed_cfg.default_system_message}"
41
  )
42
  register_chatml_template(parsed_cfg.default_system_message)
43
+ else:
44
+ register_chatml_template()
45
 
46
  if not parsed_cfg.dataset_prepared_path:
47
  msg = (
src/axolotl/cli/train.py CHANGED
@@ -43,7 +43,10 @@ def do_train(cfg, cli_args) -> Tuple[PreTrainedModel, PreTrainedTokenizer]:
43
  f"ChatML set. Adding default system message: {cfg.default_system_message}"
44
  )
45
  register_chatml_template(cfg.default_system_message)
 
 
46
 
 
47
  dataset_meta = load_rl_datasets(cfg=cfg, cli_args=cli_args)
48
  else:
49
  dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
 
43
  f"ChatML set. Adding default system message: {cfg.default_system_message}"
44
  )
45
  register_chatml_template(cfg.default_system_message)
46
+ else:
47
+ register_chatml_template()
48
 
49
+ if cfg.rl:
50
  dataset_meta = load_rl_datasets(cfg=cfg, cli_args=cli_args)
51
  else:
52
  dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
src/axolotl/utils/data.py CHANGED
@@ -16,6 +16,7 @@ from datasets import (
16
  load_from_disk,
17
  )
18
  from huggingface_hub import hf_hub_download
 
19
  from torch.utils.data import RandomSampler
20
  from transformers import PreTrainedTokenizerBase
21
 
@@ -213,7 +214,7 @@ def load_tokenized_prepared_datasets(
213
  token=use_auth_token,
214
  )
215
  ds_from_hub = True
216
- except (FileNotFoundError, ConnectionError):
217
  pass
218
 
219
  ds_from_cloud = False
 
16
  load_from_disk,
17
  )
18
  from huggingface_hub import hf_hub_download
19
+ from huggingface_hub.utils import HFValidationError
20
  from torch.utils.data import RandomSampler
21
  from transformers import PreTrainedTokenizerBase
22
 
 
214
  token=use_auth_token,
215
  )
216
  ds_from_hub = True
217
+ except (FileNotFoundError, ConnectionError, HFValidationError):
218
  pass
219
 
220
  ds_from_cloud = False