Fix and document test_datasets (#1228)
* Make sure test_datasets are used and val_set_size is handled correctly.
* Add test_datasets docs.
* Apply suggestions from code review
---------
Co-authored-by: Wing Lian <wing.lian@gmail.com>
- README.md +11 -0
- src/axolotl/core/trainer_builder.py +2 -1
- src/axolotl/utils/data.py +1 -1
README.md

```diff
@@ -607,6 +607,17 @@ datasets:
     # For `completion` datsets only, uses the provided field instead of `text` column
     field:
 
+# A list of one or more datasets to eval the model with.
+# You can use either test_datasets, or val_set_size, but not both.
+test_datasets:
+  - path: /workspace/data/eval.jsonl
+    ds_type: json
+    # You need to specify a split. For "json" datasets the default split is called "train".
+    split: train
+    type: completion
+    data_files:
+      - /workspace/data/eval.jsonl
+
 # use RL training: dpo, ipo, kto_pair
 rl:
```
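To illustrate the documented constraint, here is a minimal standalone sketch (not part of this PR) that parses a config like the one above and rejects setting both options; the guard itself is hypothetical and is not axolotl's actual validation code.

```python
# Minimal sketch (not from this PR): enforce that a config sets either
# test_datasets or val_set_size, but not both, as the README now documents.
import yaml

raw_cfg = """
val_set_size: 0
test_datasets:
  - path: /workspace/data/eval.jsonl
    ds_type: json
    split: train
    type: completion
"""

cfg = yaml.safe_load(raw_cfg)

# Hypothetical guard mirroring the documented constraint.
if cfg.get("test_datasets") and cfg.get("val_set_size", 0) > 0:
    raise ValueError("Use either test_datasets or val_set_size, not both.")

print("config ok:", cfg["test_datasets"][0]["path"])
```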
src/axolotl/core/trainer_builder.py

```diff
@@ -735,7 +735,7 @@ class HFCausalTrainerBuilder(TrainerBuilderBase):
         elif self.cfg.sample_packing and self.cfg.eval_sample_packing is False:
             training_arguments_kwargs["dataloader_drop_last"] = True
 
-        if self.cfg.val_set_size == 0:
+        if not self.cfg.test_datasets and self.cfg.val_set_size == 0:
             # no eval set, so don't eval
             training_arguments_kwargs["evaluation_strategy"] = "no"
         elif self.cfg.eval_steps:
@@ -822,6 +822,7 @@ class HFCausalTrainerBuilder(TrainerBuilderBase):
                 self.cfg.load_best_model_at_end is not False
                 or self.cfg.early_stopping_patience
             )
+            and not self.cfg.test_datasets
             and self.cfg.val_set_size > 0
             and self.cfg.save_steps
             and self.cfg.eval_steps
```
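A condensed sketch of the behavior the first hunk fixes, with a stand-in cfg object (the `SimpleNamespace`, the `evaluation_strategy` helper, and the non-"no" return values are illustrative assumptions, not axolotl's actual code): evaluation is now only disabled when neither test_datasets nor a validation split is configured.

```python
# Illustrative reduction of the patched condition; not axolotl's actual code.
from types import SimpleNamespace

def evaluation_strategy(cfg) -> str:
    # Before this PR, val_set_size == 0 alone disabled evaluation, which
    # silently skipped eval even when test_datasets was configured.
    if not cfg.test_datasets and cfg.val_set_size == 0:
        return "no"
    return "steps" if cfg.eval_steps else "epoch"

cfg = SimpleNamespace(test_datasets=[{"path": "eval.jsonl"}], val_set_size=0, eval_steps=None)
assert evaluation_strategy(cfg) != "no"  # eval stays on with test_datasets
```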
src/axolotl/utils/data.py

```diff
@@ -440,7 +440,7 @@ def load_prepare_datasets(
     split="train",
 ) -> Tuple[Dataset, Dataset, List[Prompter]]:
     dataset, prompters = load_tokenized_prepared_datasets(
-        tokenizer, cfg, default_dataset_prepared_path
+        tokenizer, cfg, default_dataset_prepared_path, split=split
     )
 
     if cfg.dataset_shard_num and cfg.dataset_shard_idx is not None:
```
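This hunk threads the `split` keyword through to the underlying loader; the README note about json datasets reflects how the Hugging Face `datasets` library names splits. A minimal standalone example (the file path is the one from the README example above):

```python
# Why `split` matters: the Hugging Face `datasets` json loader exposes a
# local file under a split named "train" by default, even for eval data.
from datasets import load_dataset

eval_ds = load_dataset(
    "json",
    data_files="/workspace/data/eval.jsonl",  # eval file from the README example
    split="train",  # the default split name for json files
)
print(len(eval_ds))  # number of eval examples
```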