tmm1 committed on
Commit
b4d1d22
1 Parent(s): 9f99104

note pattern when using groups

Browse files
Files changed (1) hide show
  1. README.md +2 -1
README.md CHANGED
@@ -427,7 +427,8 @@ save_safetensors:
427
  # whether to mask out or include the human's prompt from the training labels
428
  train_on_inputs: false
429
  # group similarly sized data to minimize padding
430
- # may be slower to start as it must download and sort the entire dataset
 
431
  group_by_length: false
432
 
433
  # Whether to use gradient checkpointing https://huggingface.co/docs/transformers/v4.18.0/en/performance#gradient-checkpointing
 
427
  # whether to mask out or include the human's prompt from the training labels
428
  train_on_inputs: false
429
  # group similarly sized data to minimize padding
430
+ # may be slower to start, as it must download and sort the entire dataset
431
+ # note that training loss may have an oscillating pattern with this enabled
432
  group_by_length: false
433
 
434
  # Whether to use gradient checkpointing https://huggingface.co/docs/transformers/v4.18.0/en/performance#gradient-checkpointing