winglian committed on
Commit
3437149
1 Parent(s): 245c5c4

recommend padding when using sample packing (#531)

Browse files
examples/code-llama/13b/lora.yml CHANGED
@@ -17,6 +17,7 @@ output_dir: ./lora-out
17
 
18
  sequence_len: 100000
19
  sample_packing: true
 
20
 
21
  adapter: lora
22
  lora_model_dir:
 
17
 
18
  sequence_len: 100000
19
  sample_packing: true
20
+ pad_to_sequence_len: true
21
 
22
  adapter: lora
23
  lora_model_dir:
examples/code-llama/13b/qlora.yml CHANGED
@@ -20,6 +20,7 @@ lora_model_dir:
20
 
21
  sequence_len: 100000
22
  sample_packing: true
 
23
 
24
  lora_r: 32
25
  lora_alpha: 16
 
20
 
21
  sequence_len: 100000
22
  sample_packing: true
23
+ pad_to_sequence_len: true
24
 
25
  lora_r: 32
26
  lora_alpha: 16
examples/code-llama/34b/lora.yml CHANGED
@@ -17,6 +17,7 @@ output_dir: ./lora-out
17
 
18
  sequence_len: 100000
19
  sample_packing: true
 
20
 
21
  adapter: lora
22
  lora_model_dir:
 
17
 
18
  sequence_len: 100000
19
  sample_packing: true
20
+ pad_to_sequence_len: true
21
 
22
  adapter: lora
23
  lora_model_dir:
examples/code-llama/34b/qlora.yml CHANGED
@@ -20,6 +20,7 @@ lora_model_dir:
20
 
21
  sequence_len: 100000
22
  sample_packing: true
 
23
 
24
  lora_r: 32
25
  lora_alpha: 16
 
20
 
21
  sequence_len: 100000
22
  sample_packing: true
23
+ pad_to_sequence_len: true
24
 
25
  lora_r: 32
26
  lora_alpha: 16
examples/code-llama/7b/lora.yml CHANGED
@@ -17,6 +17,7 @@ output_dir: ./lora-out
17
 
18
  sequence_len: 100000
19
  sample_packing: true
 
20
 
21
  adapter: lora
22
  lora_model_dir:
 
17
 
18
  sequence_len: 100000
19
  sample_packing: true
20
+ pad_to_sequence_len: true
21
 
22
  adapter: lora
23
  lora_model_dir:
examples/code-llama/7b/qlora.yml CHANGED
@@ -20,6 +20,7 @@ lora_model_dir:
20
 
21
  sequence_len: 100000
22
  sample_packing: true
 
23
 
24
  lora_r: 32
25
  lora_alpha: 16
 
20
 
21
  sequence_len: 100000
22
  sample_packing: true
23
+ pad_to_sequence_len: true
24
 
25
  lora_r: 32
26
  lora_alpha: 16
examples/llama-2/lora.yml CHANGED
@@ -17,6 +17,7 @@ output_dir: ./lora-out
17
 
18
  sequence_len: 4096
19
  sample_packing: true
 
20
 
21
  adapter: lora
22
  lora_model_dir:
 
17
 
18
  sequence_len: 4096
19
  sample_packing: true
20
+ pad_to_sequence_len: true
21
 
22
  adapter: lora
23
  lora_model_dir:
examples/llama-2/qlora.yml CHANGED
@@ -20,6 +20,7 @@ lora_model_dir:
20
 
21
  sequence_len: 4096
22
  sample_packing: true
 
23
 
24
  lora_r: 32
25
  lora_alpha: 16
 
20
 
21
  sequence_len: 4096
22
  sample_packing: true
23
+ pad_to_sequence_len: true
24
 
25
  lora_r: 32
26
  lora_alpha: 16
examples/llama-2/relora.yml CHANGED
@@ -20,6 +20,7 @@ lora_model_dir:
20
 
21
  sequence_len: 4096
22
  sample_packing: true
 
23
 
24
  lora_r: 8
25
  lora_alpha: 16
 
20
 
21
  sequence_len: 4096
22
  sample_packing: true
23
+ pad_to_sequence_len: true
24
 
25
  lora_r: 8
26
  lora_alpha: 16
src/axolotl/utils/config.py CHANGED
@@ -97,6 +97,11 @@ def validate_config(cfg):
97
  )
98
  )
99
 
 
 
 
 
 
100
  if cfg.gradient_accumulation_steps and cfg.batch_size:
101
  raise ValueError(
102
  "please set only one of gradient_accumulation_steps or batch_size"
 
97
  )
98
  )
99
 
100
+ if cfg.sample_packing and not cfg.pad_to_sequence_len:
101
+ LOG.warning(
102
+ "`pad_to_sequence_len: true` is recommended when using sample_packing"
103
+ )
104
+
105
  if cfg.gradient_accumulation_steps and cfg.batch_size:
106
  raise ValueError(
107
  "please set only one of gradient_accumulation_steps or batch_size"
tests/test_validation.py CHANGED
@@ -328,6 +328,20 @@ class ValidationTest(unittest.TestCase):
328
  for record in self._caplog.records
329
  )
330
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
331
  cfg = DictDefault(
332
  {
333
  "max_packed_sequence_len": 2048,
 
328
  for record in self._caplog.records
329
  )
330
 
331
+ cfg = DictDefault(
332
+ {
333
+ "sample_packing": True,
334
+ "pad_to_sequence_len": None,
335
+ }
336
+ )
337
+ with self._caplog.at_level(logging.WARNING):
338
+ validate_config(cfg)
339
+ assert any(
340
+ "`pad_to_sequence_len: true` is recommended when using sample_packing"
341
+ in record.message
342
+ for record in self._caplog.records
343
+ )
344
+
345
  cfg = DictDefault(
346
  {
347
  "max_packed_sequence_len": 2048,