barneystinson committed
Commit ef420a7
1 Parent(s): 4f8997f

Upload training_config.yml with huggingface_hub

Files changed (1)
  1. training_config.yml +11 -11
training_config.yml CHANGED
@@ -19,17 +19,17 @@ checkpointer:
     - meta_model_0.pt
   adapter_checkpoint: null
   recipe_checkpoint: null
-  output_dir: output_checkpoints/experiment_2
+  output_dir: output_checkpoints/experiment_5
   model_type: LLAMA3
 resume_from_checkpoint: false
-interim_checkpoint_steps: 200
+interim_checkpoint_steps: 15000
 interim_gen_steps: null
 max_new_tokens: 77
 temperature: 0.6
 top_k: 231
 dataset:
   _component_: ds.EvenBatcher
-  buffer_size: 36
+  buffer_size: 72
   dataset:
     _component_: ds.RoundRobinDataset
     datasets:
@@ -46,25 +46,25 @@ dataset:
       train_on_input: false
 seed: null
 shuffle: true
-batch_size: 8
+batch_size: 6
 optimizer:
   _component_: torch.optim.AdamW
-  weight_decay: 0.01
-  lr: 1.0e-05
+  weight_decay: 0.008
+  lr: 0.0001
 lr_scheduler:
   _component_: torchtune.modules.get_cosine_schedule_with_warmup
-  num_warmup_steps: 200
+  num_warmup_steps: 100
 loss:
   _component_: torch.nn.CrossEntropyLoss
-epochs: 1
+epochs: 6
 max_steps_per_epoch: null
-gradient_accumulation_steps: 64
+gradient_accumulation_steps: 16
 compile: false
 output_dir: /tmp/lora_finetune_output
 metric_logger:
-  _component_: torchtune.utils.metric_logging.WandBLogger
+  _component_: torchtune.utils.metric_logging.DiskLogger
   log_dir: ${output_dir}
-log_every_n_steps: 100
+log_every_n_steps: null
 device: cuda
 dtype: bf16
 enable_activation_checkpointing: false
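
The batch_size and gradient_accumulation_steps changes combine into a much smaller effective batch. A quick sketch of the arithmetic, assuming the usual definition of effective batch size as per-device batch_size times gradient_accumulation_steps (times the device count, which this config does not specify):

# Effective batch size before and after this commit (single-device assumption).
old_effective = 8 * 64   # batch_size 8, gradient_accumulation_steps 64 -> 512
new_effective = 6 * 16   # batch_size 6, gradient_accumulation_steps 16 -> 96
print(old_effective, new_effective)  # 512 96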
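The scheduler section keeps torchtune.modules.get_cosine_schedule_with_warmup but halves the warmup to 100 steps while raising the peak lr to 0.0001. A minimal sketch of the usual cosine-with-warmup multiplier, assuming torchtune follows the common formulation; num_training_steps is a hypothetical value here, since the total step count is not part of this config:

import math

def lr_multiplier(step, num_warmup_steps=100, num_training_steps=10_000):
    # Linear warmup from 0 to 1 over the first num_warmup_steps steps...
    if step < num_warmup_steps:
        return step / max(1, num_warmup_steps)
    # ...then cosine decay from 1 toward 0 over the remaining steps.
    progress = (step - num_warmup_steps) / max(1, num_training_steps - num_warmup_steps)
    return 0.5 * (1.0 + math.cos(math.pi * progress))

peak_lr = 1.0e-4                      # lr after this commit
print(peak_lr * lr_multiplier(50))    # mid-warmup: 5e-05
print(peak_lr * lr_multiplier(100))   # end of warmup: 1e-04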
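For completeness, a minimal sketch of how a config in this format can be loaded and inspected. It assumes the file follows torchtune's OmegaConf-based convention, where each _component_ key names the class or function to build and its sibling keys are the constructor arguments; the file name is the one from this commit:

from omegaconf import OmegaConf

cfg = OmegaConf.load("training_config.yml")

# _component_ names the object to build; sibling keys are its kwargs.
print(cfg["optimizer"]["_component_"])   # torch.optim.AdamW
print(cfg["optimizer"]["lr"])            # 0.0001 after this commit

# ${output_dir} is an OmegaConf interpolation; it resolves against the
# top-level output_dir when the value is read.
print(cfg["metric_logger"]["log_dir"])   # /tmp/lora_finetune_output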