jan-hq committed on
Commit
f0c9e0a
1 Parent(s): 98b9dd5

Upload 2 files

Browse files
Files changed (2) hide show
  1. loss_log.txt +0 -0
  2. pretrain_config.yaml +92 -0
loss_log.txt ADDED
The diff for this file is too large to render. See raw diff
 
pretrain_config.yaml ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Config for multi-device full finetuning in full_finetune_distributed.py
2
+ # using the llama3_1_s_8b model (initialized from ../model_zoo/Llama3.1_s_8b_init)
3
+ #
4
+ # This config assumes that you've run the following command before launching
5
+ # this run:
6
+ # tune download meta-llama/Meta-Llama-3-8B-Instruct --output-dir /tmp/Meta-Llama-3-8B-Instruct --hf-token <HF_TOKEN>
7
+ #
8
+ # To launch on 4 devices, run the following command from root:
9
+ # tune run --nproc_per_node 4 full_finetune_distributed --config llama3/8B_full
10
+ #
11
+ # You can add specific overrides through the command line. For example
12
+ # to override the checkpointer directory while launching training
13
+ # you can run:
14
+ # tune run --nproc_per_node 4 full_finetune_distributed --config llama3/8B_full checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
15
+ #
16
+ # This config works best when the model is being fine-tuned on 2+ GPUs.
17
+ # Single device full finetuning requires more memory optimizations. It's
18
+ # best to use 8B_full_single_device.yaml for those cases
19
+ # Tokenizer
20
+ tokenizer:
21
+ _component_: torchtune.models.llama3.llama3_s_tokenizer
22
+ path: ../model_zoo/tokenizer.model
23
+ max_seq_len: 512
24
+
25
+ # Dataset
26
+ dataset:
27
+ _component_: torchtune.datasets.sound_completion_dataset
28
+ source: jan-hq/raw_audio_with_audio_tokens_for_pretraining_using_Whisper_VQ
29
+ max_seq_len: 512
30
+ split: train
31
+ column: text
32
+
33
+ seed: 42
34
+ shuffle: True
35
+ # Model Arguments
36
+ model:
37
+ _component_: torchtune.models.llama3_1.llama3_1_s_8b
38
+ # path: model_zoo/Llama3.1_s_8b_init
39
+ checkpointer:
40
+ _component_: torchtune.training.FullModelHFCheckpointerSaveSteps
41
+ checkpoint_dir: ../model_zoo/Llama3.1_s_8b_init
42
+ checkpoint_files: [
43
+ model-00001-of-00004.safetensors,
44
+ model-00002-of-00004.safetensors,
45
+ model-00003-of-00004.safetensors,
46
+ model-00004-of-00004.safetensors,
47
+ ]
48
+ recipe_checkpoint: null
49
+ output_dir: ../model_zoo/llama3-s
50
+ model_type: LLAMA3
51
+ resume_from_checkpoint: False
52
+ save_every_n_steps: 1000
53
+ max_checkpoints: 3
54
+ # Fine-tuning arguments
55
+ batch_size: 12
56
+ epochs: 1
57
+ max_steps_per_epoch: null
58
+ gradient_accumulation_steps: 4
59
+ compile: False
60
+ # Optimizer and Scheduler
61
+ optimizer:
62
+ _component_: torch.optim.AdamW  # to use Adam-mini instead, set this to torchtune.modules.optimizer.Adam_mini
63
+ weight_decay: 0.01
64
+ lr: 2e-4
65
+ fused: True
66
+ lr_scheduler:
67
+ _component_: torchtune.modules.get_cosine_schedule_with_warmup
68
+ num_warmup_steps: 50
69
+
70
+ loss:
71
+ _component_: torch.nn.CrossEntropyLoss
72
+
73
+ fsdp:
74
+ cpu_offload: False
75
+
76
+ # Training env
77
+ device: cuda
78
+ dtype: bf16
79
+
80
+ # Memory management
81
+ enable_activation_checkpointing: True
82
+ memory_efficient_fsdp_wrap: True
83
+ ac_mode: 'selective'
84
+
85
+
86
+ # Logging
87
+ metric_logger:
88
+ _component_: torchtune.training.metric_logging.DiskLogger
89
+ log_dir: ${output_dir}
90
+ output_dir: ../model_zoo/Llama3-sound-log/
91
+ log_every_n_steps: 1
92
+ log_peak_memory_stats: False