# Export residue condensed: original file size 2,075 bytes; hash-like tokens
# c4dcff9 and 224c45d and a 1-85 line-number gutter preceded the config.
---
# Model builder and the LoRA options passed to it.
model:
  _component_: models.lora_mmllama3_8b
  # Attention projections listed for LoRA; MLP and output layers are
  # switched off by the two flags that follow.
  lora_attn_modules: [q_proj, v_proj]
  apply_lora_to_mlp: false
  apply_lora_to_output: false
  lora_rank: 32
  lora_alpha: 64
  # NOTE(review): the meaning of the next two keys is defined by the builder,
  # which is not visible here.
  perception_tokens: 2
  use_clip: false
# Tokenizer component and the model file it is pointed at.
tokenizer:
  _component_: models.a2a_tokenizer
  # Relative path; presumably resolved against the launch directory (confirm).
  path: models/tokenizer.model
# Checkpoint loading/saving component.
checkpointer:
  _component_: torchtune.utils.FullModelMetaCheckpointer
  # NOTE(review): checkpoint_dir and checkpoint_files are unset placeholders.
  # They are spelled as explicit nulls (same parsed value as a bare empty
  # entry) so the gap is visible; presumably both must be filled in before a
  # run. Confirm against the recipe.
  checkpoint_dir: null
  checkpoint_files:
  - null
  adapter_checkpoint: null
  recipe_checkpoint: null
  # Relative directory, distinct from the top-level output_dir key.
  output_dir: output_checkpoints/experiment_1
  model_type: LLAMA3
# Resume flag and interim checkpoint/generation step settings.
resume_from_checkpoint: false
interim_checkpoint_steps: 15000
interim_gen_steps: null
# Top-level sampling values. The inference section carries its own
# max_new_tokens / temperature / top_k with different numbers.
# NOTE(review): presumably these apply to interim generation; confirm in the recipe.
max_new_tokens: 88
temperature: 0.7
top_k: 232
# Training data: an EvenBatcher wrapping a RoundRobinDataset over four sources.
dataset:
  _component_: ds.EvenBatcher
  buffer_size: 73
  dataset:
    _component_: ds.RoundRobinDataset
    datasets:
    # The only source configured with a length instead of a parquet path.
    - _component_: ds.OmegaVideoCaptionDataset
      length: 500000
    # Three parquet-backed sources, each with train_on_input disabled.
    - _component_: ds.LlavaInstructDataset
      dataset_path: ds/coco_llava_instruct/output.parquet
      train_on_input: false
    - _component_: ds.LlavaInstructDataset
      dataset_path: ds/vision_flan/output.parquet
      train_on_input: false
    - _component_: ds.CaptionInstructDataset
      dataset_path: ds/sam_llava/output.parquet
      train_on_input: false
# No seed is configured (null).
# NOTE(review): runs are presumably not reproducible unless the recipe picks
# a seed itself; confirm.
seed: null
shuffle: true
batch_size: 6
# AdamW optimizer settings.
optimizer:
  _component_: torch.optim.AdamW
  # NOTE(review): 0.99 is far above AdamW's documented default of 0.01;
  # confirm it is intended.
  weight_decay: 0.99
  # Normalized mantissa; the value is 0.0002.
  lr: 2.0e-04
# Learning-rate scheduler component (cosine schedule with warmup, per its name).
lr_scheduler:
  _component_: torchtune.modules.get_cosine_schedule_with_warmup
  # NOTE(review): only 4 warmup steps; confirm this is deliberate.
  num_warmup_steps: 4
# Loss function component.
loss:
  _component_: torch.nn.CrossEntropyLoss
# Training-loop length and gradient accumulation settings.
epochs: 60
max_steps_per_epoch: null
# NOTE(review): a later top-level key, `gradient-accumulation-steps: 32`
# (kebab-case), disagrees with this value; confirm which one the recipe reads.
gradient_accumulation_steps: 260
compile: false
# Absolute output path; `${output_dir}` in metric_logger refers to a key of
# this name.
output_dir: /workspace/hebbanvogola/lora_finetune_output
# Metric logger component.
metric_logger:
  _component_: torchtune.utils.metric_logging.DiskLogger
  # Interpolation of output_dir; presumably resolves to the top-level key
  # rather than checkpointer.output_dir (confirm).
  log_dir: ${output_dir}
# Logging interval (unset), device, dtype, and activation-checkpointing switch.
log_every_n_steps: null
device: cuda
dtype: bf16
enable_activation_checkpointing: false
# Profiler component, currently disabled.
profiler:
  _component_: torchtune.utils.profiler
  enabled: false
# Inference-time prompt and sampling settings (separate from the top-level
# max_new_tokens / temperature / top_k keys).
inference:
  # Three-line prompt with a {video} placeholder; `|-` keeps the line breaks
  # and drops the trailing newline.
  prompt_template: |-
    Video:
    {video}
    Caption the previous video.
  max_new_tokens: 231
  temperature: 0.8
  top_k: 231
  quantizer: null
# NOTE(review): kebab-case key, unlike every other key in this file, and its
# value (32) disagrees with `gradient_accumulation_steps: 260` above.
# Presumably a stray override that the recipe does not read; confirm which
# value is intended and keep only the snake_case key.
gradient-accumulation-steps: 32