davidr99 committed on
Commit
9e5b2fb
·
verified ·
1 Parent(s): e1c4a59

End of training

Browse files
README.md CHANGED
@@ -1,7 +1,7 @@
1
  ---
2
  library_name: peft
3
  license: gemma
4
- base_model: google/paligemma2-3b-pt-224
5
  tags:
6
  - generated_from_trainer
7
  model-index:
@@ -14,7 +14,7 @@ should probably proofread and complete it, then remove this comment. -->
14
 
15
  # finetuned_paligemma2_blackjack
16
 
17
- This model is a fine-tuned version of [google/paligemma2-3b-pt-224](https://huggingface.co/google/paligemma2-3b-pt-224) on an unknown dataset.
18
 
19
  ## Model description
20
 
@@ -37,9 +37,9 @@ The following hyperparameters were used during training:
37
  - train_batch_size: 3
38
  - eval_batch_size: 8
39
  - seed: 42
40
- - gradient_accumulation_steps: 5
41
- - total_train_batch_size: 15
42
- - optimizer: Use paged_adamw_8bit with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
43
  - lr_scheduler_type: linear
44
  - lr_scheduler_warmup_steps: 2
45
  - num_epochs: 150
@@ -52,6 +52,6 @@ The following hyperparameters were used during training:
52
 
53
  - PEFT 0.14.0
54
  - Transformers 4.47.1
55
- - Pytorch 2.2.2+cu121
56
  - Datasets 3.2.0
57
  - Tokenizers 0.21.0
 
1
  ---
2
  library_name: peft
3
  license: gemma
4
+ base_model: google/paligemma2-10b-pt-224
5
  tags:
6
  - generated_from_trainer
7
  model-index:
 
14
 
15
  # finetuned_paligemma2_blackjack
16
 
17
+ This model is a fine-tuned version of [google/paligemma2-10b-pt-224](https://huggingface.co/google/paligemma2-10b-pt-224) on an unknown dataset.
18
 
19
  ## Model description
20
 
 
37
  - train_batch_size: 3
38
  - eval_batch_size: 8
39
  - seed: 42
40
+ - gradient_accumulation_steps: 4
41
+ - total_train_batch_size: 12
42
+ - optimizer: Use adamw_hf with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
43
  - lr_scheduler_type: linear
44
  - lr_scheduler_warmup_steps: 2
45
  - num_epochs: 150
 
52
 
53
  - PEFT 0.14.0
54
  - Transformers 4.47.1
55
+ - Pytorch 2.5.1+cu121
56
  - Datasets 3.2.0
57
  - Tokenizers 0.21.0
adapter_config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
- "base_model_name_or_path": "google/paligemma2-3b-pt-224",
5
  "bias": "none",
6
  "eva_config": null,
7
  "exclude_modules": null,
@@ -23,16 +23,16 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
- "q_proj",
27
- "o_proj",
28
  "v_proj",
29
- "k_proj",
30
  "down_proj",
31
- "lora_megnitude_vector",
 
32
  "up_proj",
33
- "gate_proj"
 
34
  ],
35
  "task_type": "CAUSAL_LM",
36
- "use_dora": true,
37
  "use_rslora": false
38
  }
 
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
+ "base_model_name_or_path": "google/paligemma2-10b-pt-224",
5
  "bias": "none",
6
  "eva_config": null,
7
  "exclude_modules": null,
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
+ "lora_megnitude_vector",
 
27
  "v_proj",
 
28
  "down_proj",
29
+ "o_proj",
30
+ "gate_proj",
31
  "up_proj",
32
+ "k_proj",
33
+ "q_proj"
34
  ],
35
  "task_type": "CAUSAL_LM",
36
+ "use_dora": false,
37
  "use_rslora": false
38
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc32b2d802f1350daa58160c548a8cf696af2f01b13cffb743c329948d3f4975
3
- size 50821720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e665b6da20addd901b82421a662d77ee436804decb7d88fb76434f24893f021d
3
+ size 114121264
runs/Dec26_23-02-43_6d5fcf09ec9f/events.out.tfevents.1735254163.6d5fcf09ec9f.7637.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e428b0c76b4033d924e390c33d322984fd37a38017d29f1e115b63d8fbf1223d
3
+ size 5836
runs/Dec26_23-07-02_6d5fcf09ec9f/events.out.tfevents.1735254423.6d5fcf09ec9f.12348.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4450b509ce9e1046dc82f6066f2802f6f2d4a7c8628b37f16de6d186680368ca
3
+ size 4184
runs/Dec26_23-08-18_6d5fcf09ec9f/events.out.tfevents.1735254498.6d5fcf09ec9f.12830.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f70a4a50e1976a173bbd2217b2a7d124ffcf3b5bbfc45e48a00288d5b3ae8cbc
3
+ size 22485
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65fb7c9c2bbe97824e43c756000a70c85e558c4d730775968e97a7ddf3ae29c5
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2786f2b12eac92c733de0303062835d15883e56fc81d96a762b8846ebc0544f
3
  size 5368