ZeroUniqueness
committed on
Commit
•
3ca1cd1
1
Parent(s):
39da2b1
donezos
Browse files
- README.md +13 -0
- adapter_model.bin +1 -1
- checkpoint-77500/adapter_model.bin +0 -3
- checkpoint-77500/adapter_model/adapter_model.bin +0 -3
- checkpoint-78000/adapter_model.bin +0 -3
- checkpoint-78000/adapter_model/adapter_model.bin +0 -3
- {checkpoint-76500 → checkpoint-78500}/README.md +0 -0
- {checkpoint-76500 → checkpoint-78500}/adapter_config.json +0 -0
- {checkpoint-76500 → checkpoint-78500}/adapter_model.bin +1 -1
- {checkpoint-76500 → checkpoint-78500}/adapter_model/README.md +0 -0
- {checkpoint-76500 → checkpoint-78500}/adapter_model/adapter_config.json +0 -0
- {checkpoint-76500 → checkpoint-78500}/adapter_model/adapter_model.bin +1 -1
- {checkpoint-77000 → checkpoint-78500}/optimizer.pt +1 -1
- {checkpoint-77000 → checkpoint-78500}/rng_state.pth +1 -1
- {checkpoint-77500 → checkpoint-78500}/scheduler.pt +1 -1
- {checkpoint-78000 → checkpoint-78500}/trainer_state.json +19 -5
- {checkpoint-76500 → checkpoint-78500}/training_args.bin +0 -0
- {checkpoint-77000 → checkpoint-79000}/README.md +0 -0
- {checkpoint-77000 → checkpoint-79000}/adapter_config.json +0 -0
- {checkpoint-77000 → checkpoint-79000}/adapter_model.bin +1 -1
- {checkpoint-77000 → checkpoint-79000}/adapter_model/README.md +0 -0
- {checkpoint-77000 → checkpoint-79000}/adapter_model/adapter_config.json +0 -0
- {checkpoint-77000 → checkpoint-79000}/adapter_model/adapter_model.bin +1 -1
- {checkpoint-77500 → checkpoint-79000}/optimizer.pt +1 -1
- {checkpoint-77500 → checkpoint-79000}/rng_state.pth +1 -1
- {checkpoint-77000 → checkpoint-79000}/scheduler.pt +1 -1
- {checkpoint-77500 → checkpoint-79000}/trainer_state.json +47 -5
- {checkpoint-77000 → checkpoint-79000}/training_args.bin +0 -0
- {checkpoint-77500 → checkpoint-79500}/README.md +0 -0
- {checkpoint-77500 → checkpoint-79500}/adapter_config.json +0 -0
- checkpoint-79500/adapter_model.bin +3 -0
- {checkpoint-77500 → checkpoint-79500}/adapter_model/README.md +0 -0
- {checkpoint-77500 → checkpoint-79500}/adapter_model/adapter_config.json +0 -0
- checkpoint-79500/adapter_model/adapter_model.bin +3 -0
- {checkpoint-76500 → checkpoint-79500}/optimizer.pt +1 -1
- {checkpoint-78000 → checkpoint-79500}/rng_state.pth +1 -1
- {checkpoint-76500 → checkpoint-79500}/scheduler.pt +1 -1
- {checkpoint-77000 → checkpoint-79500}/trainer_state.json +75 -5
- {checkpoint-77500 → checkpoint-79500}/training_args.bin +0 -0
- {checkpoint-78000 → checkpoint-80000}/README.md +0 -0
- {checkpoint-78000 → checkpoint-80000}/adapter_config.json +0 -0
- checkpoint-80000/adapter_model.bin +3 -0
- {checkpoint-78000 → checkpoint-80000}/adapter_model/README.md +0 -0
- {checkpoint-78000 → checkpoint-80000}/adapter_model/adapter_config.json +0 -0
- checkpoint-80000/adapter_model/adapter_model.bin +3 -0
- {checkpoint-78000 → checkpoint-80000}/optimizer.pt +1 -1
- {checkpoint-76500 → checkpoint-80000}/rng_state.pth +1 -1
- {checkpoint-78000 → checkpoint-80000}/scheduler.pt +1 -1
- {checkpoint-76500 → checkpoint-80000}/trainer_state.json +103 -5
- {checkpoint-78000 → checkpoint-80000}/training_args.bin +0 -0
README.md
CHANGED
@@ -4,6 +4,18 @@ library_name: peft
|
|
4 |
## Training procedure
|
5 |
|
6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
The following `bitsandbytes` quantization config was used during training:
|
8 |
- load_in_8bit: False
|
9 |
- load_in_4bit: True
|
@@ -76,5 +88,6 @@ The following `bitsandbytes` quantization config was used during training:
|
|
76 |
- PEFT 0.5.0.dev0
|
77 |
- PEFT 0.5.0.dev0
|
78 |
- PEFT 0.5.0.dev0
|
|
|
79 |
|
80 |
- PEFT 0.5.0.dev0
|
|
|
4 |
## Training procedure
|
5 |
|
6 |
|
7 |
+
The following `bitsandbytes` quantization config was used during training:
|
8 |
+
- quant_method: bitsandbytes
|
9 |
+
- load_in_8bit: False
|
10 |
+
- load_in_4bit: True
|
11 |
+
- llm_int8_threshold: 6.0
|
12 |
+
- llm_int8_skip_modules: None
|
13 |
+
- llm_int8_enable_fp32_cpu_offload: False
|
14 |
+
- llm_int8_has_fp16_weight: False
|
15 |
+
- bnb_4bit_quant_type: nf4
|
16 |
+
- bnb_4bit_use_double_quant: True
|
17 |
+
- bnb_4bit_compute_dtype: bfloat16
|
18 |
+
|
19 |
The following `bitsandbytes` quantization config was used during training:
|
20 |
- load_in_8bit: False
|
21 |
- load_in_4bit: True
|
|
|
88 |
- PEFT 0.5.0.dev0
|
89 |
- PEFT 0.5.0.dev0
|
90 |
- PEFT 0.5.0.dev0
|
91 |
+
- PEFT 0.5.0.dev0
|
92 |
|
93 |
- PEFT 0.5.0.dev0
|
adapter_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 500897101
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a63171e37485368f4ae8c7a1aa1cb52caf8554a5c9b41c606ea3445d147683d5
|
3 |
size 500897101
|
checkpoint-77500/adapter_model.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:a884eb86c6d63307c9acb52dd6a2b1b5697accc52a54a952355462a2d1ff4bb1
|
3 |
-
size 500897101
|
|
|
|
|
|
|
|
checkpoint-77500/adapter_model/adapter_model.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:a884eb86c6d63307c9acb52dd6a2b1b5697accc52a54a952355462a2d1ff4bb1
|
3 |
-
size 500897101
|
|
|
|
|
|
|
|
checkpoint-78000/adapter_model.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:8a95e49326f95cbf72dea87596b1fd0bcc1abcccd06310612c0ac6bdb3314bc1
|
3 |
-
size 500897101
|
|
|
|
|
|
|
|
checkpoint-78000/adapter_model/adapter_model.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:8a95e49326f95cbf72dea87596b1fd0bcc1abcccd06310612c0ac6bdb3314bc1
|
3 |
-
size 500897101
|
|
|
|
|
|
|
|
{checkpoint-76500 → checkpoint-78500}/README.md
RENAMED
File without changes
|
{checkpoint-76500 → checkpoint-78500}/adapter_config.json
RENAMED
File without changes
|
{checkpoint-76500 → checkpoint-78500}/adapter_model.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 500897101
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a63171e37485368f4ae8c7a1aa1cb52caf8554a5c9b41c606ea3445d147683d5
|
3 |
size 500897101
|
{checkpoint-76500 → checkpoint-78500}/adapter_model/README.md
RENAMED
File without changes
|
{checkpoint-76500 → checkpoint-78500}/adapter_model/adapter_config.json
RENAMED
File without changes
|
{checkpoint-76500 → checkpoint-78500}/adapter_model/adapter_model.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 500897101
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a63171e37485368f4ae8c7a1aa1cb52caf8554a5c9b41c606ea3445d147683d5
|
3 |
size 500897101
|
{checkpoint-77000 → checkpoint-78500}/optimizer.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1001724605
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b94c97020101dfe8988e5cf3a67d34dc5cd78e5e4685fc7e9f6428c508ff53ce
|
3 |
size 1001724605
|
{checkpoint-77000 → checkpoint-78500}/rng_state.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14575
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b76070aa3a97b4296cfe30e77dcd74707dea42092548a8b07d9605050d31f3af
|
3 |
size 14575
|
{checkpoint-77500 → checkpoint-78500}/scheduler.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b0b4a7a47b7400044f7b7933b36f7c700f5c879afac5320d98411ec041812fdc
|
3 |
size 627
|
{checkpoint-78000 → checkpoint-78500}/trainer_state.json
RENAMED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "./qlora-out/checkpoint-
|
4 |
-
"epoch": 2.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -4551,13 +4551,27 @@
|
|
4551 |
"eval_samples_per_second": 0.427,
|
4552 |
"eval_steps_per_second": 0.427,
|
4553 |
"step": 78000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4554 |
}
|
4555 |
],
|
4556 |
"logging_steps": 500,
|
4557 |
"max_steps": 80463,
|
4558 |
"num_train_epochs": 3,
|
4559 |
"save_steps": 500,
|
4560 |
-
"total_flos": 2.
|
4561 |
"trial_name": null,
|
4562 |
"trial_params": null
|
4563 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.47572794556617737,
|
3 |
+
"best_model_checkpoint": "./qlora-out/checkpoint-78500",
|
4 |
+
"epoch": 2.926811080869468,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 78500,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
4551 |
"eval_samples_per_second": 0.427,
|
4552 |
"eval_steps_per_second": 0.427,
|
4553 |
"step": 78000
|
4554 |
+
},
|
4555 |
+
{
|
4556 |
+
"epoch": 2.93,
|
4557 |
+
"learning_rate": 2.9363864105907967e-07,
|
4558 |
+
"loss": 0.3633,
|
4559 |
+
"step": 78500
|
4560 |
+
},
|
4561 |
+
{
|
4562 |
+
"epoch": 2.93,
|
4563 |
+
"eval_loss": 0.47572794556617737,
|
4564 |
+
"eval_runtime": 1284.2805,
|
4565 |
+
"eval_samples_per_second": 0.422,
|
4566 |
+
"eval_steps_per_second": 0.422,
|
4567 |
+
"step": 78500
|
4568 |
}
|
4569 |
],
|
4570 |
"logging_steps": 500,
|
4571 |
"max_steps": 80463,
|
4572 |
"num_train_epochs": 3,
|
4573 |
"save_steps": 500,
|
4574 |
+
"total_flos": 2.205083454877704e+19,
|
4575 |
"trial_name": null,
|
4576 |
"trial_params": null
|
4577 |
}
|
{checkpoint-76500 → checkpoint-78500}/training_args.bin
RENAMED
File without changes
|
{checkpoint-77000 → checkpoint-79000}/README.md
RENAMED
File without changes
|
{checkpoint-77000 → checkpoint-79000}/adapter_config.json
RENAMED
File without changes
|
{checkpoint-77000 → checkpoint-79000}/adapter_model.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 500897101
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:54cdaa3b50871ca412658af6dafb529a52b86753ff9526fee0e1b74954640aa4
|
3 |
size 500897101
|
{checkpoint-77000 → checkpoint-79000}/adapter_model/README.md
RENAMED
File without changes
|
{checkpoint-77000 → checkpoint-79000}/adapter_model/adapter_config.json
RENAMED
File without changes
|
{checkpoint-77000 → checkpoint-79000}/adapter_model/adapter_model.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 500897101
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:54cdaa3b50871ca412658af6dafb529a52b86753ff9526fee0e1b74954640aa4
|
3 |
size 500897101
|
{checkpoint-77500 → checkpoint-79000}/optimizer.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1001724605
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:50d0a52aeee43e33265bda6c05b10da12477313f3f2c4ef29276aa29bb0cd412
|
3 |
size 1001724605
|
{checkpoint-77500 → checkpoint-79000}/rng_state.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14575
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eac837ea1eac58f4aaf90b3c888590c6a34139d2391df0c00f93ad613d9d2fe4
|
3 |
size 14575
|
{checkpoint-77000 → checkpoint-79000}/scheduler.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:818c4b15e458686761dbc223e6f958ab0a422ba783d422c1d2eee54f27b9caa2
|
3 |
size 627
|
{checkpoint-77500 → checkpoint-79000}/trainer_state.json
RENAMED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "./qlora-out/checkpoint-
|
4 |
-
"epoch": 2.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -4537,13 +4537,55 @@
|
|
4537 |
"eval_samples_per_second": 0.413,
|
4538 |
"eval_steps_per_second": 0.413,
|
4539 |
"step": 77500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4540 |
}
|
4541 |
],
|
4542 |
"logging_steps": 500,
|
4543 |
"max_steps": 80463,
|
4544 |
"num_train_epochs": 3,
|
4545 |
"save_steps": 500,
|
4546 |
-
"total_flos": 2.
|
4547 |
"trial_name": null,
|
4548 |
"trial_params": null
|
4549 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.47572794556617737,
|
3 |
+
"best_model_checkpoint": "./qlora-out/checkpoint-78500",
|
4 |
+
"epoch": 2.945453189664815,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 79000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
4537 |
"eval_samples_per_second": 0.413,
|
4538 |
"eval_steps_per_second": 0.413,
|
4539 |
"step": 77500
|
4540 |
+
},
|
4541 |
+
{
|
4542 |
+
"epoch": 2.91,
|
4543 |
+
"learning_rate": 4.6214609844061894e-07,
|
4544 |
+
"loss": 0.3696,
|
4545 |
+
"step": 78000
|
4546 |
+
},
|
4547 |
+
{
|
4548 |
+
"epoch": 2.91,
|
4549 |
+
"eval_loss": 0.4757947325706482,
|
4550 |
+
"eval_runtime": 1268.9631,
|
4551 |
+
"eval_samples_per_second": 0.427,
|
4552 |
+
"eval_steps_per_second": 0.427,
|
4553 |
+
"step": 78000
|
4554 |
+
},
|
4555 |
+
{
|
4556 |
+
"epoch": 2.93,
|
4557 |
+
"learning_rate": 2.9363864105907967e-07,
|
4558 |
+
"loss": 0.3633,
|
4559 |
+
"step": 78500
|
4560 |
+
},
|
4561 |
+
{
|
4562 |
+
"epoch": 2.93,
|
4563 |
+
"eval_loss": 0.47572794556617737,
|
4564 |
+
"eval_runtime": 1284.2805,
|
4565 |
+
"eval_samples_per_second": 0.422,
|
4566 |
+
"eval_steps_per_second": 0.422,
|
4567 |
+
"step": 78500
|
4568 |
+
},
|
4569 |
+
{
|
4570 |
+
"epoch": 2.95,
|
4571 |
+
"learning_rate": 1.6313824917496555e-07,
|
4572 |
+
"loss": 0.3712,
|
4573 |
+
"step": 79000
|
4574 |
+
},
|
4575 |
+
{
|
4576 |
+
"epoch": 2.95,
|
4577 |
+
"eval_loss": 0.47579219937324524,
|
4578 |
+
"eval_runtime": 1333.9827,
|
4579 |
+
"eval_samples_per_second": 0.406,
|
4580 |
+
"eval_steps_per_second": 0.406,
|
4581 |
+
"step": 79000
|
4582 |
}
|
4583 |
],
|
4584 |
"logging_steps": 500,
|
4585 |
"max_steps": 80463,
|
4586 |
"num_train_epochs": 3,
|
4587 |
"save_steps": 500,
|
4588 |
+
"total_flos": 2.219205598424703e+19,
|
4589 |
"trial_name": null,
|
4590 |
"trial_params": null
|
4591 |
}
|
{checkpoint-77000 → checkpoint-79000}/training_args.bin
RENAMED
File without changes
|
{checkpoint-77500 → checkpoint-79500}/README.md
RENAMED
File without changes
|
{checkpoint-77500 → checkpoint-79500}/adapter_config.json
RENAMED
File without changes
|
checkpoint-79500/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0aec4531d523e5f1c6d00064a04d505d2c8cc46f2254d5f97c50a6d3f19d57e7
|
3 |
+
size 500897101
|
{checkpoint-77500 → checkpoint-79500}/adapter_model/README.md
RENAMED
File without changes
|
{checkpoint-77500 → checkpoint-79500}/adapter_model/adapter_config.json
RENAMED
File without changes
|
checkpoint-79500/adapter_model/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0aec4531d523e5f1c6d00064a04d505d2c8cc46f2254d5f97c50a6d3f19d57e7
|
3 |
+
size 500897101
|
{checkpoint-76500 → checkpoint-79500}/optimizer.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1001724605
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:66d9344a9200b364b04ea1c7047cb7f771a81a931263b0a8299e74a1dbc21123
|
3 |
size 1001724605
|
{checkpoint-78000 → checkpoint-79500}/rng_state.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14575
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:66d629d451a682f1240d2ed68e0aa31eff4552d6f44511ee41e17ea77a083291
|
3 |
size 14575
|
{checkpoint-76500 → checkpoint-79500}/scheduler.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:85630629004d37069a695741c2bbf4ff767e7519f39623048a26a45a1e93ca39
|
3 |
size 627
|
{checkpoint-77000 → checkpoint-79500}/trainer_state.json
RENAMED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "./qlora-out/checkpoint-
|
4 |
-
"epoch": 2.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -4523,13 +4523,83 @@
|
|
4523 |
"eval_samples_per_second": 0.419,
|
4524 |
"eval_steps_per_second": 0.419,
|
4525 |
"step": 77000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4526 |
}
|
4527 |
],
|
4528 |
"logging_steps": 500,
|
4529 |
"max_steps": 80463,
|
4530 |
"num_train_epochs": 3,
|
4531 |
"save_steps": 500,
|
4532 |
-
"total_flos": 2.
|
4533 |
"trial_name": null,
|
4534 |
"trial_params": null
|
4535 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.47572794556617737,
|
3 |
+
"best_model_checkpoint": "./qlora-out/checkpoint-78500",
|
4 |
+
"epoch": 2.964095298460162,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 79500,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
4523 |
"eval_samples_per_second": 0.419,
|
4524 |
"eval_steps_per_second": 0.419,
|
4525 |
"step": 77000
|
4526 |
+
},
|
4527 |
+
{
|
4528 |
+
"epoch": 2.89,
|
4529 |
+
"learning_rate": 6.685963879659362e-07,
|
4530 |
+
"loss": 0.3675,
|
4531 |
+
"step": 77500
|
4532 |
+
},
|
4533 |
+
{
|
4534 |
+
"epoch": 2.89,
|
4535 |
+
"eval_loss": 0.4758478105068207,
|
4536 |
+
"eval_runtime": 1311.0096,
|
4537 |
+
"eval_samples_per_second": 0.413,
|
4538 |
+
"eval_steps_per_second": 0.413,
|
4539 |
+
"step": 77500
|
4540 |
+
},
|
4541 |
+
{
|
4542 |
+
"epoch": 2.91,
|
4543 |
+
"learning_rate": 4.6214609844061894e-07,
|
4544 |
+
"loss": 0.3696,
|
4545 |
+
"step": 78000
|
4546 |
+
},
|
4547 |
+
{
|
4548 |
+
"epoch": 2.91,
|
4549 |
+
"eval_loss": 0.4757947325706482,
|
4550 |
+
"eval_runtime": 1268.9631,
|
4551 |
+
"eval_samples_per_second": 0.427,
|
4552 |
+
"eval_steps_per_second": 0.427,
|
4553 |
+
"step": 78000
|
4554 |
+
},
|
4555 |
+
{
|
4556 |
+
"epoch": 2.93,
|
4557 |
+
"learning_rate": 2.9363864105907967e-07,
|
4558 |
+
"loss": 0.3633,
|
4559 |
+
"step": 78500
|
4560 |
+
},
|
4561 |
+
{
|
4562 |
+
"epoch": 2.93,
|
4563 |
+
"eval_loss": 0.47572794556617737,
|
4564 |
+
"eval_runtime": 1284.2805,
|
4565 |
+
"eval_samples_per_second": 0.422,
|
4566 |
+
"eval_steps_per_second": 0.422,
|
4567 |
+
"step": 78500
|
4568 |
+
},
|
4569 |
+
{
|
4570 |
+
"epoch": 2.95,
|
4571 |
+
"learning_rate": 1.6313824917496555e-07,
|
4572 |
+
"loss": 0.3712,
|
4573 |
+
"step": 79000
|
4574 |
+
},
|
4575 |
+
{
|
4576 |
+
"epoch": 2.95,
|
4577 |
+
"eval_loss": 0.47579219937324524,
|
4578 |
+
"eval_runtime": 1333.9827,
|
4579 |
+
"eval_samples_per_second": 0.406,
|
4580 |
+
"eval_steps_per_second": 0.406,
|
4581 |
+
"step": 79000
|
4582 |
+
},
|
4583 |
+
{
|
4584 |
+
"epoch": 2.96,
|
4585 |
+
"learning_rate": 7.069466822952065e-08,
|
4586 |
+
"loss": 0.37,
|
4587 |
+
"step": 79500
|
4588 |
+
},
|
4589 |
+
{
|
4590 |
+
"epoch": 2.96,
|
4591 |
+
"eval_loss": 0.47579482197761536,
|
4592 |
+
"eval_runtime": 1343.7136,
|
4593 |
+
"eval_samples_per_second": 0.403,
|
4594 |
+
"eval_steps_per_second": 0.403,
|
4595 |
+
"step": 79500
|
4596 |
}
|
4597 |
],
|
4598 |
"logging_steps": 500,
|
4599 |
"max_steps": 80463,
|
4600 |
"num_train_epochs": 3,
|
4601 |
"save_steps": 500,
|
4602 |
+
"total_flos": 2.2334186767367946e+19,
|
4603 |
"trial_name": null,
|
4604 |
"trial_params": null
|
4605 |
}
|
{checkpoint-77500 → checkpoint-79500}/training_args.bin
RENAMED
File without changes
|
{checkpoint-78000 → checkpoint-80000}/README.md
RENAMED
File without changes
|
{checkpoint-78000 → checkpoint-80000}/adapter_config.json
RENAMED
File without changes
|
checkpoint-80000/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bb4d05e080adb0e5195e39d104132e2826c45ae5f84e265fc57e9babe3f31e2e
|
3 |
+
size 500897101
|
{checkpoint-78000 → checkpoint-80000}/adapter_model/README.md
RENAMED
File without changes
|
{checkpoint-78000 → checkpoint-80000}/adapter_model/adapter_config.json
RENAMED
File without changes
|
checkpoint-80000/adapter_model/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bb4d05e080adb0e5195e39d104132e2826c45ae5f84e265fc57e9babe3f31e2e
|
3 |
+
size 500897101
|
{checkpoint-78000 → checkpoint-80000}/optimizer.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1001724605
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d872aa8649276d07a16445e8ad1f9857b024aeb2bc7ef434f10697d68093bd70
|
3 |
size 1001724605
|
{checkpoint-76500 → checkpoint-80000}/rng_state.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14575
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:88aed6b668511a80acef03aeb5954058975243c279e1a2f0605b6635eddb4d13
|
3 |
size 14575
|
{checkpoint-78000 → checkpoint-80000}/scheduler.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:07c85d472815ff7897a1300024f3cab1032a73a2e8ce83cd10c397998eec29c6
|
3 |
size 627
|
{checkpoint-76500 → checkpoint-80000}/trainer_state.json
RENAMED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "./qlora-out/checkpoint-
|
4 |
-
"epoch": 2.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -4509,13 +4509,111 @@
|
|
4509 |
"eval_samples_per_second": 0.419,
|
4510 |
"eval_steps_per_second": 0.419,
|
4511 |
"step": 76500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4512 |
}
|
4513 |
],
|
4514 |
"logging_steps": 500,
|
4515 |
"max_steps": 80463,
|
4516 |
"num_train_epochs": 3,
|
4517 |
"save_steps": 500,
|
4518 |
-
"total_flos": 2.
|
4519 |
"trial_name": null,
|
4520 |
"trial_params": null
|
4521 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.47572794556617737,
|
3 |
+
"best_model_checkpoint": "./qlora-out/checkpoint-78500",
|
4 |
+
"epoch": 2.9827374072555086,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 80000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
4509 |
"eval_samples_per_second": 0.419,
|
4510 |
"eval_steps_per_second": 0.419,
|
4511 |
"step": 76500
|
4512 |
+
},
|
4513 |
+
{
|
4514 |
+
"epoch": 2.87,
|
4515 |
+
"learning_rate": 9.129108128541176e-07,
|
4516 |
+
"loss": 0.3658,
|
4517 |
+
"step": 77000
|
4518 |
+
},
|
4519 |
+
{
|
4520 |
+
"epoch": 2.87,
|
4521 |
+
"eval_loss": 0.47582224011421204,
|
4522 |
+
"eval_runtime": 1293.6591,
|
4523 |
+
"eval_samples_per_second": 0.419,
|
4524 |
+
"eval_steps_per_second": 0.419,
|
4525 |
+
"step": 77000
|
4526 |
+
},
|
4527 |
+
{
|
4528 |
+
"epoch": 2.89,
|
4529 |
+
"learning_rate": 6.685963879659362e-07,
|
4530 |
+
"loss": 0.3675,
|
4531 |
+
"step": 77500
|
4532 |
+
},
|
4533 |
+
{
|
4534 |
+
"epoch": 2.89,
|
4535 |
+
"eval_loss": 0.4758478105068207,
|
4536 |
+
"eval_runtime": 1311.0096,
|
4537 |
+
"eval_samples_per_second": 0.413,
|
4538 |
+
"eval_steps_per_second": 0.413,
|
4539 |
+
"step": 77500
|
4540 |
+
},
|
4541 |
+
{
|
4542 |
+
"epoch": 2.91,
|
4543 |
+
"learning_rate": 4.6214609844061894e-07,
|
4544 |
+
"loss": 0.3696,
|
4545 |
+
"step": 78000
|
4546 |
+
},
|
4547 |
+
{
|
4548 |
+
"epoch": 2.91,
|
4549 |
+
"eval_loss": 0.4757947325706482,
|
4550 |
+
"eval_runtime": 1268.9631,
|
4551 |
+
"eval_samples_per_second": 0.427,
|
4552 |
+
"eval_steps_per_second": 0.427,
|
4553 |
+
"step": 78000
|
4554 |
+
},
|
4555 |
+
{
|
4556 |
+
"epoch": 2.93,
|
4557 |
+
"learning_rate": 2.9363864105907967e-07,
|
4558 |
+
"loss": 0.3633,
|
4559 |
+
"step": 78500
|
4560 |
+
},
|
4561 |
+
{
|
4562 |
+
"epoch": 2.93,
|
4563 |
+
"eval_loss": 0.47572794556617737,
|
4564 |
+
"eval_runtime": 1284.2805,
|
4565 |
+
"eval_samples_per_second": 0.422,
|
4566 |
+
"eval_steps_per_second": 0.422,
|
4567 |
+
"step": 78500
|
4568 |
+
},
|
4569 |
+
{
|
4570 |
+
"epoch": 2.95,
|
4571 |
+
"learning_rate": 1.6313824917496555e-07,
|
4572 |
+
"loss": 0.3712,
|
4573 |
+
"step": 79000
|
4574 |
+
},
|
4575 |
+
{
|
4576 |
+
"epoch": 2.95,
|
4577 |
+
"eval_loss": 0.47579219937324524,
|
4578 |
+
"eval_runtime": 1333.9827,
|
4579 |
+
"eval_samples_per_second": 0.406,
|
4580 |
+
"eval_steps_per_second": 0.406,
|
4581 |
+
"step": 79000
|
4582 |
+
},
|
4583 |
+
{
|
4584 |
+
"epoch": 2.96,
|
4585 |
+
"learning_rate": 7.069466822952065e-08,
|
4586 |
+
"loss": 0.37,
|
4587 |
+
"step": 79500
|
4588 |
+
},
|
4589 |
+
{
|
4590 |
+
"epoch": 2.96,
|
4591 |
+
"eval_loss": 0.47579482197761536,
|
4592 |
+
"eval_runtime": 1343.7136,
|
4593 |
+
"eval_samples_per_second": 0.403,
|
4594 |
+
"eval_steps_per_second": 0.403,
|
4595 |
+
"step": 79500
|
4596 |
+
},
|
4597 |
+
{
|
4598 |
+
"epoch": 2.98,
|
4599 |
+
"learning_rate": 1.6343136789165324e-08,
|
4600 |
+
"loss": 0.3647,
|
4601 |
+
"step": 80000
|
4602 |
+
},
|
4603 |
+
{
|
4604 |
+
"epoch": 2.98,
|
4605 |
+
"eval_loss": 0.475759357213974,
|
4606 |
+
"eval_runtime": 1306.8248,
|
4607 |
+
"eval_samples_per_second": 0.415,
|
4608 |
+
"eval_steps_per_second": 0.415,
|
4609 |
+
"step": 80000
|
4610 |
}
|
4611 |
],
|
4612 |
"logging_steps": 500,
|
4613 |
"max_steps": 80463,
|
4614 |
"num_train_epochs": 3,
|
4615 |
"save_steps": 500,
|
4616 |
+
"total_flos": 2.2475568675952804e+19,
|
4617 |
"trial_name": null,
|
4618 |
"trial_params": null
|
4619 |
}
|
{checkpoint-78000 → checkpoint-80000}/training_args.bin
RENAMED
File without changes
|