ZeroUniqueness
commited on
Commit
β’
1823816
1
Parent(s):
f967b08
almost donezies
Browse files- adapter_model.bin +1 -1
- checkpoint-70000/adapter_model.bin +0 -3
- checkpoint-70000/adapter_model/adapter_model.bin +0 -3
- checkpoint-70500/adapter_model.bin +0 -3
- checkpoint-70500/adapter_model/adapter_model.bin +0 -3
- {checkpoint-69000 β checkpoint-76500}/README.md +0 -0
- {checkpoint-69000 β checkpoint-76500}/adapter_config.json +0 -0
- {checkpoint-69000 β checkpoint-76500}/adapter_model.bin +1 -1
- {checkpoint-69000 β checkpoint-76500}/adapter_model/README.md +0 -0
- {checkpoint-69000 β checkpoint-76500}/adapter_model/adapter_config.json +0 -0
- {checkpoint-69000 β checkpoint-76500}/adapter_model/adapter_model.bin +1 -1
- {checkpoint-69500 β checkpoint-76500}/optimizer.pt +1 -1
- {checkpoint-69500 β checkpoint-76500}/rng_state.pth +1 -1
- {checkpoint-70000 β checkpoint-76500}/scheduler.pt +1 -1
- {checkpoint-70500 β checkpoint-76500}/trainer_state.json +173 -5
- {checkpoint-69000 β checkpoint-76500}/training_args.bin +0 -0
- {checkpoint-69500 β checkpoint-77000}/README.md +0 -0
- {checkpoint-69500 β checkpoint-77000}/adapter_config.json +0 -0
- {checkpoint-69500 β checkpoint-77000}/adapter_model.bin +1 -1
- {checkpoint-69500 β checkpoint-77000}/adapter_model/README.md +0 -0
- {checkpoint-69500 β checkpoint-77000}/adapter_model/adapter_config.json +0 -0
- {checkpoint-69500 β checkpoint-77000}/adapter_model/adapter_model.bin +1 -1
- {checkpoint-70000 β checkpoint-77000}/optimizer.pt +1 -1
- {checkpoint-70000 β checkpoint-77000}/rng_state.pth +1 -1
- {checkpoint-69500 β checkpoint-77000}/scheduler.pt +1 -1
- {checkpoint-70000 β checkpoint-77000}/trainer_state.json +201 -5
- {checkpoint-69500 β checkpoint-77000}/training_args.bin +0 -0
- {checkpoint-70000 β checkpoint-77500}/README.md +0 -0
- {checkpoint-70000 β checkpoint-77500}/adapter_config.json +0 -0
- checkpoint-77500/adapter_model.bin +3 -0
- {checkpoint-70000 β checkpoint-77500}/adapter_model/README.md +0 -0
- {checkpoint-70000 β checkpoint-77500}/adapter_model/adapter_config.json +0 -0
- checkpoint-77500/adapter_model/adapter_model.bin +3 -0
- {checkpoint-69000 β checkpoint-77500}/optimizer.pt +1 -1
- {checkpoint-70500 β checkpoint-77500}/rng_state.pth +1 -1
- {checkpoint-69000 β checkpoint-77500}/scheduler.pt +1 -1
- {checkpoint-69500 β checkpoint-77500}/trainer_state.json +229 -5
- {checkpoint-70000 β checkpoint-77500}/training_args.bin +0 -0
- {checkpoint-70500 β checkpoint-78000}/README.md +0 -0
- {checkpoint-70500 β checkpoint-78000}/adapter_config.json +0 -0
- checkpoint-78000/adapter_model.bin +3 -0
- {checkpoint-70500 β checkpoint-78000}/adapter_model/README.md +0 -0
- {checkpoint-70500 β checkpoint-78000}/adapter_model/adapter_config.json +0 -0
- checkpoint-78000/adapter_model/adapter_model.bin +3 -0
- {checkpoint-70500 β checkpoint-78000}/optimizer.pt +1 -1
- {checkpoint-69000 β checkpoint-78000}/rng_state.pth +1 -1
- {checkpoint-70500 β checkpoint-78000}/scheduler.pt +1 -1
- {checkpoint-69000 β checkpoint-78000}/trainer_state.json +257 -5
- {checkpoint-70500 β checkpoint-78000}/training_args.bin +0 -0
adapter_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 500897101
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8a95e49326f95cbf72dea87596b1fd0bcc1abcccd06310612c0ac6bdb3314bc1
|
3 |
size 500897101
|
checkpoint-70000/adapter_model.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:35b27172603bfaa42af020910d0f3a0724656396738e74f39eebef1c4c53cd6c
|
3 |
-
size 500897101
|
|
|
|
|
|
|
|
checkpoint-70000/adapter_model/adapter_model.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:35b27172603bfaa42af020910d0f3a0724656396738e74f39eebef1c4c53cd6c
|
3 |
-
size 500897101
|
|
|
|
|
|
|
|
checkpoint-70500/adapter_model.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:8a26259b6c7f10eacd37169a51779a24aa9d6a76d8fdef027422bdcbf2557c2f
|
3 |
-
size 500897101
|
|
|
|
|
|
|
|
checkpoint-70500/adapter_model/adapter_model.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:8a26259b6c7f10eacd37169a51779a24aa9d6a76d8fdef027422bdcbf2557c2f
|
3 |
-
size 500897101
|
|
|
|
|
|
|
|
{checkpoint-69000 β checkpoint-76500}/README.md
RENAMED
File without changes
|
{checkpoint-69000 β checkpoint-76500}/adapter_config.json
RENAMED
File without changes
|
{checkpoint-69000 β checkpoint-76500}/adapter_model.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 500897101
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3aac0f31027f89872a0b3f8a0a220bec95e100567f0e22fd44826f62a28c01a6
|
3 |
size 500897101
|
{checkpoint-69000 β checkpoint-76500}/adapter_model/README.md
RENAMED
File without changes
|
{checkpoint-69000 β checkpoint-76500}/adapter_model/adapter_config.json
RENAMED
File without changes
|
{checkpoint-69000 β checkpoint-76500}/adapter_model/adapter_model.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 500897101
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3aac0f31027f89872a0b3f8a0a220bec95e100567f0e22fd44826f62a28c01a6
|
3 |
size 500897101
|
{checkpoint-69500 β checkpoint-76500}/optimizer.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1001724605
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dbbb89893d47261fd01a9f3778a4172d980e5d5d899645ffb5c27307d67df6b6
|
3 |
size 1001724605
|
{checkpoint-69500 β checkpoint-76500}/rng_state.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14575
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:77cc93baddf8ab32d29af2c1c219f38a87a8af2868d723737c13df696ee1f2ad
|
3 |
size 14575
|
{checkpoint-70000 β checkpoint-76500}/scheduler.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:220829e44432d3fbc0f85c119c5401ba1c2989f858b8188acbaba28a1379d42e
|
3 |
size 627
|
{checkpoint-70500 β checkpoint-76500}/trainer_state.json
RENAMED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "./qlora-out/checkpoint-
|
4 |
-
"epoch": 2.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -4341,13 +4341,181 @@
|
|
4341 |
"eval_samples_per_second": 0.42,
|
4342 |
"eval_steps_per_second": 0.42,
|
4343 |
"step": 70500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4344 |
}
|
4345 |
],
|
4346 |
"logging_steps": 500,
|
4347 |
"max_steps": 80463,
|
4348 |
"num_train_epochs": 3,
|
4349 |
"save_steps": 500,
|
4350 |
-
"total_flos":
|
4351 |
"trial_name": null,
|
4352 |
"trial_params": null
|
4353 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.4760077893733978,
|
3 |
+
"best_model_checkpoint": "./qlora-out/checkpoint-76500",
|
4 |
+
"epoch": 2.8522426456880803,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 76500,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
4341 |
"eval_samples_per_second": 0.42,
|
4342 |
"eval_steps_per_second": 0.42,
|
4343 |
"step": 70500
|
4344 |
+
},
|
4345 |
+
{
|
4346 |
+
"epoch": 2.65,
|
4347 |
+
"learning_rate": 6.749873173827314e-06,
|
4348 |
+
"loss": 0.3746,
|
4349 |
+
"step": 71000
|
4350 |
+
},
|
4351 |
+
{
|
4352 |
+
"epoch": 2.65,
|
4353 |
+
"eval_loss": 0.47773027420043945,
|
4354 |
+
"eval_runtime": 1293.7698,
|
4355 |
+
"eval_samples_per_second": 0.419,
|
4356 |
+
"eval_steps_per_second": 0.419,
|
4357 |
+
"step": 71000
|
4358 |
+
},
|
4359 |
+
{
|
4360 |
+
"epoch": 2.67,
|
4361 |
+
"learning_rate": 6.0625347721849805e-06,
|
4362 |
+
"loss": 0.365,
|
4363 |
+
"step": 71500
|
4364 |
+
},
|
4365 |
+
{
|
4366 |
+
"epoch": 2.67,
|
4367 |
+
"eval_loss": 0.47759953141212463,
|
4368 |
+
"eval_runtime": 1287.2533,
|
4369 |
+
"eval_samples_per_second": 0.421,
|
4370 |
+
"eval_steps_per_second": 0.421,
|
4371 |
+
"step": 71500
|
4372 |
+
},
|
4373 |
+
{
|
4374 |
+
"epoch": 2.68,
|
4375 |
+
"learning_rate": 5.411004390662034e-06,
|
4376 |
+
"loss": 0.3614,
|
4377 |
+
"step": 72000
|
4378 |
+
},
|
4379 |
+
{
|
4380 |
+
"epoch": 2.68,
|
4381 |
+
"eval_loss": 0.4774133861064911,
|
4382 |
+
"eval_runtime": 1290.2562,
|
4383 |
+
"eval_samples_per_second": 0.42,
|
4384 |
+
"eval_steps_per_second": 0.42,
|
4385 |
+
"step": 72000
|
4386 |
+
},
|
4387 |
+
{
|
4388 |
+
"epoch": 2.7,
|
4389 |
+
"learning_rate": 4.795530386109038e-06,
|
4390 |
+
"loss": 0.3672,
|
4391 |
+
"step": 72500
|
4392 |
+
},
|
4393 |
+
{
|
4394 |
+
"epoch": 2.7,
|
4395 |
+
"eval_loss": 0.4771479070186615,
|
4396 |
+
"eval_runtime": 1313.3814,
|
4397 |
+
"eval_samples_per_second": 0.413,
|
4398 |
+
"eval_steps_per_second": 0.413,
|
4399 |
+
"step": 72500
|
4400 |
+
},
|
4401 |
+
{
|
4402 |
+
"epoch": 2.72,
|
4403 |
+
"learning_rate": 4.2163473710470355e-06,
|
4404 |
+
"loss": 0.3536,
|
4405 |
+
"step": 73000
|
4406 |
+
},
|
4407 |
+
{
|
4408 |
+
"epoch": 2.72,
|
4409 |
+
"eval_loss": 0.4770236909389496,
|
4410 |
+
"eval_runtime": 1301.9499,
|
4411 |
+
"eval_samples_per_second": 0.416,
|
4412 |
+
"eval_steps_per_second": 0.416,
|
4413 |
+
"step": 73000
|
4414 |
+
},
|
4415 |
+
{
|
4416 |
+
"epoch": 2.74,
|
4417 |
+
"learning_rate": 3.67367612423567e-06,
|
4418 |
+
"loss": 0.3693,
|
4419 |
+
"step": 73500
|
4420 |
+
},
|
4421 |
+
{
|
4422 |
+
"epoch": 2.74,
|
4423 |
+
"eval_loss": 0.4766899645328522,
|
4424 |
+
"eval_runtime": 1310.5415,
|
4425 |
+
"eval_samples_per_second": 0.414,
|
4426 |
+
"eval_steps_per_second": 0.414,
|
4427 |
+
"step": 73500
|
4428 |
+
},
|
4429 |
+
{
|
4430 |
+
"epoch": 2.76,
|
4431 |
+
"learning_rate": 3.1677235065144862e-06,
|
4432 |
+
"loss": 0.358,
|
4433 |
+
"step": 74000
|
4434 |
+
},
|
4435 |
+
{
|
4436 |
+
"epoch": 2.76,
|
4437 |
+
"eval_loss": 0.47646036744117737,
|
4438 |
+
"eval_runtime": 1327.3256,
|
4439 |
+
"eval_samples_per_second": 0.408,
|
4440 |
+
"eval_steps_per_second": 0.408,
|
4441 |
+
"step": 74000
|
4442 |
+
},
|
4443 |
+
{
|
4444 |
+
"epoch": 2.78,
|
4445 |
+
"learning_rate": 2.6986823819497353e-06,
|
4446 |
+
"loss": 0.3653,
|
4447 |
+
"step": 74500
|
4448 |
+
},
|
4449 |
+
{
|
4450 |
+
"epoch": 2.78,
|
4451 |
+
"eval_loss": 0.47627386450767517,
|
4452 |
+
"eval_runtime": 1332.1149,
|
4453 |
+
"eval_samples_per_second": 0.407,
|
4454 |
+
"eval_steps_per_second": 0.407,
|
4455 |
+
"step": 74500
|
4456 |
+
},
|
4457 |
+
{
|
4458 |
+
"epoch": 2.8,
|
4459 |
+
"learning_rate": 2.266731544316425e-06,
|
4460 |
+
"loss": 0.3743,
|
4461 |
+
"step": 75000
|
4462 |
+
},
|
4463 |
+
{
|
4464 |
+
"epoch": 2.8,
|
4465 |
+
"eval_loss": 0.47608959674835205,
|
4466 |
+
"eval_runtime": 1305.4101,
|
4467 |
+
"eval_samples_per_second": 0.415,
|
4468 |
+
"eval_steps_per_second": 0.415,
|
4469 |
+
"step": 75000
|
4470 |
+
},
|
4471 |
+
{
|
4472 |
+
"epoch": 2.81,
|
4473 |
+
"learning_rate": 1.872035648944026e-06,
|
4474 |
+
"loss": 0.3659,
|
4475 |
+
"step": 75500
|
4476 |
+
},
|
4477 |
+
{
|
4478 |
+
"epoch": 2.81,
|
4479 |
+
"eval_loss": 0.476179838180542,
|
4480 |
+
"eval_runtime": 1301.8331,
|
4481 |
+
"eval_samples_per_second": 0.416,
|
4482 |
+
"eval_steps_per_second": 0.416,
|
4483 |
+
"step": 75500
|
4484 |
+
},
|
4485 |
+
{
|
4486 |
+
"epoch": 2.83,
|
4487 |
+
"learning_rate": 1.5147451499514353e-06,
|
4488 |
+
"loss": 0.3678,
|
4489 |
+
"step": 76000
|
4490 |
+
},
|
4491 |
+
{
|
4492 |
+
"epoch": 2.83,
|
4493 |
+
"eval_loss": 0.4760454595088959,
|
4494 |
+
"eval_runtime": 1297.73,
|
4495 |
+
"eval_samples_per_second": 0.418,
|
4496 |
+
"eval_steps_per_second": 0.418,
|
4497 |
+
"step": 76000
|
4498 |
+
},
|
4499 |
+
{
|
4500 |
+
"epoch": 2.85,
|
4501 |
+
"learning_rate": 1.1949962428953965e-06,
|
4502 |
+
"loss": 0.3672,
|
4503 |
+
"step": 76500
|
4504 |
+
},
|
4505 |
+
{
|
4506 |
+
"epoch": 2.85,
|
4507 |
+
"eval_loss": 0.4760077893733978,
|
4508 |
+
"eval_runtime": 1293.9854,
|
4509 |
+
"eval_samples_per_second": 0.419,
|
4510 |
+
"eval_steps_per_second": 0.419,
|
4511 |
+
"step": 76500
|
4512 |
}
|
4513 |
],
|
4514 |
"logging_steps": 500,
|
4515 |
"max_steps": 80463,
|
4516 |
"num_train_epochs": 3,
|
4517 |
"save_steps": 500,
|
4518 |
+
"total_flos": 2.14853629526682e+19,
|
4519 |
"trial_name": null,
|
4520 |
"trial_params": null
|
4521 |
}
|
{checkpoint-69000 β checkpoint-76500}/training_args.bin
RENAMED
File without changes
|
{checkpoint-69500 β checkpoint-77000}/README.md
RENAMED
File without changes
|
{checkpoint-69500 β checkpoint-77000}/adapter_config.json
RENAMED
File without changes
|
{checkpoint-69500 β checkpoint-77000}/adapter_model.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 500897101
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1427c39ed89366176a26d42309badaecc4eaac4173d3df01371a38612540cbce
|
3 |
size 500897101
|
{checkpoint-69500 β checkpoint-77000}/adapter_model/README.md
RENAMED
File without changes
|
{checkpoint-69500 β checkpoint-77000}/adapter_model/adapter_config.json
RENAMED
File without changes
|
{checkpoint-69500 β checkpoint-77000}/adapter_model/adapter_model.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 500897101
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1427c39ed89366176a26d42309badaecc4eaac4173d3df01371a38612540cbce
|
3 |
size 500897101
|
{checkpoint-70000 β checkpoint-77000}/optimizer.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1001724605
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b09958ee930a5c1cdb447dc5ca98b44b0a8ac3e23351c47128a6daf915aa3809
|
3 |
size 1001724605
|
{checkpoint-70000 β checkpoint-77000}/rng_state.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14575
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f162395a6f7d2e1af70b53e882440048027967f43d5301d750609c6c591e4ca3
|
3 |
size 14575
|
{checkpoint-69500 β checkpoint-77000}/scheduler.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:35a18202efac039c56b779ce26337552adb710311faff67d76d05cf3142d22af
|
3 |
size 627
|
{checkpoint-70000 β checkpoint-77000}/trainer_state.json
RENAMED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "./qlora-out/checkpoint-
|
4 |
-
"epoch": 2.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -4327,13 +4327,209 @@
|
|
4327 |
"eval_samples_per_second": 0.415,
|
4328 |
"eval_steps_per_second": 0.415,
|
4329 |
"step": 70000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4330 |
}
|
4331 |
],
|
4332 |
"logging_steps": 500,
|
4333 |
"max_steps": 80463,
|
4334 |
"num_train_epochs": 3,
|
4335 |
"save_steps": 500,
|
4336 |
-
"total_flos":
|
4337 |
"trial_name": null,
|
4338 |
"trial_params": null
|
4339 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.47582224011421204,
|
3 |
+
"best_model_checkpoint": "./qlora-out/checkpoint-77000",
|
4 |
+
"epoch": 2.870884754483427,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 77000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
4327 |
"eval_samples_per_second": 0.415,
|
4328 |
"eval_steps_per_second": 0.415,
|
4329 |
"step": 70000
|
4330 |
+
},
|
4331 |
+
{
|
4332 |
+
"epoch": 2.63,
|
4333 |
+
"learning_rate": 7.472757589080226e-06,
|
4334 |
+
"loss": 0.3614,
|
4335 |
+
"step": 70500
|
4336 |
+
},
|
4337 |
+
{
|
4338 |
+
"epoch": 2.63,
|
4339 |
+
"eval_loss": 0.4780386686325073,
|
4340 |
+
"eval_runtime": 1290.4017,
|
4341 |
+
"eval_samples_per_second": 0.42,
|
4342 |
+
"eval_steps_per_second": 0.42,
|
4343 |
+
"step": 70500
|
4344 |
+
},
|
4345 |
+
{
|
4346 |
+
"epoch": 2.65,
|
4347 |
+
"learning_rate": 6.749873173827314e-06,
|
4348 |
+
"loss": 0.3746,
|
4349 |
+
"step": 71000
|
4350 |
+
},
|
4351 |
+
{
|
4352 |
+
"epoch": 2.65,
|
4353 |
+
"eval_loss": 0.47773027420043945,
|
4354 |
+
"eval_runtime": 1293.7698,
|
4355 |
+
"eval_samples_per_second": 0.419,
|
4356 |
+
"eval_steps_per_second": 0.419,
|
4357 |
+
"step": 71000
|
4358 |
+
},
|
4359 |
+
{
|
4360 |
+
"epoch": 2.67,
|
4361 |
+
"learning_rate": 6.0625347721849805e-06,
|
4362 |
+
"loss": 0.365,
|
4363 |
+
"step": 71500
|
4364 |
+
},
|
4365 |
+
{
|
4366 |
+
"epoch": 2.67,
|
4367 |
+
"eval_loss": 0.47759953141212463,
|
4368 |
+
"eval_runtime": 1287.2533,
|
4369 |
+
"eval_samples_per_second": 0.421,
|
4370 |
+
"eval_steps_per_second": 0.421,
|
4371 |
+
"step": 71500
|
4372 |
+
},
|
4373 |
+
{
|
4374 |
+
"epoch": 2.68,
|
4375 |
+
"learning_rate": 5.411004390662034e-06,
|
4376 |
+
"loss": 0.3614,
|
4377 |
+
"step": 72000
|
4378 |
+
},
|
4379 |
+
{
|
4380 |
+
"epoch": 2.68,
|
4381 |
+
"eval_loss": 0.4774133861064911,
|
4382 |
+
"eval_runtime": 1290.2562,
|
4383 |
+
"eval_samples_per_second": 0.42,
|
4384 |
+
"eval_steps_per_second": 0.42,
|
4385 |
+
"step": 72000
|
4386 |
+
},
|
4387 |
+
{
|
4388 |
+
"epoch": 2.7,
|
4389 |
+
"learning_rate": 4.795530386109038e-06,
|
4390 |
+
"loss": 0.3672,
|
4391 |
+
"step": 72500
|
4392 |
+
},
|
4393 |
+
{
|
4394 |
+
"epoch": 2.7,
|
4395 |
+
"eval_loss": 0.4771479070186615,
|
4396 |
+
"eval_runtime": 1313.3814,
|
4397 |
+
"eval_samples_per_second": 0.413,
|
4398 |
+
"eval_steps_per_second": 0.413,
|
4399 |
+
"step": 72500
|
4400 |
+
},
|
4401 |
+
{
|
4402 |
+
"epoch": 2.72,
|
4403 |
+
"learning_rate": 4.2163473710470355e-06,
|
4404 |
+
"loss": 0.3536,
|
4405 |
+
"step": 73000
|
4406 |
+
},
|
4407 |
+
{
|
4408 |
+
"epoch": 2.72,
|
4409 |
+
"eval_loss": 0.4770236909389496,
|
4410 |
+
"eval_runtime": 1301.9499,
|
4411 |
+
"eval_samples_per_second": 0.416,
|
4412 |
+
"eval_steps_per_second": 0.416,
|
4413 |
+
"step": 73000
|
4414 |
+
},
|
4415 |
+
{
|
4416 |
+
"epoch": 2.74,
|
4417 |
+
"learning_rate": 3.67367612423567e-06,
|
4418 |
+
"loss": 0.3693,
|
4419 |
+
"step": 73500
|
4420 |
+
},
|
4421 |
+
{
|
4422 |
+
"epoch": 2.74,
|
4423 |
+
"eval_loss": 0.4766899645328522,
|
4424 |
+
"eval_runtime": 1310.5415,
|
4425 |
+
"eval_samples_per_second": 0.414,
|
4426 |
+
"eval_steps_per_second": 0.414,
|
4427 |
+
"step": 73500
|
4428 |
+
},
|
4429 |
+
{
|
4430 |
+
"epoch": 2.76,
|
4431 |
+
"learning_rate": 3.1677235065144862e-06,
|
4432 |
+
"loss": 0.358,
|
4433 |
+
"step": 74000
|
4434 |
+
},
|
4435 |
+
{
|
4436 |
+
"epoch": 2.76,
|
4437 |
+
"eval_loss": 0.47646036744117737,
|
4438 |
+
"eval_runtime": 1327.3256,
|
4439 |
+
"eval_samples_per_second": 0.408,
|
4440 |
+
"eval_steps_per_second": 0.408,
|
4441 |
+
"step": 74000
|
4442 |
+
},
|
4443 |
+
{
|
4444 |
+
"epoch": 2.78,
|
4445 |
+
"learning_rate": 2.6986823819497353e-06,
|
4446 |
+
"loss": 0.3653,
|
4447 |
+
"step": 74500
|
4448 |
+
},
|
4449 |
+
{
|
4450 |
+
"epoch": 2.78,
|
4451 |
+
"eval_loss": 0.47627386450767517,
|
4452 |
+
"eval_runtime": 1332.1149,
|
4453 |
+
"eval_samples_per_second": 0.407,
|
4454 |
+
"eval_steps_per_second": 0.407,
|
4455 |
+
"step": 74500
|
4456 |
+
},
|
4457 |
+
{
|
4458 |
+
"epoch": 2.8,
|
4459 |
+
"learning_rate": 2.266731544316425e-06,
|
4460 |
+
"loss": 0.3743,
|
4461 |
+
"step": 75000
|
4462 |
+
},
|
4463 |
+
{
|
4464 |
+
"epoch": 2.8,
|
4465 |
+
"eval_loss": 0.47608959674835205,
|
4466 |
+
"eval_runtime": 1305.4101,
|
4467 |
+
"eval_samples_per_second": 0.415,
|
4468 |
+
"eval_steps_per_second": 0.415,
|
4469 |
+
"step": 75000
|
4470 |
+
},
|
4471 |
+
{
|
4472 |
+
"epoch": 2.81,
|
4473 |
+
"learning_rate": 1.872035648944026e-06,
|
4474 |
+
"loss": 0.3659,
|
4475 |
+
"step": 75500
|
4476 |
+
},
|
4477 |
+
{
|
4478 |
+
"epoch": 2.81,
|
4479 |
+
"eval_loss": 0.476179838180542,
|
4480 |
+
"eval_runtime": 1301.8331,
|
4481 |
+
"eval_samples_per_second": 0.416,
|
4482 |
+
"eval_steps_per_second": 0.416,
|
4483 |
+
"step": 75500
|
4484 |
+
},
|
4485 |
+
{
|
4486 |
+
"epoch": 2.83,
|
4487 |
+
"learning_rate": 1.5147451499514353e-06,
|
4488 |
+
"loss": 0.3678,
|
4489 |
+
"step": 76000
|
4490 |
+
},
|
4491 |
+
{
|
4492 |
+
"epoch": 2.83,
|
4493 |
+
"eval_loss": 0.4760454595088959,
|
4494 |
+
"eval_runtime": 1297.73,
|
4495 |
+
"eval_samples_per_second": 0.418,
|
4496 |
+
"eval_steps_per_second": 0.418,
|
4497 |
+
"step": 76000
|
4498 |
+
},
|
4499 |
+
{
|
4500 |
+
"epoch": 2.85,
|
4501 |
+
"learning_rate": 1.1949962428953965e-06,
|
4502 |
+
"loss": 0.3672,
|
4503 |
+
"step": 76500
|
4504 |
+
},
|
4505 |
+
{
|
4506 |
+
"epoch": 2.85,
|
4507 |
+
"eval_loss": 0.4760077893733978,
|
4508 |
+
"eval_runtime": 1293.9854,
|
4509 |
+
"eval_samples_per_second": 0.419,
|
4510 |
+
"eval_steps_per_second": 0.419,
|
4511 |
+
"step": 76500
|
4512 |
+
},
|
4513 |
+
{
|
4514 |
+
"epoch": 2.87,
|
4515 |
+
"learning_rate": 9.129108128541176e-07,
|
4516 |
+
"loss": 0.3658,
|
4517 |
+
"step": 77000
|
4518 |
+
},
|
4519 |
+
{
|
4520 |
+
"epoch": 2.87,
|
4521 |
+
"eval_loss": 0.47582224011421204,
|
4522 |
+
"eval_runtime": 1293.6591,
|
4523 |
+
"eval_samples_per_second": 0.419,
|
4524 |
+
"eval_steps_per_second": 0.419,
|
4525 |
+
"step": 77000
|
4526 |
}
|
4527 |
],
|
4528 |
"logging_steps": 500,
|
4529 |
"max_steps": 80463,
|
4530 |
"num_train_epochs": 3,
|
4531 |
"save_steps": 500,
|
4532 |
+
"total_flos": 2.1626487594830807e+19,
|
4533 |
"trial_name": null,
|
4534 |
"trial_params": null
|
4535 |
}
|
{checkpoint-69500 β checkpoint-77000}/training_args.bin
RENAMED
File without changes
|
{checkpoint-70000 β checkpoint-77500}/README.md
RENAMED
File without changes
|
{checkpoint-70000 β checkpoint-77500}/adapter_config.json
RENAMED
File without changes
|
checkpoint-77500/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a884eb86c6d63307c9acb52dd6a2b1b5697accc52a54a952355462a2d1ff4bb1
|
3 |
+
size 500897101
|
{checkpoint-70000 β checkpoint-77500}/adapter_model/README.md
RENAMED
File without changes
|
{checkpoint-70000 β checkpoint-77500}/adapter_model/adapter_config.json
RENAMED
File without changes
|
checkpoint-77500/adapter_model/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a884eb86c6d63307c9acb52dd6a2b1b5697accc52a54a952355462a2d1ff4bb1
|
3 |
+
size 500897101
|
{checkpoint-69000 β checkpoint-77500}/optimizer.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1001724605
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:21a191193d80b976badbd98b5f53b496006b6f2a29af8b6d8dca0b1e0b7ecbe4
|
3 |
size 1001724605
|
{checkpoint-70500 β checkpoint-77500}/rng_state.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14575
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:077d679486328cc243c50097dc890c036a89503397938038ef9689bd7097c327
|
3 |
size 14575
|
{checkpoint-69000 β checkpoint-77500}/scheduler.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ea97f91717d4e226e2157501dd3a83bc130311da389f901511bd22351a008c26
|
3 |
size 627
|
{checkpoint-69500 β checkpoint-77500}/trainer_state.json
RENAMED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "./qlora-out/checkpoint-
|
4 |
-
"epoch": 2.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -4313,13 +4313,237 @@
|
|
4313 |
"eval_samples_per_second": 0.412,
|
4314 |
"eval_steps_per_second": 0.412,
|
4315 |
"step": 69500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4316 |
}
|
4317 |
],
|
4318 |
"logging_steps": 500,
|
4319 |
"max_steps": 80463,
|
4320 |
"num_train_epochs": 3,
|
4321 |
"save_steps": 500,
|
4322 |
-
"total_flos":
|
4323 |
"trial_name": null,
|
4324 |
"trial_params": null
|
4325 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.47582224011421204,
|
3 |
+
"best_model_checkpoint": "./qlora-out/checkpoint-77000",
|
4 |
+
"epoch": 2.889526863278774,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 77500,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
4313 |
"eval_samples_per_second": 0.412,
|
4314 |
"eval_steps_per_second": 0.412,
|
4315 |
"step": 69500
|
4316 |
+
},
|
4317 |
+
{
|
4318 |
+
"epoch": 2.61,
|
4319 |
+
"learning_rate": 8.230912461650797e-06,
|
4320 |
+
"loss": 0.3601,
|
4321 |
+
"step": 70000
|
4322 |
+
},
|
4323 |
+
{
|
4324 |
+
"epoch": 2.61,
|
4325 |
+
"eval_loss": 0.47838443517684937,
|
4326 |
+
"eval_runtime": 1306.7325,
|
4327 |
+
"eval_samples_per_second": 0.415,
|
4328 |
+
"eval_steps_per_second": 0.415,
|
4329 |
+
"step": 70000
|
4330 |
+
},
|
4331 |
+
{
|
4332 |
+
"epoch": 2.63,
|
4333 |
+
"learning_rate": 7.472757589080226e-06,
|
4334 |
+
"loss": 0.3614,
|
4335 |
+
"step": 70500
|
4336 |
+
},
|
4337 |
+
{
|
4338 |
+
"epoch": 2.63,
|
4339 |
+
"eval_loss": 0.4780386686325073,
|
4340 |
+
"eval_runtime": 1290.4017,
|
4341 |
+
"eval_samples_per_second": 0.42,
|
4342 |
+
"eval_steps_per_second": 0.42,
|
4343 |
+
"step": 70500
|
4344 |
+
},
|
4345 |
+
{
|
4346 |
+
"epoch": 2.65,
|
4347 |
+
"learning_rate": 6.749873173827314e-06,
|
4348 |
+
"loss": 0.3746,
|
4349 |
+
"step": 71000
|
4350 |
+
},
|
4351 |
+
{
|
4352 |
+
"epoch": 2.65,
|
4353 |
+
"eval_loss": 0.47773027420043945,
|
4354 |
+
"eval_runtime": 1293.7698,
|
4355 |
+
"eval_samples_per_second": 0.419,
|
4356 |
+
"eval_steps_per_second": 0.419,
|
4357 |
+
"step": 71000
|
4358 |
+
},
|
4359 |
+
{
|
4360 |
+
"epoch": 2.67,
|
4361 |
+
"learning_rate": 6.0625347721849805e-06,
|
4362 |
+
"loss": 0.365,
|
4363 |
+
"step": 71500
|
4364 |
+
},
|
4365 |
+
{
|
4366 |
+
"epoch": 2.67,
|
4367 |
+
"eval_loss": 0.47759953141212463,
|
4368 |
+
"eval_runtime": 1287.2533,
|
4369 |
+
"eval_samples_per_second": 0.421,
|
4370 |
+
"eval_steps_per_second": 0.421,
|
4371 |
+
"step": 71500
|
4372 |
+
},
|
4373 |
+
{
|
4374 |
+
"epoch": 2.68,
|
4375 |
+
"learning_rate": 5.411004390662034e-06,
|
4376 |
+
"loss": 0.3614,
|
4377 |
+
"step": 72000
|
4378 |
+
},
|
4379 |
+
{
|
4380 |
+
"epoch": 2.68,
|
4381 |
+
"eval_loss": 0.4774133861064911,
|
4382 |
+
"eval_runtime": 1290.2562,
|
4383 |
+
"eval_samples_per_second": 0.42,
|
4384 |
+
"eval_steps_per_second": 0.42,
|
4385 |
+
"step": 72000
|
4386 |
+
},
|
4387 |
+
{
|
4388 |
+
"epoch": 2.7,
|
4389 |
+
"learning_rate": 4.795530386109038e-06,
|
4390 |
+
"loss": 0.3672,
|
4391 |
+
"step": 72500
|
4392 |
+
},
|
4393 |
+
{
|
4394 |
+
"epoch": 2.7,
|
4395 |
+
"eval_loss": 0.4771479070186615,
|
4396 |
+
"eval_runtime": 1313.3814,
|
4397 |
+
"eval_samples_per_second": 0.413,
|
4398 |
+
"eval_steps_per_second": 0.413,
|
4399 |
+
"step": 72500
|
4400 |
+
},
|
4401 |
+
{
|
4402 |
+
"epoch": 2.72,
|
4403 |
+
"learning_rate": 4.2163473710470355e-06,
|
4404 |
+
"loss": 0.3536,
|
4405 |
+
"step": 73000
|
4406 |
+
},
|
4407 |
+
{
|
4408 |
+
"epoch": 2.72,
|
4409 |
+
"eval_loss": 0.4770236909389496,
|
4410 |
+
"eval_runtime": 1301.9499,
|
4411 |
+
"eval_samples_per_second": 0.416,
|
4412 |
+
"eval_steps_per_second": 0.416,
|
4413 |
+
"step": 73000
|
4414 |
+
},
|
4415 |
+
{
|
4416 |
+
"epoch": 2.74,
|
4417 |
+
"learning_rate": 3.67367612423567e-06,
|
4418 |
+
"loss": 0.3693,
|
4419 |
+
"step": 73500
|
4420 |
+
},
|
4421 |
+
{
|
4422 |
+
"epoch": 2.74,
|
4423 |
+
"eval_loss": 0.4766899645328522,
|
4424 |
+
"eval_runtime": 1310.5415,
|
4425 |
+
"eval_samples_per_second": 0.414,
|
4426 |
+
"eval_steps_per_second": 0.414,
|
4427 |
+
"step": 73500
|
4428 |
+
},
|
4429 |
+
{
|
4430 |
+
"epoch": 2.76,
|
4431 |
+
"learning_rate": 3.1677235065144862e-06,
|
4432 |
+
"loss": 0.358,
|
4433 |
+
"step": 74000
|
4434 |
+
},
|
4435 |
+
{
|
4436 |
+
"epoch": 2.76,
|
4437 |
+
"eval_loss": 0.47646036744117737,
|
4438 |
+
"eval_runtime": 1327.3256,
|
4439 |
+
"eval_samples_per_second": 0.408,
|
4440 |
+
"eval_steps_per_second": 0.408,
|
4441 |
+
"step": 74000
|
4442 |
+
},
|
4443 |
+
{
|
4444 |
+
"epoch": 2.78,
|
4445 |
+
"learning_rate": 2.6986823819497353e-06,
|
4446 |
+
"loss": 0.3653,
|
4447 |
+
"step": 74500
|
4448 |
+
},
|
4449 |
+
{
|
4450 |
+
"epoch": 2.78,
|
4451 |
+
"eval_loss": 0.47627386450767517,
|
4452 |
+
"eval_runtime": 1332.1149,
|
4453 |
+
"eval_samples_per_second": 0.407,
|
4454 |
+
"eval_steps_per_second": 0.407,
|
4455 |
+
"step": 74500
|
4456 |
+
},
|
4457 |
+
{
|
4458 |
+
"epoch": 2.8,
|
4459 |
+
"learning_rate": 2.266731544316425e-06,
|
4460 |
+
"loss": 0.3743,
|
4461 |
+
"step": 75000
|
4462 |
+
},
|
4463 |
+
{
|
4464 |
+
"epoch": 2.8,
|
4465 |
+
"eval_loss": 0.47608959674835205,
|
4466 |
+
"eval_runtime": 1305.4101,
|
4467 |
+
"eval_samples_per_second": 0.415,
|
4468 |
+
"eval_steps_per_second": 0.415,
|
4469 |
+
"step": 75000
|
4470 |
+
},
|
4471 |
+
{
|
4472 |
+
"epoch": 2.81,
|
4473 |
+
"learning_rate": 1.872035648944026e-06,
|
4474 |
+
"loss": 0.3659,
|
4475 |
+
"step": 75500
|
4476 |
+
},
|
4477 |
+
{
|
4478 |
+
"epoch": 2.81,
|
4479 |
+
"eval_loss": 0.476179838180542,
|
4480 |
+
"eval_runtime": 1301.8331,
|
4481 |
+
"eval_samples_per_second": 0.416,
|
4482 |
+
"eval_steps_per_second": 0.416,
|
4483 |
+
"step": 75500
|
4484 |
+
},
|
4485 |
+
{
|
4486 |
+
"epoch": 2.83,
|
4487 |
+
"learning_rate": 1.5147451499514353e-06,
|
4488 |
+
"loss": 0.3678,
|
4489 |
+
"step": 76000
|
4490 |
+
},
|
4491 |
+
{
|
4492 |
+
"epoch": 2.83,
|
4493 |
+
"eval_loss": 0.4760454595088959,
|
4494 |
+
"eval_runtime": 1297.73,
|
4495 |
+
"eval_samples_per_second": 0.418,
|
4496 |
+
"eval_steps_per_second": 0.418,
|
4497 |
+
"step": 76000
|
4498 |
+
},
|
4499 |
+
{
|
4500 |
+
"epoch": 2.85,
|
4501 |
+
"learning_rate": 1.1949962428953965e-06,
|
4502 |
+
"loss": 0.3672,
|
4503 |
+
"step": 76500
|
4504 |
+
},
|
4505 |
+
{
|
4506 |
+
"epoch": 2.85,
|
4507 |
+
"eval_loss": 0.4760077893733978,
|
4508 |
+
"eval_runtime": 1293.9854,
|
4509 |
+
"eval_samples_per_second": 0.419,
|
4510 |
+
"eval_steps_per_second": 0.419,
|
4511 |
+
"step": 76500
|
4512 |
+
},
|
4513 |
+
{
|
4514 |
+
"epoch": 2.87,
|
4515 |
+
"learning_rate": 9.129108128541176e-07,
|
4516 |
+
"loss": 0.3658,
|
4517 |
+
"step": 77000
|
4518 |
+
},
|
4519 |
+
{
|
4520 |
+
"epoch": 2.87,
|
4521 |
+
"eval_loss": 0.47582224011421204,
|
4522 |
+
"eval_runtime": 1293.6591,
|
4523 |
+
"eval_samples_per_second": 0.419,
|
4524 |
+
"eval_steps_per_second": 0.419,
|
4525 |
+
"step": 77000
|
4526 |
+
},
|
4527 |
+
{
|
4528 |
+
"epoch": 2.89,
|
4529 |
+
"learning_rate": 6.685963879659362e-07,
|
4530 |
+
"loss": 0.3675,
|
4531 |
+
"step": 77500
|
4532 |
+
},
|
4533 |
+
{
|
4534 |
+
"epoch": 2.89,
|
4535 |
+
"eval_loss": 0.4758478105068207,
|
4536 |
+
"eval_runtime": 1311.0096,
|
4537 |
+
"eval_samples_per_second": 0.413,
|
4538 |
+
"eval_steps_per_second": 0.413,
|
4539 |
+
"step": 77500
|
4540 |
}
|
4541 |
],
|
4542 |
"logging_steps": 500,
|
4543 |
"max_steps": 80463,
|
4544 |
"num_train_epochs": 3,
|
4545 |
"save_steps": 500,
|
4546 |
+
"total_flos": 2.1768185355260805e+19,
|
4547 |
"trial_name": null,
|
4548 |
"trial_params": null
|
4549 |
}
|
{checkpoint-70000 β checkpoint-77500}/training_args.bin
RENAMED
File without changes
|
{checkpoint-70500 β checkpoint-78000}/README.md
RENAMED
File without changes
|
{checkpoint-70500 β checkpoint-78000}/adapter_config.json
RENAMED
File without changes
|
checkpoint-78000/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8a95e49326f95cbf72dea87596b1fd0bcc1abcccd06310612c0ac6bdb3314bc1
|
3 |
+
size 500897101
|
{checkpoint-70500 β checkpoint-78000}/adapter_model/README.md
RENAMED
File without changes
|
{checkpoint-70500 β checkpoint-78000}/adapter_model/adapter_config.json
RENAMED
File without changes
|
checkpoint-78000/adapter_model/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8a95e49326f95cbf72dea87596b1fd0bcc1abcccd06310612c0ac6bdb3314bc1
|
3 |
+
size 500897101
|
{checkpoint-70500 β checkpoint-78000}/optimizer.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1001724605
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ad0ade3b6ce4741aa4976bb9b0aae8b16a0605bea6020968a870961a9dc6cf7f
|
3 |
size 1001724605
|
{checkpoint-69000 β checkpoint-78000}/rng_state.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14575
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cae952b189b908268d157bce31712cd2487d9ad50a53ef0ae319c8f965d6c13c
|
3 |
size 14575
|
{checkpoint-70500 β checkpoint-78000}/scheduler.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:495a1512040b6a087956ab3f68fd532032c899c1dae932d93c4b1ba403b50d1d
|
3 |
size 627
|
{checkpoint-69000 β checkpoint-78000}/trainer_state.json
RENAMED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "./qlora-out/checkpoint-
|
4 |
-
"epoch": 2.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -4299,13 +4299,265 @@
|
|
4299 |
"eval_samples_per_second": 0.403,
|
4300 |
"eval_steps_per_second": 0.403,
|
4301 |
"step": 69000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4302 |
}
|
4303 |
],
|
4304 |
"logging_steps": 500,
|
4305 |
"max_steps": 80463,
|
4306 |
"num_train_epochs": 3,
|
4307 |
"save_steps": 500,
|
4308 |
-
"total_flos":
|
4309 |
"trial_name": null,
|
4310 |
"trial_params": null
|
4311 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.4757947325706482,
|
3 |
+
"best_model_checkpoint": "./qlora-out/checkpoint-78000",
|
4 |
+
"epoch": 2.908168972074121,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 78000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
4299 |
"eval_samples_per_second": 0.403,
|
4300 |
"eval_steps_per_second": 0.403,
|
4301 |
"step": 69000
|
4302 |
+
},
|
4303 |
+
{
|
4304 |
+
"epoch": 2.59,
|
4305 |
+
"learning_rate": 9.024048790501272e-06,
|
4306 |
+
"loss": 0.3594,
|
4307 |
+
"step": 69500
|
4308 |
+
},
|
4309 |
+
{
|
4310 |
+
"epoch": 2.59,
|
4311 |
+
"eval_loss": 0.47866225242614746,
|
4312 |
+
"eval_runtime": 1316.9883,
|
4313 |
+
"eval_samples_per_second": 0.412,
|
4314 |
+
"eval_steps_per_second": 0.412,
|
4315 |
+
"step": 69500
|
4316 |
+
},
|
4317 |
+
{
|
4318 |
+
"epoch": 2.61,
|
4319 |
+
"learning_rate": 8.230912461650797e-06,
|
4320 |
+
"loss": 0.3601,
|
4321 |
+
"step": 70000
|
4322 |
+
},
|
4323 |
+
{
|
4324 |
+
"epoch": 2.61,
|
4325 |
+
"eval_loss": 0.47838443517684937,
|
4326 |
+
"eval_runtime": 1306.7325,
|
4327 |
+
"eval_samples_per_second": 0.415,
|
4328 |
+
"eval_steps_per_second": 0.415,
|
4329 |
+
"step": 70000
|
4330 |
+
},
|
4331 |
+
{
|
4332 |
+
"epoch": 2.63,
|
4333 |
+
"learning_rate": 7.472757589080226e-06,
|
4334 |
+
"loss": 0.3614,
|
4335 |
+
"step": 70500
|
4336 |
+
},
|
4337 |
+
{
|
4338 |
+
"epoch": 2.63,
|
4339 |
+
"eval_loss": 0.4780386686325073,
|
4340 |
+
"eval_runtime": 1290.4017,
|
4341 |
+
"eval_samples_per_second": 0.42,
|
4342 |
+
"eval_steps_per_second": 0.42,
|
4343 |
+
"step": 70500
|
4344 |
+
},
|
4345 |
+
{
|
4346 |
+
"epoch": 2.65,
|
4347 |
+
"learning_rate": 6.749873173827314e-06,
|
4348 |
+
"loss": 0.3746,
|
4349 |
+
"step": 71000
|
4350 |
+
},
|
4351 |
+
{
|
4352 |
+
"epoch": 2.65,
|
4353 |
+
"eval_loss": 0.47773027420043945,
|
4354 |
+
"eval_runtime": 1293.7698,
|
4355 |
+
"eval_samples_per_second": 0.419,
|
4356 |
+
"eval_steps_per_second": 0.419,
|
4357 |
+
"step": 71000
|
4358 |
+
},
|
4359 |
+
{
|
4360 |
+
"epoch": 2.67,
|
4361 |
+
"learning_rate": 6.0625347721849805e-06,
|
4362 |
+
"loss": 0.365,
|
4363 |
+
"step": 71500
|
4364 |
+
},
|
4365 |
+
{
|
4366 |
+
"epoch": 2.67,
|
4367 |
+
"eval_loss": 0.47759953141212463,
|
4368 |
+
"eval_runtime": 1287.2533,
|
4369 |
+
"eval_samples_per_second": 0.421,
|
4370 |
+
"eval_steps_per_second": 0.421,
|
4371 |
+
"step": 71500
|
4372 |
+
},
|
4373 |
+
{
|
4374 |
+
"epoch": 2.68,
|
4375 |
+
"learning_rate": 5.411004390662034e-06,
|
4376 |
+
"loss": 0.3614,
|
4377 |
+
"step": 72000
|
4378 |
+
},
|
4379 |
+
{
|
4380 |
+
"epoch": 2.68,
|
4381 |
+
"eval_loss": 0.4774133861064911,
|
4382 |
+
"eval_runtime": 1290.2562,
|
4383 |
+
"eval_samples_per_second": 0.42,
|
4384 |
+
"eval_steps_per_second": 0.42,
|
4385 |
+
"step": 72000
|
4386 |
+
},
|
4387 |
+
{
|
4388 |
+
"epoch": 2.7,
|
4389 |
+
"learning_rate": 4.795530386109038e-06,
|
4390 |
+
"loss": 0.3672,
|
4391 |
+
"step": 72500
|
4392 |
+
},
|
4393 |
+
{
|
4394 |
+
"epoch": 2.7,
|
4395 |
+
"eval_loss": 0.4771479070186615,
|
4396 |
+
"eval_runtime": 1313.3814,
|
4397 |
+
"eval_samples_per_second": 0.413,
|
4398 |
+
"eval_steps_per_second": 0.413,
|
4399 |
+
"step": 72500
|
4400 |
+
},
|
4401 |
+
{
|
4402 |
+
"epoch": 2.72,
|
4403 |
+
"learning_rate": 4.2163473710470355e-06,
|
4404 |
+
"loss": 0.3536,
|
4405 |
+
"step": 73000
|
4406 |
+
},
|
4407 |
+
{
|
4408 |
+
"epoch": 2.72,
|
4409 |
+
"eval_loss": 0.4770236909389496,
|
4410 |
+
"eval_runtime": 1301.9499,
|
4411 |
+
"eval_samples_per_second": 0.416,
|
4412 |
+
"eval_steps_per_second": 0.416,
|
4413 |
+
"step": 73000
|
4414 |
+
},
|
4415 |
+
{
|
4416 |
+
"epoch": 2.74,
|
4417 |
+
"learning_rate": 3.67367612423567e-06,
|
4418 |
+
"loss": 0.3693,
|
4419 |
+
"step": 73500
|
4420 |
+
},
|
4421 |
+
{
|
4422 |
+
"epoch": 2.74,
|
4423 |
+
"eval_loss": 0.4766899645328522,
|
4424 |
+
"eval_runtime": 1310.5415,
|
4425 |
+
"eval_samples_per_second": 0.414,
|
4426 |
+
"eval_steps_per_second": 0.414,
|
4427 |
+
"step": 73500
|
4428 |
+
},
|
4429 |
+
{
|
4430 |
+
"epoch": 2.76,
|
4431 |
+
"learning_rate": 3.1677235065144862e-06,
|
4432 |
+
"loss": 0.358,
|
4433 |
+
"step": 74000
|
4434 |
+
},
|
4435 |
+
{
|
4436 |
+
"epoch": 2.76,
|
4437 |
+
"eval_loss": 0.47646036744117737,
|
4438 |
+
"eval_runtime": 1327.3256,
|
4439 |
+
"eval_samples_per_second": 0.408,
|
4440 |
+
"eval_steps_per_second": 0.408,
|
4441 |
+
"step": 74000
|
4442 |
+
},
|
4443 |
+
{
|
4444 |
+
"epoch": 2.78,
|
4445 |
+
"learning_rate": 2.6986823819497353e-06,
|
4446 |
+
"loss": 0.3653,
|
4447 |
+
"step": 74500
|
4448 |
+
},
|
4449 |
+
{
|
4450 |
+
"epoch": 2.78,
|
4451 |
+
"eval_loss": 0.47627386450767517,
|
4452 |
+
"eval_runtime": 1332.1149,
|
4453 |
+
"eval_samples_per_second": 0.407,
|
4454 |
+
"eval_steps_per_second": 0.407,
|
4455 |
+
"step": 74500
|
4456 |
+
},
|
4457 |
+
{
|
4458 |
+
"epoch": 2.8,
|
4459 |
+
"learning_rate": 2.266731544316425e-06,
|
4460 |
+
"loss": 0.3743,
|
4461 |
+
"step": 75000
|
4462 |
+
},
|
4463 |
+
{
|
4464 |
+
"epoch": 2.8,
|
4465 |
+
"eval_loss": 0.47608959674835205,
|
4466 |
+
"eval_runtime": 1305.4101,
|
4467 |
+
"eval_samples_per_second": 0.415,
|
4468 |
+
"eval_steps_per_second": 0.415,
|
4469 |
+
"step": 75000
|
4470 |
+
},
|
4471 |
+
{
|
4472 |
+
"epoch": 2.81,
|
4473 |
+
"learning_rate": 1.872035648944026e-06,
|
4474 |
+
"loss": 0.3659,
|
4475 |
+
"step": 75500
|
4476 |
+
},
|
4477 |
+
{
|
4478 |
+
"epoch": 2.81,
|
4479 |
+
"eval_loss": 0.476179838180542,
|
4480 |
+
"eval_runtime": 1301.8331,
|
4481 |
+
"eval_samples_per_second": 0.416,
|
4482 |
+
"eval_steps_per_second": 0.416,
|
4483 |
+
"step": 75500
|
4484 |
+
},
|
4485 |
+
{
|
4486 |
+
"epoch": 2.83,
|
4487 |
+
"learning_rate": 1.5147451499514353e-06,
|
4488 |
+
"loss": 0.3678,
|
4489 |
+
"step": 76000
|
4490 |
+
},
|
4491 |
+
{
|
4492 |
+
"epoch": 2.83,
|
4493 |
+
"eval_loss": 0.4760454595088959,
|
4494 |
+
"eval_runtime": 1297.73,
|
4495 |
+
"eval_samples_per_second": 0.418,
|
4496 |
+
"eval_steps_per_second": 0.418,
|
4497 |
+
"step": 76000
|
4498 |
+
},
|
4499 |
+
{
|
4500 |
+
"epoch": 2.85,
|
4501 |
+
"learning_rate": 1.1949962428953965e-06,
|
4502 |
+
"loss": 0.3672,
|
4503 |
+
"step": 76500
|
4504 |
+
},
|
4505 |
+
{
|
4506 |
+
"epoch": 2.85,
|
4507 |
+
"eval_loss": 0.4760077893733978,
|
4508 |
+
"eval_runtime": 1293.9854,
|
4509 |
+
"eval_samples_per_second": 0.419,
|
4510 |
+
"eval_steps_per_second": 0.419,
|
4511 |
+
"step": 76500
|
4512 |
+
},
|
4513 |
+
{
|
4514 |
+
"epoch": 2.87,
|
4515 |
+
"learning_rate": 9.129108128541176e-07,
|
4516 |
+
"loss": 0.3658,
|
4517 |
+
"step": 77000
|
4518 |
+
},
|
4519 |
+
{
|
4520 |
+
"epoch": 2.87,
|
4521 |
+
"eval_loss": 0.47582224011421204,
|
4522 |
+
"eval_runtime": 1293.6591,
|
4523 |
+
"eval_samples_per_second": 0.419,
|
4524 |
+
"eval_steps_per_second": 0.419,
|
4525 |
+
"step": 77000
|
4526 |
+
},
|
4527 |
+
{
|
4528 |
+
"epoch": 2.89,
|
4529 |
+
"learning_rate": 6.685963879659362e-07,
|
4530 |
+
"loss": 0.3675,
|
4531 |
+
"step": 77500
|
4532 |
+
},
|
4533 |
+
{
|
4534 |
+
"epoch": 2.89,
|
4535 |
+
"eval_loss": 0.4758478105068207,
|
4536 |
+
"eval_runtime": 1311.0096,
|
4537 |
+
"eval_samples_per_second": 0.413,
|
4538 |
+
"eval_steps_per_second": 0.413,
|
4539 |
+
"step": 77500
|
4540 |
+
},
|
4541 |
+
{
|
4542 |
+
"epoch": 2.91,
|
4543 |
+
"learning_rate": 4.6214609844061894e-07,
|
4544 |
+
"loss": 0.3696,
|
4545 |
+
"step": 78000
|
4546 |
+
},
|
4547 |
+
{
|
4548 |
+
"epoch": 2.91,
|
4549 |
+
"eval_loss": 0.4757947325706482,
|
4550 |
+
"eval_runtime": 1268.9631,
|
4551 |
+
"eval_samples_per_second": 0.427,
|
4552 |
+
"eval_steps_per_second": 0.427,
|
4553 |
+
"step": 78000
|
4554 |
}
|
4555 |
],
|
4556 |
"logging_steps": 500,
|
4557 |
"max_steps": 80463,
|
4558 |
"num_train_epochs": 3,
|
4559 |
"save_steps": 500,
|
4560 |
+
"total_flos": 2.1910553027265577e+19,
|
4561 |
"trial_name": null,
|
4562 |
"trial_params": null
|
4563 |
}
|
{checkpoint-70500 β checkpoint-78000}/training_args.bin
RENAMED
File without changes
|