RodrigoSalazar-U
committed on
Training in progress, step 5500, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4785762744
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bac2b56ba43e7cf6e627c6d78ee2d8a54c6a9b8482738e4d0e5508b059244676
|
3 |
size 4785762744
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3497859804
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:69973d0bdbecfdff6671addf5401d05a7990dd2a8e07167f9b28ddcc497207f6
|
3 |
size 3497859804
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14308
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:acc2d65074a2fcc8399dba6d3c0a62d0568496e8b5831e17319a7dcd95d56dc4
|
3 |
size 14308
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dbd3295a917ebbf1cf0429160bcb6ae7d8acc650fcf19c056fed23a32459f9c3
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -357,6 +357,41 @@
|
|
357 |
"learning_rate": 4.962412271614282e-05,
|
358 |
"loss": 0.146,
|
359 |
"step": 5000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
360 |
}
|
361 |
],
|
362 |
"logging_steps": 100,
|
@@ -376,7 +411,7 @@
|
|
376 |
"attributes": {}
|
377 |
}
|
378 |
},
|
379 |
-
"total_flos": 2.
|
380 |
"train_batch_size": 16,
|
381 |
"trial_name": null,
|
382 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 3.036157880209771,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 5500,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
357 |
"learning_rate": 4.962412271614282e-05,
|
358 |
"loss": 0.146,
|
359 |
"step": 5000
|
360 |
+
},
|
361 |
+
{
|
362 |
+
"epoch": 2.8153463980126965,
|
363 |
+
"grad_norm": 0.671291172504425,
|
364 |
+
"learning_rate": 4.7697339933668414e-05,
|
365 |
+
"loss": 0.1483,
|
366 |
+
"step": 5100
|
367 |
+
},
|
368 |
+
{
|
369 |
+
"epoch": 2.870549268561965,
|
370 |
+
"grad_norm": 0.6203655004501343,
|
371 |
+
"learning_rate": 4.5773979049898455e-05,
|
372 |
+
"loss": 0.1451,
|
373 |
+
"step": 5200
|
374 |
+
},
|
375 |
+
{
|
376 |
+
"epoch": 2.9257521391112338,
|
377 |
+
"grad_norm": 0.8333371877670288,
|
378 |
+
"learning_rate": 4.385689830092801e-05,
|
379 |
+
"loss": 0.1437,
|
380 |
+
"step": 5300
|
381 |
+
},
|
382 |
+
{
|
383 |
+
"epoch": 2.9809550096605024,
|
384 |
+
"grad_norm": 0.725379467010498,
|
385 |
+
"learning_rate": 4.194894659017415e-05,
|
386 |
+
"loss": 0.1412,
|
387 |
+
"step": 5400
|
388 |
+
},
|
389 |
+
{
|
390 |
+
"epoch": 3.036157880209771,
|
391 |
+
"grad_norm": 0.4681724011898041,
|
392 |
+
"learning_rate": 4.005295925472484e-05,
|
393 |
+
"loss": 0.1206,
|
394 |
+
"step": 5500
|
395 |
}
|
396 |
],
|
397 |
"logging_steps": 100,
|
|
|
411 |
"attributes": {}
|
412 |
}
|
413 |
},
|
414 |
+
"total_flos": 2.9148141046545777e+18,
|
415 |
"train_batch_size": 16,
|
416 |
"trial_name": null,
|
417 |
"trial_params": null
|