penglingwei
commited on
Commit
•
c695799
1
Parent(s):
4c605a7
Training in progress, step 32476, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 355967616
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:33340270abd0409af6d05e61ef0599813be5c1844119d7769c7068db632d1d9b
|
3 |
size 355967616
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 712029114
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:be70fd8ab879b6d39326b6d12348412b03e6ccfb19399bc1393d6d85e5db7863
|
3 |
size 712029114
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:148ed913b32e8721d0e239a3585a6304177384e08458813be1eba0a7d8567889
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ff90769edad154332f3106098071f15e0ed13400ddcc176f41814d521feb3838
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -22687,6 +22687,55 @@
|
|
22687 |
"learning_rate": 9.773355068215528e-10,
|
22688 |
"loss": 0.6151,
|
22689 |
"step": 32400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22690 |
}
|
22691 |
],
|
22692 |
"logging_steps": 10,
|
@@ -22701,12 +22750,12 @@
|
|
22701 |
"should_evaluate": false,
|
22702 |
"should_log": false,
|
22703 |
"should_save": true,
|
22704 |
-
"should_training_stop":
|
22705 |
},
|
22706 |
"attributes": {}
|
22707 |
}
|
22708 |
},
|
22709 |
-
"total_flos": 2.
|
22710 |
"train_batch_size": 8,
|
22711 |
"trial_name": null,
|
22712 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9999846042523055,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 32476,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
22687 |
"learning_rate": 9.773355068215528e-10,
|
22688 |
"loss": 0.6151,
|
22689 |
"step": 32400
|
22690 |
+
},
|
22691 |
+
{
|
22692 |
+
"epoch": 0.9979523655566332,
|
22693 |
+
"grad_norm": 13.585440635681152,
|
22694 |
+
"learning_rate": 7.722167650797696e-10,
|
22695 |
+
"loss": 0.4538,
|
22696 |
+
"step": 32410
|
22697 |
+
},
|
22698 |
+
{
|
22699 |
+
"epoch": 0.998260280510523,
|
22700 |
+
"grad_norm": 16.41465187072754,
|
22701 |
+
"learning_rate": 5.912291741405618e-10,
|
22702 |
+
"loss": 0.3448,
|
22703 |
+
"step": 32420
|
22704 |
+
},
|
22705 |
+
{
|
22706 |
+
"epoch": 0.9985681954644128,
|
22707 |
+
"grad_norm": 17.536109924316406,
|
22708 |
+
"learning_rate": 4.343729087002979e-10,
|
22709 |
+
"loss": 0.3335,
|
22710 |
+
"step": 32430
|
22711 |
+
},
|
22712 |
+
{
|
22713 |
+
"epoch": 0.9988761104183025,
|
22714 |
+
"grad_norm": 17.59690284729004,
|
22715 |
+
"learning_rate": 3.016481201739696e-10,
|
22716 |
+
"loss": 0.539,
|
22717 |
+
"step": 32440
|
22718 |
+
},
|
22719 |
+
{
|
22720 |
+
"epoch": 0.9991840253721922,
|
22721 |
+
"grad_norm": 23.103965759277344,
|
22722 |
+
"learning_rate": 1.930549366757628e-10,
|
22723 |
+
"loss": 0.5025,
|
22724 |
+
"step": 32450
|
22725 |
+
},
|
22726 |
+
{
|
22727 |
+
"epoch": 0.9994919403260819,
|
22728 |
+
"grad_norm": 13.974841117858887,
|
22729 |
+
"learning_rate": 1.0859346302460882e-10,
|
22730 |
+
"loss": 0.4925,
|
22731 |
+
"step": 32460
|
22732 |
+
},
|
22733 |
+
{
|
22734 |
+
"epoch": 0.9997998552799716,
|
22735 |
+
"grad_norm": 12.541289329528809,
|
22736 |
+
"learning_rate": 4.826378075528659e-11,
|
22737 |
+
"loss": 0.5266,
|
22738 |
+
"step": 32470
|
22739 |
}
|
22740 |
],
|
22741 |
"logging_steps": 10,
|
|
|
22750 |
"should_evaluate": false,
|
22751 |
"should_log": false,
|
22752 |
"should_save": true,
|
22753 |
+
"should_training_stop": true
|
22754 |
},
|
22755 |
"attributes": {}
|
22756 |
}
|
22757 |
},
|
22758 |
+
"total_flos": 2.172344327995392e+18,
|
22759 |
"train_batch_size": 8,
|
22760 |
"trial_name": null,
|
22761 |
"trial_params": null
|