MohamedAhmedAE
commited on
Commit
•
1f00134
1
Parent(s):
73f58b7
Training in progress, step 1500, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1423793692
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fafbc4b695fd8bfdea32039508732f28aad09b914411f7b745968d96db5a527d
|
3 |
size 1423793692
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2847809392
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:91ee2b5f2695e677f3f7d19bd6c7ee32347c3d70682d9581e9db11b0868ae4b3
|
3 |
size 2847809392
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13990
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6f0307c22c9eaef5bdd0acdfa0f36120192eebe56339a218faa4b8fc8466c182
|
3 |
size 13990
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:492c71fdfddaa74e5ff08b634deba26aa598bdfa223039762d4d422cf8dc7688
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 200,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -105,6 +105,13 @@
|
|
105 |
"learning_rate": 1.999998931473612e-05,
|
106 |
"loss": 7.6066,
|
107 |
"step": 1400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
}
|
109 |
],
|
110 |
"logging_steps": 100,
|
@@ -124,7 +131,7 @@
|
|
124 |
"attributes": {}
|
125 |
}
|
126 |
},
|
127 |
-
"total_flos":
|
128 |
"train_batch_size": 1,
|
129 |
"trial_name": null,
|
130 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.025017512258581006,
|
5 |
"eval_steps": 200,
|
6 |
+
"global_step": 1500,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
105 |
"learning_rate": 1.999998931473612e-05,
|
106 |
"loss": 7.6066,
|
107 |
"step": 1400
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 0.025017512258581006,
|
111 |
+
"grad_norm": 14.43734073638916,
|
112 |
+
"learning_rate": 1.9999987727890814e-05,
|
113 |
+
"loss": 7.6378,
|
114 |
+
"step": 1500
|
115 |
}
|
116 |
],
|
117 |
"logging_steps": 100,
|
|
|
131 |
"attributes": {}
|
132 |
}
|
133 |
},
|
134 |
+
"total_flos": 72658998638064.0,
|
135 |
"train_batch_size": 1,
|
136 |
"trial_name": null,
|
137 |
"trial_params": null
|