leixa commited on
Commit
ba134b5
·
verified ·
1 Parent(s): 8a4d62a

Training in progress, step 60, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:36219f4df01f6540de43e20af601c95b792f0e8b395a1faec9012b7942e4bc37
3
  size 1001465824
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:184231790e9a47327b2753c0830bb738e8fe8d9415817371828e63a694b4ed1b
3
  size 1001465824
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:585f9ab65b3cbf0614231a35f714d5862e5c46100460503306f45bca7fc5a217
3
  size 509176980
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14a76d922d59bc1b0020e776875374796ad6902a69089b31610eda3c5aa070d9
3
  size 509176980
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa8a3d63161f76c741ed27e5548e0954b55bada46f17ff65da09dc9004692f5a
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d6040f13b5818505a6ab000dd3b2f9edff910b36a9f5a3a034da7779eb7d8ce
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78a060285bfc64e2914a2dfa3484550144c9851d788510de86ca8a657edce764
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e31ea59fbcacae53c010c1baf2222b21437849ba2abcafeea6c68dd11ef083b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7792207792207793,
5
  "eval_steps": 15,
6
- "global_step": 45,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -144,6 +144,49 @@
144
  "eval_samples_per_second": 4.436,
145
  "eval_steps_per_second": 0.595,
146
  "step": 45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  }
148
  ],
149
  "logging_steps": 3,
@@ -163,7 +206,7 @@
163
  "attributes": {}
164
  }
165
  },
166
- "total_flos": 2.318428010446848e+17,
167
  "train_batch_size": 8,
168
  "trial_name": null,
169
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0432900432900434,
5
  "eval_steps": 15,
6
+ "global_step": 60,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
144
  "eval_samples_per_second": 4.436,
145
  "eval_steps_per_second": 0.595,
146
  "step": 45
147
+ },
148
+ {
149
+ "epoch": 0.8311688311688312,
150
+ "grad_norm": 0.7336857318878174,
151
+ "learning_rate": 8.717872783521047e-05,
152
+ "loss": 2.4763,
153
+ "step": 48
154
+ },
155
+ {
156
+ "epoch": 0.8831168831168831,
157
+ "grad_norm": 0.6515845060348511,
158
+ "learning_rate": 8.518457335743926e-05,
159
+ "loss": 2.5329,
160
+ "step": 51
161
+ },
162
+ {
163
+ "epoch": 0.935064935064935,
164
+ "grad_norm": 0.7598447203636169,
165
+ "learning_rate": 8.307282131280804e-05,
166
+ "loss": 2.6017,
167
+ "step": 54
168
+ },
169
+ {
170
+ "epoch": 0.987012987012987,
171
+ "grad_norm": 0.6881215572357178,
172
+ "learning_rate": 8.085052982021847e-05,
173
+ "loss": 2.5885,
174
+ "step": 57
175
+ },
176
+ {
177
+ "epoch": 1.0432900432900434,
178
+ "grad_norm": 0.6885347962379456,
179
+ "learning_rate": 7.85251264550948e-05,
180
+ "loss": 2.3153,
181
+ "step": 60
182
+ },
183
+ {
184
+ "epoch": 1.0432900432900434,
185
+ "eval_loss": 0.6708703637123108,
186
+ "eval_runtime": 21.8845,
187
+ "eval_samples_per_second": 4.432,
188
+ "eval_steps_per_second": 0.594,
189
+ "step": 60
190
  }
191
  ],
192
  "logging_steps": 3,
 
206
  "attributes": {}
207
  }
208
  },
209
+ "total_flos": 3.091237347262464e+17,
210
  "train_batch_size": 8,
211
  "trial_name": null,
212
  "trial_params": null