penglingwei committed on
Commit
61439ed
1 Parent(s): 52fcb81

Training in progress, step 500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e0a4d4eb5830f54a037bb5b2ec4c0d8b16835c8e8380818d51d26253dd73758b
3
  size 355970836
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8910b9a58f88ac8286fea601e9dd43f8eb82a05ef861f4e329876124f86705d4
3
  size 355970836
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b1dbfd9f0696dbaed1c3995ccce15ba4bc4256faed8e85d5762c31a1a2a589d
3
  size 712036922
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b83ffbb4550df83557f904e680abfe9068f31e77e6e9aa3035363e1d86308159
3
  size 712036922
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c2863feda87075c004bd0dd402af564804ba4c165f80aa98488706883ec39bd
3
  size 14308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:795229107db214ae138f938d284f076426adff33dd73907164d26d963d353076
3
  size 14308
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:139fdc5f456aa79c473440b624b5319327666b78b9b8b1ac7333eb8efe73bb6d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d93ead1c986470262211b05883b802fce83f20b54d15c64bf021ab89b9a8bb82
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.00813806739972852,
5
  "eval_steps": 500,
6
- "global_step": 400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -287,6 +287,76 @@
287
  "learning_rate": 1e-05,
288
  "loss": 0.0057,
289
  "step": 400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
290
  }
291
  ],
292
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.010172584249660648,
5
  "eval_steps": 500,
6
+ "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
287
  "learning_rate": 1e-05,
288
  "loss": 0.0057,
289
  "step": 400
290
+ },
291
+ {
292
+ "epoch": 0.008341519084721732,
293
+ "grad_norm": 19.89306640625,
294
+ "learning_rate": 1e-05,
295
+ "loss": 0.0063,
296
+ "step": 410
297
+ },
298
+ {
299
+ "epoch": 0.008544970769714945,
300
+ "grad_norm": 14.5062837600708,
301
+ "learning_rate": 1e-05,
302
+ "loss": 0.005,
303
+ "step": 420
304
+ },
305
+ {
306
+ "epoch": 0.008748422454708159,
307
+ "grad_norm": 13.982276916503906,
308
+ "learning_rate": 1e-05,
309
+ "loss": 0.0055,
310
+ "step": 430
311
+ },
312
+ {
313
+ "epoch": 0.00895187413970137,
314
+ "grad_norm": 21.34312629699707,
315
+ "learning_rate": 1e-05,
316
+ "loss": 0.0058,
317
+ "step": 440
318
+ },
319
+ {
320
+ "epoch": 0.009155325824694584,
321
+ "grad_norm": 17.6949405670166,
322
+ "learning_rate": 1e-05,
323
+ "loss": 0.0056,
324
+ "step": 450
325
+ },
326
+ {
327
+ "epoch": 0.009358777509687798,
328
+ "grad_norm": 16.052722930908203,
329
+ "learning_rate": 1e-05,
330
+ "loss": 0.0053,
331
+ "step": 460
332
+ },
333
+ {
334
+ "epoch": 0.00956222919468101,
335
+ "grad_norm": 11.400249481201172,
336
+ "learning_rate": 1e-05,
337
+ "loss": 0.0051,
338
+ "step": 470
339
+ },
340
+ {
341
+ "epoch": 0.009765680879674223,
342
+ "grad_norm": 15.31458568572998,
343
+ "learning_rate": 1e-05,
344
+ "loss": 0.0045,
345
+ "step": 480
346
+ },
347
+ {
348
+ "epoch": 0.009969132564667436,
349
+ "grad_norm": 17.73845863342285,
350
+ "learning_rate": 1e-05,
351
+ "loss": 0.0051,
352
+ "step": 490
353
+ },
354
+ {
355
+ "epoch": 0.010172584249660648,
356
+ "grad_norm": 13.484848022460938,
357
+ "learning_rate": 1e-05,
358
+ "loss": 0.006,
359
+ "step": 500
360
  }
361
  ],
362
  "logging_steps": 10,