TracyTank committed
Commit b220d99 · verified · 1 Parent(s): afb1ad4

Training in progress, step 300, checkpoint
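The files touched below are the standard contents of a Hugging Face `Trainer` checkpoint for a PEFT adapter run: the adapter weights (`adapter_model.safetensors`), optimizer and LR-scheduler state, one RNG state per process (rng_state_0–3, suggesting a four-process run), and `trainer_state.json`. A rough, hypothetical sketch of inspecting them follows; the paths assume a local clone with the LFS objects pulled, and none of it is taken from the repository's actual training code.

```python
# Hypothetical sketch: inspecting the checkpoint files touched by this commit.
# Assumes a local clone with `git lfs pull` already run; this does not reflect
# the repository's actual training script.
import torch
from safetensors.torch import load_file

# PEFT/LoRA adapter weights (17,425,352 bytes in this commit).
adapter = load_file("last-checkpoint/adapter_model.safetensors")
print(f"{len(adapter)} adapter tensors")

# Optimizer, scheduler, and per-process RNG states are torch-serialized blobs
# written by Trainer; weights_only=False because they are not plain tensor files.
optimizer_state = torch.load("last-checkpoint/optimizer.pt", map_location="cpu", weights_only=False)
scheduler_state = torch.load("last-checkpoint/scheduler.pt", map_location="cpu", weights_only=False)
rng_state = torch.load("last-checkpoint/rng_state_0.pth", map_location="cpu", weights_only=False)
```

Resuming such a run is normally done with `trainer.train(resume_from_checkpoint="last-checkpoint")`, which restores all of these files so training continues from the saved step rather than starting over.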

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:caec0d86ab99eee6cff14ca87c82a867148f70a209126f9aa6e011e3684228dc
+oid sha256:b0e6f53d6607615df544562b53c177c597c7c7764cc3cb9a82154924eabc9d19
 size 17425352
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:688e7e0105fdeecfbd76140dca9c778ab27df9f4d6f0e48b9217d7a131ba0908
-size 10251668
+oid sha256:aff84825d6486fa6b4465aee818af4c7da4806e1a370db24ba58ba40c23073ff
+size 10252116
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fd619ac5fe1b5a9993caa1b33027cbaba2affae2b385b5b8e1edc744130828b6
+oid sha256:49c3113ab5e74cf2cf75b8272697182349b986f05fea887c23c6b1cf30a10723
 size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1ce9f7304e0a4744e6ea3362c37f41fa81110af8aa0741e1543ca9d0f005e0bc
+oid sha256:948034a5628471bb3c37b4e388f6a1656b75ee096cbb96c84fa1a5c61b6c63e2
 size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a2663b0191a48b0d84f57520e6d7470d2ed946f6d4e667618a3101ba08ea341a
+oid sha256:e03f9a24a7a90769559ab2186e52e4bea52a0ca73705b5babf1706779cbc29f3
 size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:21d803133439c81a4ccfa134ee0737fabb2479f40d1f31268b7cfb1fa6f46236
+oid sha256:36ecb369e890de96dc252a97d723cd39a5de3933ea7d729b6a84c9264b7d8b05
 size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4ac116b8169c53ab649a7f15f2f32735f2c71ec2f803f70de8c655a513ee9cfc
+oid sha256:43da0688aca60835f4e18fa7e0f3cc099504828f82fd5dd994118be26b760a0f
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.05201222287237501,
+  "epoch": 0.07801833430856252,
   "eval_steps": 100,
-  "global_step": 200,
+  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -311,6 +311,154 @@
       "eval_samples_per_second": 132.872,
       "eval_steps_per_second": 16.617,
       "step": 200
+    },
+    {
+      "epoch": 0.05331252844418438,
+      "grad_norm": 0.22658920288085938,
+      "learning_rate": 5e-05,
+      "loss": 2.4468,
+      "step": 205
+    },
+    {
+      "epoch": 0.05461283401599376,
+      "grad_norm": 0.22382205724716187,
+      "learning_rate": 4.798670299452926e-05,
+      "loss": 2.4227,
+      "step": 210
+    },
+    {
+      "epoch": 0.05591313958780313,
+      "grad_norm": 0.2128317654132843,
+      "learning_rate": 4.597667156416371e-05,
+      "loss": 2.394,
+      "step": 215
+    },
+    {
+      "epoch": 0.05721344515961251,
+      "grad_norm": 0.20399969816207886,
+      "learning_rate": 4.397316598723385e-05,
+      "loss": 2.3722,
+      "step": 220
+    },
+    {
+      "epoch": 0.058513750731421885,
+      "grad_norm": 0.22103694081306458,
+      "learning_rate": 4.197943595711198e-05,
+      "loss": 2.4722,
+      "step": 225
+    },
+    {
+      "epoch": 0.05981405630323126,
+      "grad_norm": 0.19609223306179047,
+      "learning_rate": 3.9998715311197785e-05,
+      "loss": 2.4358,
+      "step": 230
+    },
+    {
+      "epoch": 0.061114361875040636,
+      "grad_norm": 0.20797300338745117,
+      "learning_rate": 3.803421678562213e-05,
+      "loss": 2.4147,
+      "step": 235
+    },
+    {
+      "epoch": 0.06241466744685001,
+      "grad_norm": 0.23733116686344147,
+      "learning_rate": 3.608912680417737e-05,
+      "loss": 2.5038,
+      "step": 240
+    },
+    {
+      "epoch": 0.06371497301865939,
+      "grad_norm": 0.20630250871181488,
+      "learning_rate": 3.4166600309926387e-05,
+      "loss": 2.43,
+      "step": 245
+    },
+    {
+      "epoch": 0.06501527859046877,
+      "grad_norm": 0.21626047790050507,
+      "learning_rate": 3.226975564787322e-05,
+      "loss": 2.3791,
+      "step": 250
+    },
+    {
+      "epoch": 0.06631558416227813,
+      "grad_norm": 0.22082562744617462,
+      "learning_rate": 3.0401669506996256e-05,
+      "loss": 2.3927,
+      "step": 255
+    },
+    {
+      "epoch": 0.06761588973408751,
+      "grad_norm": 0.2386007159948349,
+      "learning_rate": 2.8565371929847284e-05,
+      "loss": 2.4492,
+      "step": 260
+    },
+    {
+      "epoch": 0.06891619530589689,
+      "grad_norm": 0.23111873865127563,
+      "learning_rate": 2.6763841397811573e-05,
+      "loss": 2.4282,
+      "step": 265
+    },
+    {
+      "epoch": 0.07021650087770626,
+      "grad_norm": 0.23999303579330444,
+      "learning_rate": 2.500000000000001e-05,
+      "loss": 2.4116,
+      "step": 270
+    },
+    {
+      "epoch": 0.07151680644951564,
+      "grad_norm": 0.23608632385730743,
+      "learning_rate": 2.3276708693609943e-05,
+      "loss": 2.5647,
+      "step": 275
+    },
+    {
+      "epoch": 0.07281711202132501,
+      "grad_norm": 0.22393612563610077,
+      "learning_rate": 2.1596762663442218e-05,
+      "loss": 2.516,
+      "step": 280
+    },
+    {
+      "epoch": 0.07411741759313438,
+      "grad_norm": 0.2267947643995285,
+      "learning_rate": 1.996288678810105e-05,
+      "loss": 2.4214,
+      "step": 285
+    },
+    {
+      "epoch": 0.07541772316494376,
+      "grad_norm": 0.21349841356277466,
+      "learning_rate": 1.837773122023114e-05,
+      "loss": 2.4265,
+      "step": 290
+    },
+    {
+      "epoch": 0.07671802873675314,
+      "grad_norm": 0.21270251274108887,
+      "learning_rate": 1.684386708796025e-05,
+      "loss": 2.4332,
+      "step": 295
+    },
+    {
+      "epoch": 0.07801833430856252,
+      "grad_norm": 0.23567631840705872,
+      "learning_rate": 1.536378232452003e-05,
+      "loss": 2.396,
+      "step": 300
+    },
+    {
+      "epoch": 0.07801833430856252,
+      "eval_loss": 2.4196038246154785,
+      "eval_runtime": 47.2697,
+      "eval_samples_per_second": 137.022,
+      "eval_steps_per_second": 17.136,
+      "step": 300
     }
   ],
   "logging_steps": 5,
@@ -330,7 +478,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.271906598649856e+16,
+  "total_flos": 1.905704121348915e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null