farmery committed (verified)
Commit 7cad091 · 1 Parent(s): 09a3cd5

Training in progress, step 300, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f7a5bc27d15a98a1b7f008e2ae5b1fa71f8564b2203bc5f567718fee9c9b8de9
+ oid sha256:917b9908d6ddf96c924657a17ab9a77947c3dee4187dc99733d028b7da01a370
  size 17425352
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4dfc1b4acfe0978bca370fbc004221ea2fea4474ce49223be7246d9be15c336c
- size 10251668
+ oid sha256:63574ea2b3926287ca101305d0da3612adfdea5a67b7f1e2bae3ca77824eb182
+ size 10252116
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:fd619ac5fe1b5a9993caa1b33027cbaba2affae2b385b5b8e1edc744130828b6
+ oid sha256:49c3113ab5e74cf2cf75b8272697182349b986f05fea887c23c6b1cf30a10723
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1ce9f7304e0a4744e6ea3362c37f41fa81110af8aa0741e1543ca9d0f005e0bc
+ oid sha256:948034a5628471bb3c37b4e388f6a1656b75ee096cbb96c84fa1a5c61b6c63e2
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a2663b0191a48b0d84f57520e6d7470d2ed946f6d4e667618a3101ba08ea341a
+ oid sha256:e03f9a24a7a90769559ab2186e52e4bea52a0ca73705b5babf1706779cbc29f3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:21d803133439c81a4ccfa134ee0737fabb2479f40d1f31268b7cfb1fa6f46236
+ oid sha256:36ecb369e890de96dc252a97d723cd39a5de3933ea7d729b6a84c9264b7d8b05
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4ac116b8169c53ab649a7f15f2f32735f2c71ec2f803f70de8c655a513ee9cfc
+ oid sha256:43da0688aca60835f4e18fa7e0f3cc099504828f82fd5dd994118be26b760a0f
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.05201222287237501,
+ "epoch": 0.07801833430856252,
  "eval_steps": 100,
- "global_step": 200,
+ "global_step": 300,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -311,6 +311,154 @@
  "eval_samples_per_second": 141.18,
  "eval_steps_per_second": 17.656,
  "step": 200
+ },
+ {
+ "epoch": 0.05331252844418438,
+ "grad_norm": 0.2229231297969818,
+ "learning_rate": 5e-05,
+ "loss": 2.4477,
+ "step": 205
+ },
+ {
+ "epoch": 0.05461283401599376,
+ "grad_norm": 0.22189436852931976,
+ "learning_rate": 4.798670299452926e-05,
+ "loss": 2.4228,
+ "step": 210
+ },
+ {
+ "epoch": 0.05591313958780313,
+ "grad_norm": 0.21090376377105713,
+ "learning_rate": 4.597667156416371e-05,
+ "loss": 2.3939,
+ "step": 215
+ },
+ {
+ "epoch": 0.05721344515961251,
+ "grad_norm": 0.20227909088134766,
+ "learning_rate": 4.397316598723385e-05,
+ "loss": 2.37,
+ "step": 220
+ },
+ {
+ "epoch": 0.058513750731421885,
+ "grad_norm": 0.21790654957294464,
+ "learning_rate": 4.197943595711198e-05,
+ "loss": 2.4721,
+ "step": 225
+ },
+ {
+ "epoch": 0.05981405630323126,
+ "grad_norm": 0.1942271739244461,
+ "learning_rate": 3.9998715311197785e-05,
+ "loss": 2.4373,
+ "step": 230
+ },
+ {
+ "epoch": 0.061114361875040636,
+ "grad_norm": 0.20492784678936005,
+ "learning_rate": 3.803421678562213e-05,
+ "loss": 2.4142,
+ "step": 235
+ },
+ {
+ "epoch": 0.06241466744685001,
+ "grad_norm": 0.23484589159488678,
+ "learning_rate": 3.608912680417737e-05,
+ "loss": 2.5037,
+ "step": 240
+ },
+ {
+ "epoch": 0.06371497301865939,
+ "grad_norm": 0.20387956500053406,
+ "learning_rate": 3.4166600309926387e-05,
+ "loss": 2.4313,
+ "step": 245
+ },
+ {
+ "epoch": 0.06501527859046877,
+ "grad_norm": 0.21440523862838745,
+ "learning_rate": 3.226975564787322e-05,
+ "loss": 2.3804,
+ "step": 250
+ },
+ {
+ "epoch": 0.06631558416227813,
+ "grad_norm": 0.21894583106040955,
+ "learning_rate": 3.0401669506996256e-05,
+ "loss": 2.3944,
+ "step": 255
+ },
+ {
+ "epoch": 0.06761588973408751,
+ "grad_norm": 0.23404517769813538,
+ "learning_rate": 2.8565371929847284e-05,
+ "loss": 2.4495,
+ "step": 260
+ },
+ {
+ "epoch": 0.06891619530589689,
+ "grad_norm": 0.22827592492103577,
+ "learning_rate": 2.6763841397811573e-05,
+ "loss": 2.4284,
+ "step": 265
+ },
+ {
+ "epoch": 0.07021650087770626,
+ "grad_norm": 0.2367011457681656,
+ "learning_rate": 2.500000000000001e-05,
+ "loss": 2.4119,
+ "step": 270
+ },
+ {
+ "epoch": 0.07151680644951564,
+ "grad_norm": 0.23400311172008514,
+ "learning_rate": 2.3276708693609943e-05,
+ "loss": 2.5657,
+ "step": 275
+ },
+ {
+ "epoch": 0.07281711202132501,
+ "grad_norm": 0.22112374007701874,
+ "learning_rate": 2.1596762663442218e-05,
+ "loss": 2.5174,
+ "step": 280
+ },
+ {
+ "epoch": 0.07411741759313438,
+ "grad_norm": 0.22433413565158844,
+ "learning_rate": 1.996288678810105e-05,
+ "loss": 2.4226,
+ "step": 285
+ },
+ {
+ "epoch": 0.07541772316494376,
+ "grad_norm": 0.21052949130535126,
+ "learning_rate": 1.837773122023114e-05,
+ "loss": 2.4243,
+ "step": 290
+ },
+ {
+ "epoch": 0.07671802873675314,
+ "grad_norm": 0.21205665171146393,
+ "learning_rate": 1.684386708796025e-05,
+ "loss": 2.4338,
+ "step": 295
+ },
+ {
+ "epoch": 0.07801833430856252,
+ "grad_norm": 0.23179137706756592,
+ "learning_rate": 1.536378232452003e-05,
+ "loss": 2.3962,
+ "step": 300
+ },
+ {
+ "epoch": 0.07801833430856252,
+ "eval_loss": 2.4198389053344727,
+ "eval_runtime": 45.9519,
+ "eval_samples_per_second": 140.952,
+ "eval_steps_per_second": 17.627,
+ "step": 300
  }
  ],
  "logging_steps": 5,
@@ -330,7 +478,7 @@
  "attributes": {}
  }
  },
- "total_flos": 1.271906598649856e+16,
+ "total_flos": 1.905704121348915e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null