leixa committed on
Commit
c6138ae
·
verified ·
1 Parent(s): 74feed6

Training in progress, step 120, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:266cd94871718a986e4ce23c183790fc4b0de8cc9aa2104fde09808eb0885d75
3
  size 1001465824
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03e6b3e1d595414a7d7c782b65a9be536e02782b01c4d1af44ba2aceb43834f0
3
  size 1001465824
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e33311b799b7918ecff9dc9f8b578379d48f6bda91cf68adc1a95d45266093b
3
  size 509176980
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14c6db5f642cc4524ca1d733bd074dee4ddbfa7832cd101cbbb5fb6b5db1d572
3
  size 509176980
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83b302bd10368c09cb19ba400e1fa8ceca162eab977031b0bc94e411bbd47746
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:878afb72930bbe35f3f76161250c8b6ec3eee322b00164ea89731e27a1aeb55c
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9042f02fbfca7c1dbbfe6d148e2a1de0ab7c9345d455fd2ba76f5d757c8ebcc0
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b978c4f5387d059752764238c4bee9571cf3df7a5c984a19baed3fed0a46071c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.8225108225108224,
5
  "eval_steps": 15,
6
- "global_step": 105,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -316,6 +316,49 @@
316
  "eval_samples_per_second": 4.439,
317
  "eval_steps_per_second": 0.595,
318
  "step": 105
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
319
  }
320
  ],
321
  "logging_steps": 3,
@@ -335,7 +378,7 @@
335
  "attributes": {}
336
  }
337
  },
338
- "total_flos": 5.403225279902515e+17,
339
  "train_batch_size": 8,
340
  "trial_name": null,
341
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.0865800865800868,
5
  "eval_steps": 15,
6
+ "global_step": 120,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
316
  "eval_samples_per_second": 4.439,
317
  "eval_steps_per_second": 0.595,
318
  "step": 105
319
+ },
320
+ {
321
+ "epoch": 1.8744588744588744,
322
+ "grad_norm": 1.011114239692688,
323
+ "learning_rate": 3.43659629390117e-05,
324
+ "loss": 2.2031,
325
+ "step": 108
326
+ },
327
+ {
328
+ "epoch": 1.9264069264069263,
329
+ "grad_norm": 0.9879550933837891,
330
+ "learning_rate": 3.16475450103354e-05,
331
+ "loss": 2.1106,
332
+ "step": 111
333
+ },
334
+ {
335
+ "epoch": 1.9783549783549783,
336
+ "grad_norm": 1.0455985069274902,
337
+ "learning_rate": 2.899046657924992e-05,
338
+ "loss": 2.0031,
339
+ "step": 114
340
+ },
341
+ {
342
+ "epoch": 2.034632034632035,
343
+ "grad_norm": 1.1132123470306396,
344
+ "learning_rate": 2.6403608411631742e-05,
345
+ "loss": 1.843,
346
+ "step": 117
347
+ },
348
+ {
349
+ "epoch": 2.0865800865800868,
350
+ "grad_norm": 1.035132646560669,
351
+ "learning_rate": 2.389561657583681e-05,
352
+ "loss": 1.6831,
353
+ "step": 120
354
+ },
355
+ {
356
+ "epoch": 2.0865800865800868,
357
+ "eval_loss": 0.6776129603385925,
358
+ "eval_runtime": 21.8631,
359
+ "eval_samples_per_second": 4.437,
360
+ "eval_steps_per_second": 0.595,
361
+ "step": 120
362
  }
363
  ],
364
  "logging_steps": 3,
 
378
  "attributes": {}
379
  }
380
  },
381
+ "total_flos": 6.169594538911334e+17,
382
  "train_batch_size": 8,
383
  "trial_name": null,
384
  "trial_params": null