tiendoan committed on
Commit a73000d
1 Parent(s): 70f1975

Training in progress, step 352

all_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+ "epoch": 4.0,
+ "total_flos": 3.4828624117074493e+18,
+ "train_loss": 0.40373469023457303,
+ "train_runtime": 1995.1511,
+ "train_samples_per_second": 22.527,
+ "train_steps_per_second": 0.706
+ }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b252f90d6f06b178343a72a7a4d16d1986f8cc8307b69c1006547908dc83bac1
+ oid sha256:1d3c42fdb32fba9b201cecb12dbc5314c3699f4eb4a43f6d070041f7a0aa0ecc
  size 343230128
runs/Nov07_15-25-23_509d87c7c7ea/events.out.tfevents.1730993124.509d87c7c7ea.30.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4b68070a7005ea78360e178257b816af32338a29f712bf1b6edf0f427d61f47d
+ size 12869
train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+ "epoch": 4.0,
+ "total_flos": 3.4828624117074493e+18,
+ "train_loss": 0.40373469023457303,
+ "train_runtime": 1995.1511,
+ "train_samples_per_second": 22.527,
+ "train_steps_per_second": 0.706
+ }
trainer_state.json ADDED
@@ -0,0 +1,1049 @@
+ {
+ "best_metric": 0.0417679101228714,
+ "best_model_checkpoint": "./finetune-vit-base-patch16-224/checkpoint-1200",
+ "epoch": 4.0,
+ "eval_steps": 400,
+ "global_step": 1408,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.028409090909090908,
+ "grad_norm": 246460.15625,
+ "learning_rate": 4.9644886363636365e-05,
+ "loss": 1.1228,
+ "step": 10
+ },
+ {
+ "epoch": 0.056818181818181816,
+ "grad_norm": 226149.65625,
+ "learning_rate": 4.9289772727272735e-05,
+ "loss": 0.9359,
+ "step": 20
+ },
+ {
+ "epoch": 0.08522727272727272,
+ "grad_norm": 229797.6875,
+ "learning_rate": 4.893465909090909e-05,
+ "loss": 0.9185,
+ "step": 30
+ },
+ {
+ "epoch": 0.11363636363636363,
+ "grad_norm": 274111.03125,
+ "learning_rate": 4.857954545454545e-05,
+ "loss": 0.9599,
+ "step": 40
+ },
+ {
+ "epoch": 0.14204545454545456,
+ "grad_norm": 189042.953125,
+ "learning_rate": 4.822443181818182e-05,
+ "loss": 0.9459,
+ "step": 50
+ },
+ {
+ "epoch": 0.17045454545454544,
+ "grad_norm": 233362.859375,
+ "learning_rate": 4.7869318181818185e-05,
+ "loss": 0.9634,
+ "step": 60
+ },
+ {
+ "epoch": 0.19886363636363635,
+ "grad_norm": 267175.90625,
+ "learning_rate": 4.751420454545455e-05,
+ "loss": 0.8705,
+ "step": 70
+ },
+ {
+ "epoch": 0.22727272727272727,
+ "grad_norm": 211430.734375,
+ "learning_rate": 4.715909090909091e-05,
+ "loss": 0.9014,
+ "step": 80
+ },
+ {
+ "epoch": 0.2556818181818182,
+ "grad_norm": 238574.546875,
+ "learning_rate": 4.6803977272727274e-05,
+ "loss": 0.8607,
+ "step": 90
+ },
+ {
+ "epoch": 0.2840909090909091,
+ "grad_norm": 260448.125,
+ "learning_rate": 4.6448863636363636e-05,
+ "loss": 0.8127,
+ "step": 100
+ },
+ {
+ "epoch": 0.3125,
+ "grad_norm": 168009.265625,
+ "learning_rate": 4.609375e-05,
+ "loss": 0.8228,
+ "step": 110
+ },
+ {
+ "epoch": 0.3409090909090909,
+ "grad_norm": 232205.125,
+ "learning_rate": 4.573863636363637e-05,
+ "loss": 0.8704,
+ "step": 120
+ },
+ {
+ "epoch": 0.3693181818181818,
+ "grad_norm": 302465.9375,
+ "learning_rate": 4.538352272727273e-05,
+ "loss": 0.8896,
+ "step": 130
+ },
+ {
+ "epoch": 0.3977272727272727,
+ "grad_norm": 210630.53125,
+ "learning_rate": 4.5028409090909094e-05,
+ "loss": 0.8732,
+ "step": 140
+ },
+ {
+ "epoch": 0.42613636363636365,
+ "grad_norm": 171584.9375,
+ "learning_rate": 4.4673295454545457e-05,
+ "loss": 0.7886,
+ "step": 150
+ },
+ {
+ "epoch": 0.45454545454545453,
+ "grad_norm": 255000.359375,
+ "learning_rate": 4.431818181818182e-05,
+ "loss": 0.9411,
+ "step": 160
+ },
+ {
+ "epoch": 0.48295454545454547,
+ "grad_norm": 244293.703125,
+ "learning_rate": 4.396306818181818e-05,
+ "loss": 0.8608,
+ "step": 170
+ },
+ {
+ "epoch": 0.5113636363636364,
+ "grad_norm": 235527.875,
+ "learning_rate": 4.360795454545455e-05,
+ "loss": 0.8106,
+ "step": 180
+ },
+ {
+ "epoch": 0.5397727272727273,
+ "grad_norm": 234210.1875,
+ "learning_rate": 4.3252840909090914e-05,
+ "loss": 0.795,
+ "step": 190
+ },
+ {
+ "epoch": 0.5681818181818182,
+ "grad_norm": 182797.875,
+ "learning_rate": 4.289772727272727e-05,
+ "loss": 0.7926,
+ "step": 200
+ },
+ {
+ "epoch": 0.5965909090909091,
+ "grad_norm": 324642.5,
+ "learning_rate": 4.254261363636364e-05,
+ "loss": 0.778,
+ "step": 210
+ },
+ {
+ "epoch": 0.625,
+ "grad_norm": 359272.71875,
+ "learning_rate": 4.21875e-05,
+ "loss": 0.7829,
+ "step": 220
+ },
+ {
+ "epoch": 0.6534090909090909,
+ "grad_norm": 279676.875,
+ "learning_rate": 4.1832386363636365e-05,
+ "loss": 0.8244,
+ "step": 230
+ },
+ {
+ "epoch": 0.6818181818181818,
+ "grad_norm": 259783.71875,
+ "learning_rate": 4.1477272727272734e-05,
+ "loss": 0.7465,
+ "step": 240
+ },
+ {
+ "epoch": 0.7102272727272727,
+ "grad_norm": 184817.609375,
+ "learning_rate": 4.112215909090909e-05,
+ "loss": 0.7447,
+ "step": 250
+ },
+ {
+ "epoch": 0.7386363636363636,
+ "grad_norm": 221672.1875,
+ "learning_rate": 4.076704545454545e-05,
+ "loss": 0.8206,
+ "step": 260
+ },
+ {
+ "epoch": 0.7670454545454546,
+ "grad_norm": 251710.0,
+ "learning_rate": 4.041193181818182e-05,
+ "loss": 0.8222,
+ "step": 270
+ },
+ {
+ "epoch": 0.7954545454545454,
+ "grad_norm": 287394.75,
+ "learning_rate": 4.0056818181818185e-05,
+ "loss": 0.8751,
+ "step": 280
+ },
+ {
+ "epoch": 0.8238636363636364,
+ "grad_norm": 261405.84375,
+ "learning_rate": 3.970170454545455e-05,
+ "loss": 0.8049,
+ "step": 290
+ },
+ {
+ "epoch": 0.8522727272727273,
+ "grad_norm": 339216.5,
+ "learning_rate": 3.934659090909091e-05,
+ "loss": 0.7734,
+ "step": 300
+ },
+ {
+ "epoch": 0.8806818181818182,
+ "grad_norm": 253168.921875,
+ "learning_rate": 3.899147727272727e-05,
+ "loss": 0.7916,
+ "step": 310
+ },
+ {
+ "epoch": 0.9090909090909091,
+ "grad_norm": 243938.09375,
+ "learning_rate": 3.8636363636363636e-05,
+ "loss": 0.8075,
+ "step": 320
+ },
+ {
+ "epoch": 0.9375,
+ "grad_norm": 224975.296875,
+ "learning_rate": 3.828125e-05,
+ "loss": 0.724,
+ "step": 330
+ },
+ {
+ "epoch": 0.9659090909090909,
+ "grad_norm": 314409.71875,
+ "learning_rate": 3.792613636363637e-05,
+ "loss": 0.86,
+ "step": 340
+ },
+ {
+ "epoch": 0.9943181818181818,
+ "grad_norm": 254573.59375,
+ "learning_rate": 3.757102272727273e-05,
+ "loss": 0.7882,
+ "step": 350
+ },
+ {
+ "epoch": 1.0227272727272727,
+ "grad_norm": 239098.109375,
+ "learning_rate": 3.721590909090909e-05,
+ "loss": 0.5987,
+ "step": 360
+ },
+ {
+ "epoch": 1.0511363636363635,
+ "grad_norm": 212271.015625,
+ "learning_rate": 3.6860795454545456e-05,
+ "loss": 0.5594,
+ "step": 370
+ },
+ {
+ "epoch": 1.0795454545454546,
+ "grad_norm": 258443.203125,
+ "learning_rate": 3.650568181818182e-05,
+ "loss": 0.5778,
+ "step": 380
+ },
+ {
+ "epoch": 1.1079545454545454,
+ "grad_norm": 251415.8125,
+ "learning_rate": 3.615056818181818e-05,
+ "loss": 0.5707,
+ "step": 390
+ },
+ {
+ "epoch": 1.1363636363636362,
+ "grad_norm": 191828.046875,
+ "learning_rate": 3.579545454545455e-05,
+ "loss": 0.6151,
+ "step": 400
+ },
+ {
+ "epoch": 1.1363636363636362,
+ "eval_f1": 0.7879138483446066,
+ "eval_loss": 0.5355119705200195,
+ "eval_runtime": 204.0556,
+ "eval_samples_per_second": 55.063,
+ "eval_steps_per_second": 3.445,
+ "step": 400
+ },
+ {
+ "epoch": 1.1647727272727273,
+ "grad_norm": 233159.53125,
+ "learning_rate": 3.5440340909090914e-05,
+ "loss": 0.548,
+ "step": 410
+ },
+ {
+ "epoch": 1.1931818181818181,
+ "grad_norm": 206000.609375,
+ "learning_rate": 3.508522727272727e-05,
+ "loss": 0.5118,
+ "step": 420
+ },
+ {
+ "epoch": 1.2215909090909092,
+ "grad_norm": 262176.0625,
+ "learning_rate": 3.473011363636364e-05,
+ "loss": 0.5221,
+ "step": 430
+ },
+ {
+ "epoch": 1.25,
+ "grad_norm": 225265.671875,
+ "learning_rate": 3.4375e-05,
+ "loss": 0.5489,
+ "step": 440
+ },
+ {
+ "epoch": 1.2784090909090908,
+ "grad_norm": 261512.140625,
+ "learning_rate": 3.4019886363636365e-05,
+ "loss": 0.5682,
+ "step": 450
+ },
+ {
+ "epoch": 1.3068181818181819,
+ "grad_norm": 336397.46875,
+ "learning_rate": 3.3664772727272734e-05,
+ "loss": 0.5585,
+ "step": 460
+ },
+ {
+ "epoch": 1.3352272727272727,
+ "grad_norm": 253634.796875,
+ "learning_rate": 3.330965909090909e-05,
+ "loss": 0.5239,
+ "step": 470
+ },
+ {
+ "epoch": 1.3636363636363638,
+ "grad_norm": 253387.1875,
+ "learning_rate": 3.295454545454545e-05,
+ "loss": 0.5411,
+ "step": 480
+ },
+ {
+ "epoch": 1.3920454545454546,
+ "grad_norm": 175611.75,
+ "learning_rate": 3.259943181818182e-05,
+ "loss": 0.4704,
+ "step": 490
+ },
+ {
+ "epoch": 1.4204545454545454,
+ "grad_norm": 210382.125,
+ "learning_rate": 3.2244318181818185e-05,
+ "loss": 0.4668,
+ "step": 500
+ },
+ {
+ "epoch": 1.4488636363636362,
+ "grad_norm": 207340.484375,
+ "learning_rate": 3.188920454545455e-05,
+ "loss": 0.5243,
+ "step": 510
+ },
+ {
+ "epoch": 1.4772727272727273,
+ "grad_norm": 211227.53125,
+ "learning_rate": 3.153409090909091e-05,
+ "loss": 0.5158,
+ "step": 520
+ },
+ {
+ "epoch": 1.5056818181818183,
+ "grad_norm": 263875.125,
+ "learning_rate": 3.117897727272727e-05,
+ "loss": 0.5264,
+ "step": 530
+ },
+ {
+ "epoch": 1.5340909090909092,
+ "grad_norm": 250973.984375,
+ "learning_rate": 3.0823863636363636e-05,
+ "loss": 0.4892,
+ "step": 540
+ },
+ {
+ "epoch": 1.5625,
+ "grad_norm": 210192.90625,
+ "learning_rate": 3.0468750000000002e-05,
+ "loss": 0.565,
+ "step": 550
+ },
+ {
+ "epoch": 1.5909090909090908,
+ "grad_norm": 277090.34375,
+ "learning_rate": 3.0113636363636365e-05,
+ "loss": 0.5501,
+ "step": 560
+ },
+ {
+ "epoch": 1.6193181818181817,
+ "grad_norm": 262420.625,
+ "learning_rate": 2.975852272727273e-05,
+ "loss": 0.4802,
+ "step": 570
+ },
+ {
+ "epoch": 1.6477272727272727,
+ "grad_norm": 247244.59375,
+ "learning_rate": 2.940340909090909e-05,
+ "loss": 0.4778,
+ "step": 580
+ },
+ {
+ "epoch": 1.6761363636363638,
+ "grad_norm": 238716.140625,
+ "learning_rate": 2.9048295454545453e-05,
+ "loss": 0.4998,
+ "step": 590
+ },
+ {
+ "epoch": 1.7045454545454546,
+ "grad_norm": 288676.875,
+ "learning_rate": 2.869318181818182e-05,
+ "loss": 0.4763,
+ "step": 600
+ },
+ {
+ "epoch": 1.7329545454545454,
+ "grad_norm": 254478.03125,
+ "learning_rate": 2.8338068181818185e-05,
+ "loss": 0.4912,
+ "step": 610
+ },
+ {
+ "epoch": 1.7613636363636362,
+ "grad_norm": 295674.3125,
+ "learning_rate": 2.7982954545454548e-05,
+ "loss": 0.4892,
+ "step": 620
+ },
+ {
+ "epoch": 1.7897727272727273,
+ "grad_norm": 279737.21875,
+ "learning_rate": 2.7627840909090914e-05,
+ "loss": 0.4677,
+ "step": 630
+ },
+ {
+ "epoch": 1.8181818181818183,
+ "grad_norm": 325599.34375,
+ "learning_rate": 2.7272727272727273e-05,
+ "loss": 0.4977,
+ "step": 640
+ },
+ {
+ "epoch": 1.8465909090909092,
+ "grad_norm": 303249.375,
+ "learning_rate": 2.6917613636363636e-05,
+ "loss": 0.5212,
+ "step": 650
+ },
+ {
+ "epoch": 1.875,
+ "grad_norm": 269595.21875,
+ "learning_rate": 2.6562500000000002e-05,
+ "loss": 0.5283,
+ "step": 660
+ },
+ {
+ "epoch": 1.9034090909090908,
+ "grad_norm": 274965.3125,
+ "learning_rate": 2.6207386363636365e-05,
+ "loss": 0.5194,
+ "step": 670
+ },
+ {
+ "epoch": 1.9318181818181817,
+ "grad_norm": 250650.328125,
+ "learning_rate": 2.585227272727273e-05,
+ "loss": 0.5274,
+ "step": 680
+ },
+ {
+ "epoch": 1.9602272727272727,
+ "grad_norm": 232058.15625,
+ "learning_rate": 2.549715909090909e-05,
+ "loss": 0.5002,
+ "step": 690
+ },
+ {
+ "epoch": 1.9886363636363638,
+ "grad_norm": 251402.0,
+ "learning_rate": 2.5142045454545453e-05,
+ "loss": 0.4618,
+ "step": 700
+ },
+ {
+ "epoch": 2.0170454545454546,
+ "grad_norm": 192832.578125,
+ "learning_rate": 2.478693181818182e-05,
+ "loss": 0.3425,
+ "step": 710
+ },
+ {
+ "epoch": 2.0454545454545454,
+ "grad_norm": 200086.390625,
+ "learning_rate": 2.4431818181818185e-05,
+ "loss": 0.2832,
+ "step": 720
+ },
+ {
+ "epoch": 2.0738636363636362,
+ "grad_norm": 162459.609375,
+ "learning_rate": 2.4076704545454544e-05,
+ "loss": 0.2102,
+ "step": 730
+ },
+ {
+ "epoch": 2.102272727272727,
+ "grad_norm": 132360.765625,
+ "learning_rate": 2.372159090909091e-05,
+ "loss": 0.2097,
+ "step": 740
+ },
+ {
+ "epoch": 2.1306818181818183,
+ "grad_norm": 146930.046875,
+ "learning_rate": 2.3366477272727273e-05,
+ "loss": 0.1884,
+ "step": 750
+ },
+ {
+ "epoch": 2.159090909090909,
+ "grad_norm": 246238.796875,
+ "learning_rate": 2.3011363636363636e-05,
+ "loss": 0.1969,
+ "step": 760
+ },
+ {
+ "epoch": 2.1875,
+ "grad_norm": 232657.203125,
+ "learning_rate": 2.2656250000000002e-05,
+ "loss": 0.1925,
+ "step": 770
+ },
+ {
+ "epoch": 2.215909090909091,
+ "grad_norm": 227103.3125,
+ "learning_rate": 2.2301136363636365e-05,
+ "loss": 0.1851,
+ "step": 780
+ },
+ {
+ "epoch": 2.2443181818181817,
+ "grad_norm": 171326.71875,
+ "learning_rate": 2.1946022727272727e-05,
+ "loss": 0.2253,
+ "step": 790
+ },
+ {
+ "epoch": 2.2727272727272725,
+ "grad_norm": 121495.1953125,
+ "learning_rate": 2.1590909090909093e-05,
+ "loss": 0.1867,
+ "step": 800
+ },
+ {
+ "epoch": 2.2727272727272725,
+ "eval_f1": 0.9550551797792809,
+ "eval_loss": 0.17148956656455994,
+ "eval_runtime": 203.949,
+ "eval_samples_per_second": 55.092,
+ "eval_steps_per_second": 3.447,
+ "step": 800
+ },
+ {
+ "epoch": 2.3011363636363638,
+ "grad_norm": 238023.546875,
+ "learning_rate": 2.1235795454545456e-05,
+ "loss": 0.2143,
+ "step": 810
+ },
+ {
+ "epoch": 2.3295454545454546,
+ "grad_norm": 215472.78125,
+ "learning_rate": 2.088068181818182e-05,
+ "loss": 0.1681,
+ "step": 820
+ },
+ {
+ "epoch": 2.3579545454545454,
+ "grad_norm": 185951.046875,
+ "learning_rate": 2.0525568181818185e-05,
+ "loss": 0.2,
+ "step": 830
+ },
+ {
+ "epoch": 2.3863636363636362,
+ "grad_norm": 288287.34375,
+ "learning_rate": 2.0170454545454544e-05,
+ "loss": 0.1899,
+ "step": 840
+ },
+ {
+ "epoch": 2.4147727272727275,
+ "grad_norm": 184342.796875,
+ "learning_rate": 1.981534090909091e-05,
+ "loss": 0.1898,
+ "step": 850
+ },
+ {
+ "epoch": 2.4431818181818183,
+ "grad_norm": 143657.375,
+ "learning_rate": 1.9460227272727273e-05,
+ "loss": 0.1707,
+ "step": 860
+ },
+ {
+ "epoch": 2.471590909090909,
+ "grad_norm": 142439.578125,
+ "learning_rate": 1.9105113636363636e-05,
+ "loss": 0.1505,
+ "step": 870
+ },
+ {
+ "epoch": 2.5,
+ "grad_norm": 255553.71875,
+ "learning_rate": 1.8750000000000002e-05,
+ "loss": 0.2047,
+ "step": 880
+ },
+ {
+ "epoch": 2.528409090909091,
+ "grad_norm": 217335.078125,
+ "learning_rate": 1.8394886363636364e-05,
+ "loss": 0.18,
+ "step": 890
+ },
+ {
+ "epoch": 2.5568181818181817,
+ "grad_norm": 143375.3125,
+ "learning_rate": 1.8039772727272727e-05,
+ "loss": 0.2372,
+ "step": 900
+ },
+ {
+ "epoch": 2.5852272727272725,
+ "grad_norm": 325331.0625,
+ "learning_rate": 1.7684659090909093e-05,
+ "loss": 0.2047,
+ "step": 910
+ },
+ {
+ "epoch": 2.6136363636363638,
+ "grad_norm": 160601.78125,
+ "learning_rate": 1.7329545454545456e-05,
+ "loss": 0.1999,
+ "step": 920
+ },
+ {
+ "epoch": 2.6420454545454546,
+ "grad_norm": 114873.859375,
+ "learning_rate": 1.697443181818182e-05,
+ "loss": 0.1736,
+ "step": 930
+ },
+ {
+ "epoch": 2.6704545454545454,
+ "grad_norm": 191060.78125,
+ "learning_rate": 1.6619318181818185e-05,
+ "loss": 0.1809,
+ "step": 940
+ },
+ {
+ "epoch": 2.6988636363636362,
+ "grad_norm": 303838.96875,
+ "learning_rate": 1.6264204545454544e-05,
+ "loss": 0.238,
+ "step": 950
+ },
+ {
+ "epoch": 2.7272727272727275,
+ "grad_norm": 92415.265625,
+ "learning_rate": 1.590909090909091e-05,
+ "loss": 0.137,
+ "step": 960
+ },
+ {
+ "epoch": 2.7556818181818183,
+ "grad_norm": 227939.296875,
+ "learning_rate": 1.5553977272727273e-05,
+ "loss": 0.1811,
+ "step": 970
+ },
+ {
+ "epoch": 2.784090909090909,
+ "grad_norm": 244860.359375,
+ "learning_rate": 1.5198863636363636e-05,
+ "loss": 0.2235,
+ "step": 980
+ },
+ {
+ "epoch": 2.8125,
+ "grad_norm": 199524.078125,
+ "learning_rate": 1.484375e-05,
+ "loss": 0.1885,
+ "step": 990
+ },
+ {
+ "epoch": 2.840909090909091,
+ "grad_norm": 245456.046875,
+ "learning_rate": 1.4488636363636366e-05,
+ "loss": 0.2261,
+ "step": 1000
+ },
+ {
+ "epoch": 2.8693181818181817,
+ "grad_norm": 291130.96875,
+ "learning_rate": 1.4133522727272727e-05,
+ "loss": 0.1767,
+ "step": 1010
+ },
+ {
+ "epoch": 2.8977272727272725,
+ "grad_norm": 119223.3046875,
+ "learning_rate": 1.3778409090909091e-05,
+ "loss": 0.1589,
+ "step": 1020
+ },
+ {
+ "epoch": 2.9261363636363638,
+ "grad_norm": 205424.078125,
+ "learning_rate": 1.3423295454545456e-05,
+ "loss": 0.1666,
+ "step": 1030
+ },
+ {
+ "epoch": 2.9545454545454546,
+ "grad_norm": 177895.84375,
+ "learning_rate": 1.3068181818181819e-05,
+ "loss": 0.1572,
+ "step": 1040
+ },
+ {
+ "epoch": 2.9829545454545454,
+ "grad_norm": 337598.78125,
+ "learning_rate": 1.2713068181818183e-05,
+ "loss": 0.1938,
+ "step": 1050
+ },
+ {
+ "epoch": 3.0113636363636362,
+ "grad_norm": 173000.0,
+ "learning_rate": 1.2357954545454546e-05,
+ "loss": 0.1126,
+ "step": 1060
+ },
+ {
+ "epoch": 3.039772727272727,
+ "grad_norm": 97144.171875,
+ "learning_rate": 1.200284090909091e-05,
+ "loss": 0.0462,
+ "step": 1070
+ },
+ {
+ "epoch": 3.0681818181818183,
+ "grad_norm": 54899.234375,
+ "learning_rate": 1.1647727272727273e-05,
+ "loss": 0.0615,
+ "step": 1080
+ },
+ {
+ "epoch": 3.096590909090909,
+ "grad_norm": 36492.046875,
+ "learning_rate": 1.1292613636363637e-05,
+ "loss": 0.0491,
+ "step": 1090
+ },
+ {
+ "epoch": 3.125,
+ "grad_norm": 37996.1953125,
+ "learning_rate": 1.09375e-05,
+ "loss": 0.0562,
+ "step": 1100
+ },
+ {
+ "epoch": 3.153409090909091,
+ "grad_norm": 190393.703125,
+ "learning_rate": 1.0582386363636364e-05,
+ "loss": 0.054,
+ "step": 1110
+ },
+ {
+ "epoch": 3.1818181818181817,
+ "grad_norm": 179904.40625,
+ "learning_rate": 1.0227272727272729e-05,
+ "loss": 0.0728,
+ "step": 1120
+ },
+ {
+ "epoch": 3.210227272727273,
+ "grad_norm": 100628.515625,
+ "learning_rate": 9.872159090909091e-06,
+ "loss": 0.0625,
+ "step": 1130
+ },
+ {
+ "epoch": 3.2386363636363638,
+ "grad_norm": 118374.3984375,
+ "learning_rate": 9.517045454545454e-06,
+ "loss": 0.0569,
+ "step": 1140
+ },
+ {
+ "epoch": 3.2670454545454546,
+ "grad_norm": 75175.8359375,
+ "learning_rate": 9.161931818181818e-06,
+ "loss": 0.0436,
+ "step": 1150
+ },
+ {
+ "epoch": 3.2954545454545454,
+ "grad_norm": 158238.78125,
+ "learning_rate": 8.806818181818183e-06,
+ "loss": 0.079,
+ "step": 1160
+ },
+ {
+ "epoch": 3.3238636363636362,
+ "grad_norm": 68349.515625,
+ "learning_rate": 8.451704545454546e-06,
+ "loss": 0.056,
+ "step": 1170
+ },
+ {
+ "epoch": 3.3522727272727275,
+ "grad_norm": 43816.8671875,
+ "learning_rate": 8.09659090909091e-06,
+ "loss": 0.0443,
+ "step": 1180
+ },
+ {
+ "epoch": 3.3806818181818183,
+ "grad_norm": 61632.68359375,
+ "learning_rate": 7.741477272727273e-06,
+ "loss": 0.0554,
+ "step": 1190
+ },
+ {
+ "epoch": 3.409090909090909,
+ "grad_norm": 60831.44140625,
+ "learning_rate": 7.386363636363637e-06,
+ "loss": 0.0871,
+ "step": 1200
+ },
+ {
+ "epoch": 3.409090909090909,
+ "eval_f1": 0.9917230331078676,
+ "eval_loss": 0.0417679101228714,
+ "eval_runtime": 204.1321,
+ "eval_samples_per_second": 55.043,
+ "eval_steps_per_second": 3.444,
+ "step": 1200
+ },
+ {
+ "epoch": 3.4375,
+ "grad_norm": 90207.28125,
+ "learning_rate": 7.031250000000001e-06,
+ "loss": 0.0676,
+ "step": 1210
+ },
+ {
+ "epoch": 3.465909090909091,
+ "grad_norm": 63487.5546875,
+ "learning_rate": 6.676136363636363e-06,
+ "loss": 0.0346,
+ "step": 1220
+ },
+ {
+ "epoch": 3.4943181818181817,
+ "grad_norm": 83902.515625,
+ "learning_rate": 6.321022727272729e-06,
+ "loss": 0.0587,
+ "step": 1230
+ },
+ {
+ "epoch": 3.5227272727272725,
+ "grad_norm": 26082.44921875,
+ "learning_rate": 5.965909090909091e-06,
+ "loss": 0.0385,
+ "step": 1240
+ },
+ {
+ "epoch": 3.5511363636363638,
+ "grad_norm": 71738.4140625,
+ "learning_rate": 5.610795454545455e-06,
+ "loss": 0.0497,
+ "step": 1250
+ },
+ {
+ "epoch": 3.5795454545454546,
+ "grad_norm": 115759.3671875,
+ "learning_rate": 5.255681818181818e-06,
+ "loss": 0.0679,
+ "step": 1260
+ },
+ {
+ "epoch": 3.6079545454545454,
+ "grad_norm": 49416.74609375,
+ "learning_rate": 4.900568181818182e-06,
+ "loss": 0.0565,
+ "step": 1270
+ },
+ {
+ "epoch": 3.6363636363636362,
+ "grad_norm": 164339.484375,
+ "learning_rate": 4.5454545454545455e-06,
+ "loss": 0.0374,
+ "step": 1280
+ },
+ {
+ "epoch": 3.6647727272727275,
+ "grad_norm": 74746.796875,
+ "learning_rate": 4.190340909090909e-06,
+ "loss": 0.0382,
+ "step": 1290
+ },
+ {
+ "epoch": 3.6931818181818183,
+ "grad_norm": 29929.04296875,
+ "learning_rate": 3.835227272727273e-06,
+ "loss": 0.039,
+ "step": 1300
+ },
+ {
+ "epoch": 3.721590909090909,
+ "grad_norm": 59106.06640625,
+ "learning_rate": 3.480113636363636e-06,
+ "loss": 0.0376,
+ "step": 1310
+ },
+ {
+ "epoch": 3.75,
+ "grad_norm": 187797.71875,
+ "learning_rate": 3.125e-06,
+ "loss": 0.056,
+ "step": 1320
+ },
+ {
+ "epoch": 3.778409090909091,
+ "grad_norm": 42829.46875,
+ "learning_rate": 2.7698863636363637e-06,
+ "loss": 0.0434,
+ "step": 1330
+ },
+ {
+ "epoch": 3.8068181818181817,
+ "grad_norm": 252679.109375,
+ "learning_rate": 2.4147727272727273e-06,
+ "loss": 0.0502,
+ "step": 1340
+ },
+ {
+ "epoch": 3.8352272727272725,
+ "grad_norm": 35090.86328125,
+ "learning_rate": 2.059659090909091e-06,
+ "loss": 0.0686,
+ "step": 1350
+ },
+ {
+ "epoch": 3.8636363636363638,
+ "grad_norm": 287442.9375,
+ "learning_rate": 1.7045454545454546e-06,
+ "loss": 0.0579,
+ "step": 1360
+ },
+ {
+ "epoch": 3.8920454545454546,
+ "grad_norm": 241179.890625,
+ "learning_rate": 1.3494318181818183e-06,
+ "loss": 0.065,
+ "step": 1370
+ },
+ {
+ "epoch": 3.9204545454545454,
+ "grad_norm": 20388.59765625,
+ "learning_rate": 9.943181818181819e-07,
+ "loss": 0.0281,
+ "step": 1380
+ },
+ {
+ "epoch": 3.9488636363636362,
+ "grad_norm": 44893.046875,
+ "learning_rate": 6.392045454545455e-07,
+ "loss": 0.0297,
+ "step": 1390
+ },
+ {
+ "epoch": 3.9772727272727275,
+ "grad_norm": 30813.0546875,
+ "learning_rate": 2.840909090909091e-07,
+ "loss": 0.048,
+ "step": 1400
+ },
+ {
+ "epoch": 4.0,
+ "step": 1408,
+ "total_flos": 3.4828624117074493e+18,
+ "train_loss": 0.40373469023457303,
+ "train_runtime": 1995.1511,
+ "train_samples_per_second": 22.527,
+ "train_steps_per_second": 0.706
+ }
+ ],
+ "logging_steps": 10,
+ "max_steps": 1408,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 4,
+ "save_steps": 400,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": true
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 3.4828624117074493e+18,
+ "train_batch_size": 32,
+ "trial_name": null,
+ "trial_params": null
+ }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:93c94b1e2af28ebd0231de3ea12cc2305ae0723ff20e56cce8aa6dee01ecbdfc
+ oid sha256:0b584c7abff93919a16bdd8a77a0bf32e568b3b8bfccee1f93593599a2675fc3
  size 5240