{
  "best_metric": 0.19156721234321594,
  "best_model_checkpoint": "./ryan03312024_lr_2e-5_wd_001/checkpoint-3100",
  "epoch": 1.5001803101334295,
  "eval_steps": 100,
  "global_step": 4160,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "grad_norm": 0.6417293548583984,
      "learning_rate": 1.9879807692307693e-05,
      "loss": 0.5741,
      "step": 25
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4069948196411133,
      "learning_rate": 1.975961538461539e-05,
      "loss": 0.4638,
      "step": 50
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5593534708023071,
      "learning_rate": 1.963942307692308e-05,
      "loss": 0.4114,
      "step": 75
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.6675819158554077,
      "learning_rate": 1.9519230769230772e-05,
      "loss": 0.4436,
      "step": 100
    },
    {
      "epoch": 0.04,
      "eval_loss": 0.3697698712348938,
      "eval_na_accuracy": 0.7989690899848938,
      "eval_ordinal_accuracy": 0.3331620991230011,
      "eval_ordinal_mae": 0.8705630302429199,
      "eval_runtime": 346.2992,
      "eval_samples_per_second": 12.922,
      "eval_steps_per_second": 1.617,
      "step": 100
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.6941640377044678,
      "learning_rate": 1.9399038461538464e-05,
      "loss": 0.3901,
      "step": 125
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.7081687450408936,
      "learning_rate": 1.9278846153846155e-05,
      "loss": 0.366,
      "step": 150
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.9151293635368347,
      "learning_rate": 1.9158653846153847e-05,
      "loss": 0.3773,
      "step": 175
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.5204830169677734,
      "learning_rate": 1.903846153846154e-05,
      "loss": 0.3143,
      "step": 200
    },
    {
      "epoch": 0.07,
      "eval_loss": 0.3215162754058838,
      "eval_na_accuracy": 0.8092783689498901,
      "eval_ordinal_accuracy": 0.4017467200756073,
      "eval_ordinal_mae": 0.855476975440979,
      "eval_runtime": 209.7588,
      "eval_samples_per_second": 21.334,
      "eval_steps_per_second": 2.67,
      "step": 200
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.357649803161621,
      "learning_rate": 1.8918269230769234e-05,
      "loss": 0.3559,
      "step": 225
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.7635074257850647,
      "learning_rate": 1.8798076923076926e-05,
      "loss": 0.3616,
      "step": 250
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.4480478763580322,
      "learning_rate": 1.8677884615384617e-05,
      "loss": 0.3209,
      "step": 275
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.8215653300285339,
      "learning_rate": 1.855769230769231e-05,
      "loss": 0.3385,
      "step": 300
    },
    {
      "epoch": 0.11,
      "eval_loss": 0.2996984124183655,
      "eval_na_accuracy": 0.8591065406799316,
      "eval_ordinal_accuracy": 0.44849729537963867,
      "eval_ordinal_mae": 0.8302922248840332,
      "eval_runtime": 193.7095,
      "eval_samples_per_second": 23.102,
      "eval_steps_per_second": 2.891,
      "step": 300
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.48835399746894836,
      "learning_rate": 1.84375e-05,
      "loss": 0.2976,
      "step": 325
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.6756967306137085,
      "learning_rate": 1.8317307692307693e-05,
      "loss": 0.3043,
      "step": 350
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.20951023697853088,
      "learning_rate": 1.8197115384615388e-05,
      "loss": 0.3046,
      "step": 375
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.9027990102767944,
      "learning_rate": 1.807692307692308e-05,
      "loss": 0.3127,
      "step": 400
    },
    {
      "epoch": 0.14,
      "eval_loss": 0.28894639015197754,
      "eval_na_accuracy": 0.8745704293251038,
      "eval_ordinal_accuracy": 0.4880554974079132,
      "eval_ordinal_mae": 0.8012504577636719,
      "eval_runtime": 195.2283,
      "eval_samples_per_second": 22.922,
      "eval_steps_per_second": 2.868,
      "step": 400
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5711051225662231,
      "learning_rate": 1.795673076923077e-05,
      "loss": 0.3094,
      "step": 425
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.6442267894744873,
      "learning_rate": 1.7836538461538463e-05,
      "loss": 0.2979,
      "step": 450
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7498008012771606,
      "learning_rate": 1.7716346153846155e-05,
      "loss": 0.3193,
      "step": 475
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4141407012939453,
      "learning_rate": 1.7596153846153846e-05,
      "loss": 0.3054,
      "step": 500
    },
    {
      "epoch": 0.18,
      "eval_loss": 0.28038087487220764,
      "eval_na_accuracy": 0.8780068755149841,
      "eval_ordinal_accuracy": 0.5324942469596863,
      "eval_ordinal_mae": 0.7619425058364868,
      "eval_runtime": 189.7487,
      "eval_samples_per_second": 23.584,
      "eval_steps_per_second": 2.951,
      "step": 500
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3073471784591675,
      "learning_rate": 1.7475961538461538e-05,
      "loss": 0.302,
      "step": 525
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1622358560562134,
      "learning_rate": 1.7355769230769233e-05,
      "loss": 0.2844,
      "step": 550
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.3688335716724396,
      "learning_rate": 1.7235576923076925e-05,
      "loss": 0.2745,
      "step": 575
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1387437582015991,
      "learning_rate": 1.7115384615384617e-05,
      "loss": 0.3051,
      "step": 600
    },
    {
      "epoch": 0.22,
      "eval_loss": 0.27521631121635437,
      "eval_na_accuracy": 0.9158075451850891,
      "eval_ordinal_accuracy": 0.5235037207603455,
      "eval_ordinal_mae": 0.7215057015419006,
      "eval_runtime": 192.1403,
      "eval_samples_per_second": 23.29,
      "eval_steps_per_second": 2.915,
      "step": 600
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.613787055015564,
      "learning_rate": 1.699519230769231e-05,
      "loss": 0.3017,
      "step": 625
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.595897376537323,
      "learning_rate": 1.6875e-05,
      "loss": 0.2806,
      "step": 650
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4324086904525757,
      "learning_rate": 1.6754807692307692e-05,
      "loss": 0.256,
      "step": 675
    },
    {
      "epoch": 0.25,
      "grad_norm": 3.7929930686950684,
      "learning_rate": 1.6634615384615387e-05,
      "loss": 0.2833,
      "step": 700
    },
    {
      "epoch": 0.25,
      "eval_loss": 0.26531103253364563,
      "eval_na_accuracy": 0.8969072103500366,
      "eval_ordinal_accuracy": 0.5486770868301392,
      "eval_ordinal_mae": 0.6806999444961548,
      "eval_runtime": 191.9358,
      "eval_samples_per_second": 23.315,
      "eval_steps_per_second": 2.918,
      "step": 700
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.27854958176612854,
      "learning_rate": 1.651442307692308e-05,
      "loss": 0.2805,
      "step": 725
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8652201890945435,
      "learning_rate": 1.6399038461538462e-05,
      "loss": 0.2737,
      "step": 750
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.1118507385253906,
      "learning_rate": 1.6278846153846154e-05,
      "loss": 0.2611,
      "step": 775
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.6029460430145264,
      "learning_rate": 1.6158653846153845e-05,
      "loss": 0.2907,
      "step": 800
    },
    {
      "epoch": 0.29,
      "eval_loss": 0.255000501871109,
      "eval_na_accuracy": 0.8350515365600586,
      "eval_ordinal_accuracy": 0.5617775321006775,
      "eval_ordinal_mae": 0.6431577205657959,
      "eval_runtime": 190.5444,
      "eval_samples_per_second": 23.485,
      "eval_steps_per_second": 2.939,
      "step": 800
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.63986337184906,
      "learning_rate": 1.603846153846154e-05,
      "loss": 0.2661,
      "step": 825
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.8340407013893127,
      "learning_rate": 1.5918269230769232e-05,
      "loss": 0.2513,
      "step": 850
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.321059226989746,
      "learning_rate": 1.5798076923076924e-05,
      "loss": 0.2676,
      "step": 875
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.41912841796875,
      "learning_rate": 1.567788461538462e-05,
      "loss": 0.2468,
      "step": 900
    },
    {
      "epoch": 0.32,
      "eval_loss": 0.2521895170211792,
      "eval_na_accuracy": 0.8058419227600098,
      "eval_ordinal_accuracy": 0.5972257852554321,
      "eval_ordinal_mae": 0.6118690967559814,
      "eval_runtime": 197.239,
      "eval_samples_per_second": 22.688,
      "eval_steps_per_second": 2.839,
      "step": 900
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.9854594469070435,
      "learning_rate": 1.555769230769231e-05,
      "loss": 0.2727,
      "step": 925
    },
    {
      "epoch": 0.34,
      "grad_norm": 5.904122352600098,
      "learning_rate": 1.54375e-05,
      "loss": 0.3027,
      "step": 950
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8180320262908936,
      "learning_rate": 1.5317307692307694e-05,
      "loss": 0.2465,
      "step": 975
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.5673078894615173,
      "learning_rate": 1.5197115384615386e-05,
      "loss": 0.2199,
      "step": 1000
    },
    {
      "epoch": 0.36,
      "eval_loss": 0.24374203383922577,
      "eval_na_accuracy": 0.8127147555351257,
      "eval_ordinal_accuracy": 0.606216311454773,
      "eval_ordinal_mae": 0.6023499965667725,
      "eval_runtime": 192.8009,
      "eval_samples_per_second": 23.21,
      "eval_steps_per_second": 2.905,
      "step": 1000
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.8171074390411377,
      "learning_rate": 1.5076923076923078e-05,
      "loss": 0.221,
      "step": 1025
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.881005048751831,
      "learning_rate": 1.495673076923077e-05,
      "loss": 0.235,
      "step": 1050
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.021958351135254,
      "learning_rate": 1.4836538461538463e-05,
      "loss": 0.2841,
      "step": 1075
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8785498142242432,
      "learning_rate": 1.4716346153846155e-05,
      "loss": 0.2219,
      "step": 1100
    },
    {
      "epoch": 0.4,
      "eval_loss": 0.23609140515327454,
      "eval_na_accuracy": 0.9037800431251526,
      "eval_ordinal_accuracy": 0.595941424369812,
      "eval_ordinal_mae": 0.5573533177375793,
      "eval_runtime": 193.7176,
      "eval_samples_per_second": 23.101,
      "eval_steps_per_second": 2.891,
      "step": 1100
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.6871089935302734,
      "learning_rate": 1.4596153846153846e-05,
      "loss": 0.201,
      "step": 1125
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.3621855080127716,
      "learning_rate": 1.447596153846154e-05,
      "loss": 0.2119,
      "step": 1150
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.404956340789795,
      "learning_rate": 1.4355769230769232e-05,
      "loss": 0.2566,
      "step": 1175
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.8949152231216431,
      "learning_rate": 1.4235576923076923e-05,
      "loss": 0.2071,
      "step": 1200
    },
    {
      "epoch": 0.43,
      "eval_loss": 0.23867186903953552,
      "eval_na_accuracy": 0.7714776396751404,
      "eval_ordinal_accuracy": 0.6175186038017273,
      "eval_ordinal_mae": 0.5438615679740906,
      "eval_runtime": 196.3391,
      "eval_samples_per_second": 22.792,
      "eval_steps_per_second": 2.852,
      "step": 1200
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7730196714401245,
      "learning_rate": 1.4115384615384617e-05,
      "loss": 0.2756,
      "step": 1225
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.7472477555274963,
      "learning_rate": 1.3995192307692308e-05,
      "loss": 0.2318,
      "step": 1250
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.942986249923706,
      "learning_rate": 1.3875e-05,
      "loss": 0.2568,
      "step": 1275
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.5726996064186096,
      "learning_rate": 1.3754807692307695e-05,
      "loss": 0.2214,
      "step": 1300
    },
    {
      "epoch": 0.47,
      "eval_loss": 0.2340591698884964,
      "eval_na_accuracy": 0.7955326437950134,
      "eval_ordinal_accuracy": 0.6231697797775269,
      "eval_ordinal_mae": 0.5256503224372864,
      "eval_runtime": 193.9402,
      "eval_samples_per_second": 23.074,
      "eval_steps_per_second": 2.887,
      "step": 1300
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.94366455078125,
      "learning_rate": 1.3634615384615385e-05,
      "loss": 0.2146,
      "step": 1325
    },
    {
      "epoch": 0.49,
      "grad_norm": 3.612720251083374,
      "learning_rate": 1.3514423076923077e-05,
      "loss": 0.1979,
      "step": 1350
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.878446638584137,
      "learning_rate": 1.3394230769230769e-05,
      "loss": 0.2233,
      "step": 1375
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1531257629394531,
      "learning_rate": 1.3274038461538464e-05,
      "loss": 0.2627,
      "step": 1400
    },
    {
      "epoch": 0.5,
      "eval_loss": 0.2315448820590973,
      "eval_na_accuracy": 0.7989690899848938,
      "eval_ordinal_accuracy": 0.6123812198638916,
      "eval_ordinal_mae": 0.5152010917663574,
      "eval_runtime": 196.8075,
      "eval_samples_per_second": 22.738,
      "eval_steps_per_second": 2.845,
      "step": 1400
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.2066402435302734,
      "learning_rate": 1.3153846153846156e-05,
      "loss": 0.2415,
      "step": 1425
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4936281442642212,
      "learning_rate": 1.3033653846153846e-05,
      "loss": 0.2536,
      "step": 1450
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.112527847290039,
      "learning_rate": 1.291346153846154e-05,
      "loss": 0.2105,
      "step": 1475
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8110361099243164,
      "learning_rate": 1.2793269230769233e-05,
      "loss": 0.2067,
      "step": 1500
    },
    {
      "epoch": 0.54,
      "eval_loss": 0.22465108335018158,
      "eval_na_accuracy": 0.8109965920448303,
      "eval_ordinal_accuracy": 0.6396095752716064,
      "eval_ordinal_mae": 0.5025707483291626,
      "eval_runtime": 194.1538,
      "eval_samples_per_second": 23.049,
      "eval_steps_per_second": 2.884,
      "step": 1500
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.8933520913124084,
      "learning_rate": 1.2673076923076924e-05,
      "loss": 0.1957,
      "step": 1525
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3568251132965088,
      "learning_rate": 1.2557692307692309e-05,
      "loss": 0.2286,
      "step": 1550
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.600196123123169,
      "learning_rate": 1.24375e-05,
      "loss": 0.2292,
      "step": 1575
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2017379999160767,
      "learning_rate": 1.2317307692307694e-05,
      "loss": 0.2086,
      "step": 1600
    },
    {
      "epoch": 0.58,
      "eval_loss": 0.21920213103294373,
      "eval_na_accuracy": 0.8041236996650696,
      "eval_ordinal_accuracy": 0.6588749289512634,
      "eval_ordinal_mae": 0.49550649523735046,
      "eval_runtime": 189.1959,
      "eval_samples_per_second": 23.653,
      "eval_steps_per_second": 2.96,
      "step": 1600
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.4263405799865723,
      "learning_rate": 1.2197115384615386e-05,
      "loss": 0.2384,
      "step": 1625
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.1560556888580322,
      "learning_rate": 1.2076923076923078e-05,
      "loss": 0.2557,
      "step": 1650
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.8151688575744629,
      "learning_rate": 1.1956730769230771e-05,
      "loss": 0.2623,
      "step": 1675
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.8995933532714844,
      "learning_rate": 1.1836538461538463e-05,
      "loss": 0.1993,
      "step": 1700
    },
    {
      "epoch": 0.61,
      "eval_loss": 0.21818678081035614,
      "eval_na_accuracy": 0.8127147555351257,
      "eval_ordinal_accuracy": 0.6521962285041809,
      "eval_ordinal_mae": 0.47375088930130005,
      "eval_runtime": 196.7859,
      "eval_samples_per_second": 22.74,
      "eval_steps_per_second": 2.846,
      "step": 1700
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.6673493385314941,
      "learning_rate": 1.1716346153846155e-05,
      "loss": 0.2627,
      "step": 1725
    },
    {
      "epoch": 0.63,
      "grad_norm": 4.143210411071777,
      "learning_rate": 1.1596153846153848e-05,
      "loss": 0.2294,
      "step": 1750
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.9360339641571045,
      "learning_rate": 1.147596153846154e-05,
      "loss": 0.1854,
      "step": 1775
    },
    {
      "epoch": 0.65,
      "grad_norm": 4.869482040405273,
      "learning_rate": 1.1355769230769231e-05,
      "loss": 0.1962,
      "step": 1800
    },
    {
      "epoch": 0.65,
      "eval_loss": 0.22110989689826965,
      "eval_na_accuracy": 0.9140893220901489,
      "eval_ordinal_accuracy": 0.6231697797775269,
      "eval_ordinal_mae": 0.4857858419418335,
      "eval_runtime": 187.6432,
      "eval_samples_per_second": 23.848,
      "eval_steps_per_second": 2.984,
      "step": 1800
    },
    {
      "epoch": 0.66,
      "grad_norm": 5.728977203369141,
      "learning_rate": 1.1235576923076923e-05,
      "loss": 0.2294,
      "step": 1825
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.5772213339805603,
      "learning_rate": 1.1115384615384617e-05,
      "loss": 0.1981,
      "step": 1850
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.899949550628662,
      "learning_rate": 1.0995192307692308e-05,
      "loss": 0.1885,
      "step": 1875
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.060448408126831,
      "learning_rate": 1.0875e-05,
      "loss": 0.1882,
      "step": 1900
    },
    {
      "epoch": 0.69,
      "eval_loss": 0.20448338985443115,
      "eval_na_accuracy": 0.8625429272651672,
      "eval_ordinal_accuracy": 0.6632417440414429,
      "eval_ordinal_mae": 0.4668627381324768,
      "eval_runtime": 187.7227,
      "eval_samples_per_second": 23.838,
      "eval_steps_per_second": 2.983,
      "step": 1900
    },
    {
      "epoch": 0.69,
      "grad_norm": 8.596738815307617,
      "learning_rate": 1.0754807692307693e-05,
      "loss": 0.2388,
      "step": 1925
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.922056198120117,
      "learning_rate": 1.0634615384615385e-05,
      "loss": 0.2172,
      "step": 1950
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.6090290546417236,
      "learning_rate": 1.0514423076923077e-05,
      "loss": 0.2432,
      "step": 1975
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.9129126667976379,
      "learning_rate": 1.039423076923077e-05,
      "loss": 0.1895,
      "step": 2000
    },
    {
      "epoch": 0.72,
      "eval_loss": 0.2081986963748932,
      "eval_na_accuracy": 0.8608247637748718,
      "eval_ordinal_accuracy": 0.6316465735435486,
      "eval_ordinal_mae": 0.46963009238243103,
      "eval_runtime": 177.2649,
      "eval_samples_per_second": 25.245,
      "eval_steps_per_second": 3.159,
      "step": 2000
    },
    {
      "epoch": 0.73,
      "grad_norm": 14.650406837463379,
      "learning_rate": 1.0274038461538462e-05,
      "loss": 0.2168,
      "step": 2025
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.405910015106201,
      "learning_rate": 1.0153846153846154e-05,
      "loss": 0.224,
      "step": 2050
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4129964113235474,
      "learning_rate": 1.0033653846153847e-05,
      "loss": 0.1908,
      "step": 2075
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.485114812850952,
      "learning_rate": 9.913461538461539e-06,
      "loss": 0.1979,
      "step": 2100
    },
    {
      "epoch": 0.76,
      "eval_loss": 0.22696280479431152,
      "eval_na_accuracy": 0.900343656539917,
      "eval_ordinal_accuracy": 0.6372976899147034,
      "eval_ordinal_mae": 0.4791434407234192,
      "eval_runtime": 187.006,
      "eval_samples_per_second": 23.93,
      "eval_steps_per_second": 2.995,
      "step": 2100
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7261921167373657,
      "learning_rate": 9.79326923076923e-06,
      "loss": 0.2169,
      "step": 2125
    },
    {
      "epoch": 0.78,
      "grad_norm": 5.685389995574951,
      "learning_rate": 9.673076923076924e-06,
      "loss": 0.2446,
      "step": 2150
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2241212129592896,
      "learning_rate": 9.552884615384616e-06,
      "loss": 0.2202,
      "step": 2175
    },
    {
      "epoch": 0.79,
      "grad_norm": 3.1511054039001465,
      "learning_rate": 9.432692307692308e-06,
      "loss": 0.2643,
      "step": 2200
    },
    {
      "epoch": 0.79,
      "eval_loss": 0.20690996944904327,
      "eval_na_accuracy": 0.8556700944900513,
      "eval_ordinal_accuracy": 0.6414076685905457,
      "eval_ordinal_mae": 0.46626007556915283,
      "eval_runtime": 194.378,
      "eval_samples_per_second": 23.022,
      "eval_steps_per_second": 2.881,
      "step": 2200
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.382234811782837,
      "learning_rate": 9.312500000000001e-06,
      "loss": 0.1782,
      "step": 2225
    },
    {
      "epoch": 0.81,
      "grad_norm": 3.646544933319092,
      "learning_rate": 9.192307692307693e-06,
      "loss": 0.1901,
      "step": 2250
    },
    {
      "epoch": 0.82,
      "grad_norm": 3.5835981369018555,
      "learning_rate": 9.072115384615385e-06,
      "loss": 0.179,
      "step": 2275
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.6391886472702026,
      "learning_rate": 8.951923076923078e-06,
      "loss": 0.2279,
      "step": 2300
    },
    {
      "epoch": 0.83,
      "eval_loss": 0.2029835283756256,
      "eval_na_accuracy": 0.869415819644928,
      "eval_ordinal_accuracy": 0.654251217842102,
      "eval_ordinal_mae": 0.4581436216831207,
      "eval_runtime": 190.8034,
      "eval_samples_per_second": 23.453,
      "eval_steps_per_second": 2.935,
      "step": 2300
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8583753108978271,
      "learning_rate": 8.83173076923077e-06,
      "loss": 0.2403,
      "step": 2325
    },
    {
      "epoch": 0.85,
      "grad_norm": 7.337312698364258,
      "learning_rate": 8.711538461538463e-06,
      "loss": 0.2262,
      "step": 2350
    },
    {
      "epoch": 0.86,
      "grad_norm": 4.292835712432861,
      "learning_rate": 8.591346153846155e-06,
      "loss": 0.2321,
      "step": 2375
    },
    {
      "epoch": 0.87,
      "grad_norm": 3.3267788887023926,
      "learning_rate": 8.471153846153847e-06,
      "loss": 0.1965,
      "step": 2400
    },
    {
      "epoch": 0.87,
      "eval_loss": 0.21094879508018494,
      "eval_na_accuracy": 0.800687313079834,
      "eval_ordinal_accuracy": 0.681993305683136,
      "eval_ordinal_mae": 0.44459667801856995,
      "eval_runtime": 184.1529,
      "eval_samples_per_second": 24.3,
      "eval_steps_per_second": 3.041,
      "step": 2400
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.07079815864563,
      "learning_rate": 8.35096153846154e-06,
      "loss": 0.2122,
      "step": 2425
    },
    {
      "epoch": 0.88,
      "grad_norm": 3.6883444786071777,
      "learning_rate": 8.230769230769232e-06,
      "loss": 0.1876,
      "step": 2450
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.5956806540489197,
      "learning_rate": 8.110576923076923e-06,
      "loss": 0.1804,
      "step": 2475
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.6182098984718323,
      "learning_rate": 7.990384615384617e-06,
      "loss": 0.1637,
      "step": 2500
    },
    {
      "epoch": 0.9,
      "eval_loss": 0.20052286982536316,
      "eval_na_accuracy": 0.8556700944900513,
      "eval_ordinal_accuracy": 0.6763421297073364,
      "eval_ordinal_mae": 0.4438597857952118,
      "eval_runtime": 183.167,
      "eval_samples_per_second": 24.431,
      "eval_steps_per_second": 3.057,
      "step": 2500
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.0849900245666504,
      "learning_rate": 7.875e-06,
      "loss": 0.2287,
      "step": 2525
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.9747681617736816,
      "learning_rate": 7.754807692307693e-06,
      "loss": 0.202,
      "step": 2550
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.7342644333839417,
      "learning_rate": 7.634615384615385e-06,
      "loss": 0.1887,
      "step": 2575
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.5988609790802,
      "learning_rate": 7.514423076923078e-06,
      "loss": 0.1705,
      "step": 2600
    },
    {
      "epoch": 0.94,
      "eval_loss": 0.19641266763210297,
      "eval_na_accuracy": 0.8539518713951111,
      "eval_ordinal_accuracy": 0.6748009324073792,
      "eval_ordinal_mae": 0.43212634325027466,
      "eval_runtime": 189.9978,
      "eval_samples_per_second": 23.553,
      "eval_steps_per_second": 2.947,
      "step": 2600
    },
    {
      "epoch": 0.95,
      "grad_norm": 3.347687244415283,
      "learning_rate": 7.39423076923077e-06,
      "loss": 0.1947,
      "step": 2625
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.516992449760437,
      "learning_rate": 7.274038461538462e-06,
      "loss": 0.2137,
      "step": 2650
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.79114830493927,
      "learning_rate": 7.153846153846155e-06,
      "loss": 0.2398,
      "step": 2675
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.8799991607666016,
      "learning_rate": 7.033653846153847e-06,
      "loss": 0.2412,
      "step": 2700
    },
    {
      "epoch": 0.97,
      "eval_loss": 0.19578155875205994,
      "eval_na_accuracy": 0.8780068755149841,
      "eval_ordinal_accuracy": 0.6730028390884399,
      "eval_ordinal_mae": 0.4344838559627533,
      "eval_runtime": 198.8509,
      "eval_samples_per_second": 22.504,
      "eval_steps_per_second": 2.816,
      "step": 2700
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.0458216667175293,
      "learning_rate": 6.913461538461539e-06,
      "loss": 0.1697,
      "step": 2725
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.2299771308898926,
      "learning_rate": 6.7932692307692315e-06,
      "loss": 0.1623,
      "step": 2750
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.825832724571228,
      "learning_rate": 6.673076923076923e-06,
      "loss": 0.2052,
      "step": 2775
    },
    {
      "epoch": 1.01,
      "grad_norm": 0.6158725619316101,
      "learning_rate": 6.552884615384616e-06,
      "loss": 0.1438,
      "step": 2800
    },
    {
      "epoch": 1.01,
      "eval_loss": 0.19719891250133514,
      "eval_na_accuracy": 0.8470790386199951,
      "eval_ordinal_accuracy": 0.6783971190452576,
      "eval_ordinal_mae": 0.43012040853500366,
      "eval_runtime": 190.7061,
      "eval_samples_per_second": 23.465,
      "eval_steps_per_second": 2.936,
      "step": 2800
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.041905164718628,
      "learning_rate": 6.432692307692308e-06,
      "loss": 0.1584,
      "step": 2825
    },
    {
      "epoch": 1.03,
      "grad_norm": 0.6891164779663086,
      "learning_rate": 6.3125e-06,
      "loss": 0.1488,
      "step": 2850
    },
    {
      "epoch": 1.04,
      "grad_norm": 4.814165115356445,
      "learning_rate": 6.192307692307693e-06,
      "loss": 0.1565,
      "step": 2875
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.0739597082138062,
      "learning_rate": 6.0721153846153844e-06,
      "loss": 0.123,
      "step": 2900
    },
    {
      "epoch": 1.05,
      "eval_loss": 0.19954617321491241,
      "eval_na_accuracy": 0.8419243693351746,
      "eval_ordinal_accuracy": 0.6753146648406982,
      "eval_ordinal_mae": 0.42309799790382385,
      "eval_runtime": 185.744,
      "eval_samples_per_second": 24.092,
      "eval_steps_per_second": 3.015,
      "step": 2900
    },
    {
      "epoch": 1.05,
      "grad_norm": 3.492755651473999,
      "learning_rate": 5.951923076923077e-06,
      "loss": 0.1547,
      "step": 2925
    },
    {
      "epoch": 1.06,
      "grad_norm": 0.9093284010887146,
      "learning_rate": 5.8317307692307704e-06,
      "loss": 0.1258,
      "step": 2950
    },
    {
      "epoch": 1.07,
      "grad_norm": 0.7456061244010925,
      "learning_rate": 5.711538461538461e-06,
      "loss": 0.1648,
      "step": 2975
    },
    {
      "epoch": 1.08,
      "grad_norm": 0.8997055888175964,
      "learning_rate": 5.591346153846155e-06,
      "loss": 0.1411,
      "step": 3000
    },
    {
      "epoch": 1.08,
      "eval_loss": 0.19463004171848297,
      "eval_na_accuracy": 0.8453608155250549,
      "eval_ordinal_accuracy": 0.6817364692687988,
      "eval_ordinal_mae": 0.42202073335647583,
      "eval_runtime": 176.571,
      "eval_samples_per_second": 25.344,
      "eval_steps_per_second": 3.172,
      "step": 3000
    },
    {
      "epoch": 1.09,
      "grad_norm": 0.5158917903900146,
      "learning_rate": 5.471153846153847e-06,
      "loss": 0.168,
      "step": 3025
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5927814245224,
      "learning_rate": 5.350961538461539e-06,
      "loss": 0.1113,
      "step": 3050
    },
    {
      "epoch": 1.11,
      "grad_norm": 0.7580274343490601,
      "learning_rate": 5.230769230769232e-06,
      "loss": 0.1489,
      "step": 3075
    },
    {
      "epoch": 1.12,
      "grad_norm": 0.6561002135276794,
      "learning_rate": 5.110576923076923e-06,
      "loss": 0.1443,
      "step": 3100
    },
    {
      "epoch": 1.12,
      "eval_loss": 0.19156721234321594,
      "eval_na_accuracy": 0.8591065406799316,
      "eval_ordinal_accuracy": 0.682763934135437,
      "eval_ordinal_mae": 0.42213648557662964,
      "eval_runtime": 180.8137,
      "eval_samples_per_second": 24.749,
      "eval_steps_per_second": 3.097,
      "step": 3100
    },
    {
      "epoch": 1.13,
      "grad_norm": 0.7529481053352356,
      "learning_rate": 4.990384615384616e-06,
      "loss": 0.1546,
      "step": 3125
    },
    {
      "epoch": 1.14,
      "grad_norm": 3.369683027267456,
      "learning_rate": 4.870192307692308e-06,
      "loss": 0.136,
      "step": 3150
    },
    {
      "epoch": 1.14,
      "grad_norm": 0.9865265488624573,
      "learning_rate": 4.75e-06,
      "loss": 0.1498,
      "step": 3175
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8516024351119995,
      "learning_rate": 4.629807692307693e-06,
      "loss": 0.208,
      "step": 3200
    },
    {
      "epoch": 1.15,
      "eval_loss": 0.19419582188129425,
      "eval_na_accuracy": 0.8676975965499878,
      "eval_ordinal_accuracy": 0.6740303039550781,
      "eval_ordinal_mae": 0.4163132607936859,
      "eval_runtime": 188.6406,
      "eval_samples_per_second": 23.722,
      "eval_steps_per_second": 2.969,
      "step": 3200
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.1575658321380615,
      "learning_rate": 4.509615384615385e-06,
      "loss": 0.1316,
      "step": 3225
    },
    {
      "epoch": 1.17,
      "grad_norm": 0.9484291076660156,
      "learning_rate": 4.389423076923077e-06,
      "loss": 0.1204,
      "step": 3250
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.565762519836426,
      "learning_rate": 4.26923076923077e-06,
      "loss": 0.1262,
      "step": 3275
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.2757420539855957,
      "learning_rate": 4.149038461538462e-06,
      "loss": 0.1343,
      "step": 3300
    },
    {
      "epoch": 1.19,
      "eval_loss": 0.19619733095169067,
      "eval_na_accuracy": 0.8470790386199951,
      "eval_ordinal_accuracy": 0.6889288425445557,
      "eval_ordinal_mae": 0.4182307720184326,
      "eval_runtime": 187.8854,
      "eval_samples_per_second": 23.818,
      "eval_steps_per_second": 2.981,
      "step": 3300
    },
    {
      "epoch": 1.2,
      "grad_norm": 3.402385711669922,
      "learning_rate": 4.028846153846154e-06,
      "loss": 0.1397,
      "step": 3325
    },
    {
      "epoch": 1.21,
      "grad_norm": 0.881535530090332,
      "learning_rate": 3.908653846153847e-06,
      "loss": 0.1176,
      "step": 3350
    },
    {
      "epoch": 1.22,
      "grad_norm": 0.948428213596344,
      "learning_rate": 3.7884615384615388e-06,
      "loss": 0.1767,
      "step": 3375
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.2142385244369507,
      "learning_rate": 3.668269230769231e-06,
      "loss": 0.1347,
      "step": 3400
    },
    {
      "epoch": 1.23,
      "eval_loss": 0.1938188225030899,
      "eval_na_accuracy": 0.8659793734550476,
      "eval_ordinal_accuracy": 0.6899563074111938,
      "eval_ordinal_mae": 0.4161255955696106,
      "eval_runtime": 185.9367,
      "eval_samples_per_second": 24.067,
      "eval_steps_per_second": 3.012,
      "step": 3400
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.2418200969696045,
      "learning_rate": 3.5480769230769235e-06,
      "loss": 0.1585,
      "step": 3425
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.845564842224121,
      "learning_rate": 3.4278846153846157e-06,
      "loss": 0.1499,
      "step": 3450
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7326797246932983,
      "learning_rate": 3.307692307692308e-06,
      "loss": 0.1286,
      "step": 3475
    },
    {
      "epoch": 1.26,
      "grad_norm": 4.901269912719727,
      "learning_rate": 3.1875e-06,
      "loss": 0.1076,
      "step": 3500
    },
    {
      "epoch": 1.26,
      "eval_loss": 0.19698283076286316,
      "eval_na_accuracy": 0.8470790386199951,
      "eval_ordinal_accuracy": 0.6943231225013733,
      "eval_ordinal_mae": 0.41806870698928833,
      "eval_runtime": 187.7833,
      "eval_samples_per_second": 23.831,
      "eval_steps_per_second": 2.982,
      "step": 3500
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.3833277225494385,
      "learning_rate": 3.0673076923076926e-06,
      "loss": 0.1246,
      "step": 3525
    },
    {
      "epoch": 1.28,
      "grad_norm": 0.9357690215110779,
      "learning_rate": 2.947115384615385e-06,
      "loss": 0.1113,
      "step": 3550
    },
    {
      "epoch": 1.29,
      "grad_norm": 7.975602149963379,
      "learning_rate": 2.8269230769230773e-06,
      "loss": 0.1872,
      "step": 3575
    },
    {
      "epoch": 1.3,
      "grad_norm": 0.5384923815727234,
      "learning_rate": 2.7067307692307694e-06,
      "loss": 0.1248,
      "step": 3600
    },
    {
      "epoch": 1.3,
      "eval_loss": 0.19512778520584106,
      "eval_na_accuracy": 0.8470790386199951,
      "eval_ordinal_accuracy": 0.6958643794059753,
      "eval_ordinal_mae": 0.4150661826133728,
      "eval_runtime": 190.3088,
      "eval_samples_per_second": 23.514,
      "eval_steps_per_second": 2.943,
      "step": 3600
    },
    {
      "epoch": 1.31,
      "grad_norm": 0.7750712633132935,
      "learning_rate": 2.586538461538462e-06,
      "loss": 0.1249,
      "step": 3625
    },
    {
      "epoch": 1.32,
      "grad_norm": 0.7317385077476501,
      "learning_rate": 2.466346153846154e-06,
      "loss": 0.124,
      "step": 3650
    },
    {
      "epoch": 1.33,
      "grad_norm": 0.79685378074646,
      "learning_rate": 2.3461538461538463e-06,
      "loss": 0.1524,
      "step": 3675
    },
    {
      "epoch": 1.33,
      "grad_norm": 0.9845700263977051,
      "learning_rate": 2.2259615384615385e-06,
      "loss": 0.1455,
      "step": 3700
    },
    {
      "epoch": 1.33,
      "eval_loss": 0.1951962560415268,
      "eval_na_accuracy": 0.8814433217048645,
      "eval_ordinal_accuracy": 0.6850757598876953,
      "eval_ordinal_mae": 0.4146950840950012,
      "eval_runtime": 188.1776,
      "eval_samples_per_second": 23.781,
      "eval_steps_per_second": 2.976,
      "step": 3700
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.780651330947876,
      "learning_rate": 2.105769230769231e-06,
      "loss": 0.174,
      "step": 3725
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.0597649812698364,
      "learning_rate": 1.9855769230769232e-06,
      "loss": 0.1211,
      "step": 3750
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.3485703468322754,
      "learning_rate": 1.8653846153846156e-06,
      "loss": 0.1537,
      "step": 3775
    },
    {
      "epoch": 1.37,
      "grad_norm": 6.283831596374512,
      "learning_rate": 1.7451923076923077e-06,
      "loss": 0.131,
      "step": 3800
    },
    {
      "epoch": 1.37,
      "eval_loss": 0.19528667628765106,
      "eval_na_accuracy": 0.8453608155250549,
      "eval_ordinal_accuracy": 0.6948369145393372,
      "eval_ordinal_mae": 0.4172358810901642,
      "eval_runtime": 191.9622,
      "eval_samples_per_second": 23.312,
      "eval_steps_per_second": 2.917,
      "step": 3800
    },
    {
      "epoch": 1.38,
      "grad_norm": 0.6940748691558838,
      "learning_rate": 1.6250000000000001e-06,
      "loss": 0.1321,
      "step": 3825
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.970365285873413,
      "learning_rate": 1.5048076923076923e-06,
      "loss": 0.1578,
      "step": 3850
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.247617483139038,
      "learning_rate": 1.3846153846153848e-06,
      "loss": 0.1484,
      "step": 3875
    },
    {
      "epoch": 1.41,
      "grad_norm": 0.7711169719696045,
      "learning_rate": 1.264423076923077e-06,
      "loss": 0.1307,
      "step": 3900
    },
    {
      "epoch": 1.41,
      "eval_loss": 0.19318200647830963,
      "eval_na_accuracy": 0.8642611503601074,
      "eval_ordinal_accuracy": 0.692781925201416,
      "eval_ordinal_mae": 0.41271111369132996,
      "eval_runtime": 192.6809,
      "eval_samples_per_second": 23.225,
      "eval_steps_per_second": 2.906,
      "step": 3900
    },
    {
      "epoch": 1.42,
      "grad_norm": 0.6948511004447937,
      "learning_rate": 1.1442307692307694e-06,
      "loss": 0.1566,
      "step": 3925
    },
    {
      "epoch": 1.42,
      "grad_norm": 0.9776670336723328,
      "learning_rate": 1.0240384615384615e-06,
      "loss": 0.153,
      "step": 3950
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.5596826076507568,
      "learning_rate": 9.038461538461539e-07,
      "loss": 0.1316,
      "step": 3975
    },
    {
      "epoch": 1.44,
      "grad_norm": 0.5882940888404846,
      "learning_rate": 7.836538461538463e-07,
      "loss": 0.1198,
      "step": 4000
    },
    {
      "epoch": 1.44,
      "eval_loss": 0.19471855461597443,
      "eval_na_accuracy": 0.8573883175849915,
      "eval_ordinal_accuracy": 0.6940662860870361,
      "eval_ordinal_mae": 0.4110487997531891,
      "eval_runtime": 192.5912,
      "eval_samples_per_second": 23.236,
      "eval_steps_per_second": 2.908,
      "step": 4000
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.4058364629745483,
      "learning_rate": 6.634615384615385e-07,
      "loss": 0.1154,
      "step": 4025
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0241715908050537,
      "learning_rate": 5.432692307692308e-07,
      "loss": 0.1359,
      "step": 4050
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.556328773498535,
      "learning_rate": 4.2307692307692315e-07,
      "loss": 0.1374,
      "step": 4075
    },
    {
      "epoch": 1.48,
      "grad_norm": 0.89422607421875,
      "learning_rate": 3.028846153846154e-07,
      "loss": 0.1363,
      "step": 4100
    },
    {
      "epoch": 1.48,
      "eval_loss": 0.19520333409309387,
      "eval_na_accuracy": 0.8573883175849915,
      "eval_ordinal_accuracy": 0.6886719465255737,
      "eval_ordinal_mae": 0.4086832106113434,
      "eval_runtime": 191.6181,
      "eval_samples_per_second": 23.354,
      "eval_steps_per_second": 2.922,
      "step": 4100
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.4230666160583496,
      "learning_rate": 1.8269230769230772e-07,
      "loss": 0.1273,
      "step": 4125
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.7363412380218506,
      "learning_rate": 6.250000000000001e-08,
      "loss": 0.1068,
      "step": 4150
    },
    {
      "epoch": 1.5,
      "step": 4160,
      "total_flos": 5.158051742063002e+18,
      "train_loss": 0.21532289660893955,
      "train_runtime": 13643.9574,
      "train_samples_per_second": 4.878,
      "train_steps_per_second": 0.305
    }
  ],
  "logging_steps": 25,
  "max_steps": 4160,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 100,
  "total_flos": 5.158051742063002e+18,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}
|