{ "best_metric": null, "best_model_checkpoint": null, "epoch": 35.0, "global_step": 33600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 1.535132032339264e-05, "loss": 2.3734, "step": 960 }, { "epoch": 1.0, "eval_accuracy": 0.6711076941043658, "eval_loss": 1.6856393814086914, "eval_runtime": 42.9945, "eval_samples_per_second": 176.86, "eval_steps_per_second": 1.116, "step": 960 }, { "epoch": 2.0, "learning_rate": 1.6900880215595094e-05, "loss": 1.5002, "step": 1920 }, { "epoch": 2.0, "eval_accuracy": 0.6930295365255809, "eval_loss": 1.5316802263259888, "eval_runtime": 40.2605, "eval_samples_per_second": 188.87, "eval_steps_per_second": 1.192, "step": 1920 }, { "epoch": 3.0, "learning_rate": 1.7807314645155048e-05, "loss": 1.3682, "step": 2880 }, { "epoch": 3.0, "eval_accuracy": 0.7001060409279067, "eval_loss": 1.4794470071792603, "eval_runtime": 40.3426, "eval_samples_per_second": 188.486, "eval_steps_per_second": 1.19, "step": 2880 }, { "epoch": 4.0, "learning_rate": 1.8450440107797548e-05, "loss": 1.3057, "step": 3840 }, { "epoch": 4.0, "eval_accuracy": 0.7058089394925496, "eval_loss": 1.445176362991333, "eval_runtime": 40.2825, "eval_samples_per_second": 188.767, "eval_steps_per_second": 1.192, "step": 3840 }, { "epoch": 5.0, "learning_rate": 1.894928697180815e-05, "loss": 1.2652, "step": 4800 }, { "epoch": 5.0, "eval_accuracy": 0.7078205742901283, "eval_loss": 1.4240751266479492, "eval_runtime": 44.133, "eval_samples_per_second": 172.297, "eval_steps_per_second": 1.088, "step": 4800 }, { "epoch": 6.0, "learning_rate": 1.93568745373575e-05, "loss": 1.2347, "step": 5760 }, { "epoch": 6.0, "eval_accuracy": 0.712788831055115, "eval_loss": 1.393662452697754, "eval_runtime": 40.438, "eval_samples_per_second": 188.041, "eval_steps_per_second": 1.187, "step": 5760 }, { "epoch": 7.0, "learning_rate": 1.9701484913790247e-05, "loss": 1.2117, "step": 6720 }, { "epoch": 7.0, "eval_accuracy": 0.7158340831339961, "eval_loss": 1.3783458471298218, "eval_runtime": 47.0316, "eval_samples_per_second": 161.679, "eval_steps_per_second": 1.021, "step": 6720 }, { "epoch": 8.0, "learning_rate": 2e-05, "loss": 1.1863, "step": 7680 }, { "epoch": 8.0, "eval_accuracy": 0.7177737277852768, "eval_loss": 1.356780767440796, "eval_runtime": 40.2772, "eval_samples_per_second": 188.791, "eval_steps_per_second": 1.192, "step": 7680 }, { "epoch": 9.0, "learning_rate": 2e-05, "loss": 1.167, "step": 8640 }, { "epoch": 9.0, "eval_accuracy": 0.7172315754804711, "eval_loss": 1.362362265586853, "eval_runtime": 40.3662, "eval_samples_per_second": 188.375, "eval_steps_per_second": 1.189, "step": 8640 }, { "epoch": 10.0, "learning_rate": 2e-05, "loss": 1.1528, "step": 9600 }, { "epoch": 10.0, "eval_accuracy": 0.7208890541082557, "eval_loss": 1.3375591039657593, "eval_runtime": 40.4301, "eval_samples_per_second": 188.077, "eval_steps_per_second": 1.187, "step": 9600 }, { "epoch": 11.0, "learning_rate": 2e-05, "loss": 1.1403, "step": 10560 }, { "epoch": 11.0, "eval_accuracy": 0.722637380785269, "eval_loss": 1.3316693305969238, "eval_runtime": 40.2778, "eval_samples_per_second": 188.789, "eval_steps_per_second": 1.192, "step": 10560 }, { "epoch": 12.0, "learning_rate": 2e-05, "loss": 1.1276, "step": 11520 }, { "epoch": 12.0, "eval_accuracy": 0.7243298395325108, "eval_loss": 1.3127739429473877, "eval_runtime": 40.1949, "eval_samples_per_second": 189.178, "eval_steps_per_second": 1.194, "step": 11520 }, { "epoch": 13.0, "learning_rate": 2e-05, "loss": 1.1176, "step": 12480 }, { "epoch": 13.0, "eval_accuracy": 0.7242034818873964, "eval_loss": 1.3149378299713135, "eval_runtime": 40.5071, "eval_samples_per_second": 187.72, "eval_steps_per_second": 1.185, "step": 12480 }, { "epoch": 14.0, "learning_rate": 2e-05, "loss": 1.1061, "step": 13440 }, { "epoch": 14.0, "eval_accuracy": 0.7251281499452183, "eval_loss": 1.3011534214019775, "eval_runtime": 41.5767, "eval_samples_per_second": 182.891, "eval_steps_per_second": 1.154, "step": 13440 }, { "epoch": 15.0, "learning_rate": 2e-05, "loss": 1.0953, "step": 14400 }, { "epoch": 15.0, "eval_accuracy": 0.7274233697774307, "eval_loss": 1.2953981161117554, "eval_runtime": 40.2998, "eval_samples_per_second": 188.686, "eval_steps_per_second": 1.191, "step": 14400 }, { "epoch": 16.0, "learning_rate": 2e-05, "loss": 1.0872, "step": 15360 }, { "epoch": 16.0, "eval_accuracy": 0.729807233922918, "eval_loss": 1.2836934328079224, "eval_runtime": 40.3358, "eval_samples_per_second": 188.517, "eval_steps_per_second": 1.19, "step": 15360 }, { "epoch": 17.0, "learning_rate": 2e-05, "loss": 1.0778, "step": 16320 }, { "epoch": 17.0, "eval_accuracy": 0.728892083446406, "eval_loss": 1.2819887399673462, "eval_runtime": 40.2817, "eval_samples_per_second": 188.77, "eval_steps_per_second": 1.192, "step": 16320 }, { "epoch": 18.0, "learning_rate": 2e-05, "loss": 1.0709, "step": 17280 }, { "epoch": 18.0, "eval_accuracy": 0.7314453291503982, "eval_loss": 1.2700670957565308, "eval_runtime": 40.2932, "eval_samples_per_second": 188.717, "eval_steps_per_second": 1.191, "step": 17280 }, { "epoch": 19.0, "learning_rate": 2e-05, "loss": 1.0629, "step": 18240 }, { "epoch": 19.0, "eval_accuracy": 0.7313673197000715, "eval_loss": 1.2694642543792725, "eval_runtime": 40.2951, "eval_samples_per_second": 188.708, "eval_steps_per_second": 1.191, "step": 18240 }, { "epoch": 20.0, "learning_rate": 2e-05, "loss": 1.0575, "step": 19200 }, { "epoch": 20.0, "eval_accuracy": 0.7321188555482103, "eval_loss": 1.269392490386963, "eval_runtime": 40.9309, "eval_samples_per_second": 185.776, "eval_steps_per_second": 1.173, "step": 19200 }, { "epoch": 21.0, "learning_rate": 2e-05, "loss": 1.0494, "step": 20160 }, { "epoch": 21.0, "eval_accuracy": 0.7336233174474587, "eval_loss": 1.2491707801818848, "eval_runtime": 41.3244, "eval_samples_per_second": 184.007, "eval_steps_per_second": 1.162, "step": 20160 }, { "epoch": 22.0, "learning_rate": 2e-05, "loss": 1.0443, "step": 21120 }, { "epoch": 22.0, "eval_accuracy": 0.7333931208601605, "eval_loss": 1.2573738098144531, "eval_runtime": 40.4041, "eval_samples_per_second": 188.199, "eval_steps_per_second": 1.188, "step": 21120 }, { "epoch": 23.0, "learning_rate": 2e-05, "loss": 1.0375, "step": 22080 }, { "epoch": 23.0, "eval_accuracy": 0.7354173520930503, "eval_loss": 1.2430847883224487, "eval_runtime": 40.4071, "eval_samples_per_second": 188.185, "eval_steps_per_second": 1.188, "step": 22080 }, { "epoch": 24.0, "learning_rate": 2e-05, "loss": 1.0332, "step": 23040 }, { "epoch": 24.0, "eval_accuracy": 0.7351443165552266, "eval_loss": 1.240692377090454, "eval_runtime": 40.2279, "eval_samples_per_second": 189.023, "eval_steps_per_second": 1.193, "step": 23040 }, { "epoch": 25.0, "learning_rate": 2e-05, "loss": 1.0279, "step": 24000 }, { "epoch": 25.0, "eval_accuracy": 0.7350845404729578, "eval_loss": 1.2445788383483887, "eval_runtime": 40.3432, "eval_samples_per_second": 188.483, "eval_steps_per_second": 1.19, "step": 24000 }, { "epoch": 26.0, "learning_rate": 2e-05, "loss": 1.0233, "step": 24960 }, { "epoch": 26.0, "eval_accuracy": 0.7361292168770468, "eval_loss": 1.2367281913757324, "eval_runtime": 40.3061, "eval_samples_per_second": 188.657, "eval_steps_per_second": 1.191, "step": 24960 }, { "epoch": 27.0, "learning_rate": 2e-05, "loss": 1.018, "step": 25920 }, { "epoch": 27.0, "eval_accuracy": 0.7351869592986123, "eval_loss": 1.2435057163238525, "eval_runtime": 41.4116, "eval_samples_per_second": 183.62, "eval_steps_per_second": 1.159, "step": 25920 }, { "epoch": 28.0, "learning_rate": 2e-05, "loss": 1.0128, "step": 26880 }, { "epoch": 28.0, "eval_accuracy": 0.7379444038039502, "eval_loss": 1.2293747663497925, "eval_runtime": 42.7481, "eval_samples_per_second": 177.879, "eval_steps_per_second": 1.123, "step": 26880 }, { "epoch": 29.0, "learning_rate": 2e-05, "loss": 1.008, "step": 27840 }, { "epoch": 29.0, "eval_accuracy": 0.7381460506414618, "eval_loss": 1.224423885345459, "eval_runtime": 40.3086, "eval_samples_per_second": 188.645, "eval_steps_per_second": 1.191, "step": 27840 }, { "epoch": 30.0, "learning_rate": 2e-05, "loss": 1.0036, "step": 28800 }, { "epoch": 30.0, "eval_accuracy": 0.7393030501323384, "eval_loss": 1.2178620100021362, "eval_runtime": 40.2497, "eval_samples_per_second": 188.921, "eval_steps_per_second": 1.193, "step": 28800 }, { "epoch": 31.0, "learning_rate": 2e-05, "loss": 0.9997, "step": 29760 }, { "epoch": 31.0, "eval_accuracy": 0.7388722666381714, "eval_loss": 1.2249476909637451, "eval_runtime": 40.2623, "eval_samples_per_second": 188.862, "eval_steps_per_second": 1.192, "step": 29760 }, { "epoch": 32.0, "learning_rate": 2e-05, "loss": 0.9969, "step": 30720 }, { "epoch": 32.0, "eval_accuracy": 0.7389634850823794, "eval_loss": 1.2235573530197144, "eval_runtime": 40.3447, "eval_samples_per_second": 188.476, "eval_steps_per_second": 1.19, "step": 30720 }, { "epoch": 33.0, "learning_rate": 2e-05, "loss": 0.992, "step": 31680 }, { "epoch": 33.0, "eval_accuracy": 0.7388708814628272, "eval_loss": 1.217455506324768, "eval_runtime": 42.7026, "eval_samples_per_second": 178.069, "eval_steps_per_second": 1.124, "step": 31680 }, { "epoch": 34.0, "learning_rate": 2e-05, "loss": 0.988, "step": 32640 }, { "epoch": 34.0, "eval_accuracy": 0.7401730933519727, "eval_loss": 1.2093894481658936, "eval_runtime": 40.7008, "eval_samples_per_second": 186.827, "eval_steps_per_second": 1.179, "step": 32640 }, { "epoch": 35.0, "learning_rate": 2e-05, "loss": 0.9836, "step": 33600 }, { "epoch": 35.0, "eval_accuracy": 0.7400202510644008, "eval_loss": 1.208998203277588, "eval_runtime": 40.6904, "eval_samples_per_second": 186.874, "eval_steps_per_second": 1.18, "step": 33600 } ], "max_steps": 38400, "num_train_epochs": 40, "total_flos": 2041828249436160.0, "trial_name": null, "trial_params": null }