|
{ |
|
"best_metric": 0.9863948830227645, |
|
"best_model_checkpoint": "./output//29_roberta-large_nace_5__5e-6_0.01_0.06_07-21-22_10-40/checkpoint-46000", |
|
"epoch": 1.6596362630523478, |
|
"global_step": 48000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.1524720525527257e-06, |
|
"loss": 0.1493, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_accuracy": 0.9610440126806309, |
|
"eval_f1": 0.9764342504146886, |
|
"eval_loss": 0.05725020170211792, |
|
"eval_roc_auc": 0.9759211072420441, |
|
"eval_runtime": 1536.6369, |
|
"eval_samples_per_second": 33.461, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.3049441051054513e-06, |
|
"loss": 0.0528, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.9728883443219168, |
|
"eval_f1": 0.9829246052694726, |
|
"eval_loss": 0.05182106792926788, |
|
"eval_roc_auc": 0.9827894561625595, |
|
"eval_runtime": 1536.9258, |
|
"eval_samples_per_second": 33.454, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.457416157658177e-06, |
|
"loss": 0.0497, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_accuracy": 0.9731606278079235, |
|
"eval_f1": 0.9840875186474392, |
|
"eval_loss": 0.048033468425273895, |
|
"eval_roc_auc": 0.9838391681932317, |
|
"eval_runtime": 1538.305, |
|
"eval_samples_per_second": 33.424, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.609888210210903e-06, |
|
"loss": 0.0462, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_accuracy": 0.9722465332477586, |
|
"eval_f1": 0.9834792174869661, |
|
"eval_loss": 0.054564349353313446, |
|
"eval_roc_auc": 0.9832329187393456, |
|
"eval_runtime": 1538.1819, |
|
"eval_samples_per_second": 33.427, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.951336320098873e-06, |
|
"loss": 0.0433, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_accuracy": 0.9736079506777914, |
|
"eval_f1": 0.9837862700755107, |
|
"eval_loss": 0.04221172630786896, |
|
"eval_roc_auc": 0.9836052885203017, |
|
"eval_runtime": 1535.859, |
|
"eval_samples_per_second": 33.478, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.877770666431257e-06, |
|
"loss": 0.0442, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_accuracy": 0.9738413365229399, |
|
"eval_f1": 0.9840872143062064, |
|
"eval_loss": 0.047410748898983, |
|
"eval_roc_auc": 0.9838395246306029, |
|
"eval_runtime": 1536.4074, |
|
"eval_samples_per_second": 33.466, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.804205012763641e-06, |
|
"loss": 0.0433, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_accuracy": 0.9747748799035338, |
|
"eval_f1": 0.9848328360492695, |
|
"eval_loss": 0.04587765783071518, |
|
"eval_roc_auc": 0.9845986397430451, |
|
"eval_runtime": 1536.4412, |
|
"eval_samples_per_second": 33.465, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.730639359096026e-06, |
|
"loss": 0.0427, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_accuracy": 0.9745609428788145, |
|
"eval_f1": 0.9846027155907081, |
|
"eval_loss": 0.04280721768736839, |
|
"eval_roc_auc": 0.9844085121898316, |
|
"eval_runtime": 1536.1052, |
|
"eval_samples_per_second": 33.472, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.65707370542841e-06, |
|
"loss": 0.0401, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_accuracy": 0.9754555886185503, |
|
"eval_f1": 0.9849309815950921, |
|
"eval_loss": 0.0410287007689476, |
|
"eval_roc_auc": 0.9847790254315509, |
|
"eval_runtime": 1536.224, |
|
"eval_samples_per_second": 33.47, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.583508051760795e-06, |
|
"loss": 0.0409, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_accuracy": 0.974891572826108, |
|
"eval_f1": 0.9855708668931557, |
|
"eval_loss": 0.040095292031764984, |
|
"eval_roc_auc": 0.9852792796060187, |
|
"eval_runtime": 1536.3646, |
|
"eval_samples_per_second": 33.467, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.509942398093178e-06, |
|
"loss": 0.0403, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_accuracy": 0.9755139350798374, |
|
"eval_f1": 0.9850088183421517, |
|
"eval_loss": 0.040499284863471985, |
|
"eval_roc_auc": 0.9848554210846383, |
|
"eval_runtime": 1536.2737, |
|
"eval_samples_per_second": 33.469, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.436376744425563e-06, |
|
"loss": 0.0414, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_accuracy": 0.975319446875547, |
|
"eval_f1": 0.9852808881232654, |
|
"eval_loss": 0.041136886924505234, |
|
"eval_roc_auc": 0.985079469215943, |
|
"eval_runtime": 1536.5795, |
|
"eval_samples_per_second": 33.462, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.362811090757947e-06, |
|
"loss": 0.0402, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.9764474784604313, |
|
"eval_f1": 0.9861650624772766, |
|
"eval_loss": 0.04073059558868408, |
|
"eval_roc_auc": 0.9859694252223201, |
|
"eval_runtime": 1536.8522, |
|
"eval_samples_per_second": 33.456, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 4.289245437090332e-06, |
|
"loss": 0.0399, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_accuracy": 0.9755722815411245, |
|
"eval_f1": 0.9853538060958797, |
|
"eval_loss": 0.040422726422548294, |
|
"eval_roc_auc": 0.9851619243043377, |
|
"eval_runtime": 1537.2709, |
|
"eval_samples_per_second": 33.447, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.215679783422716e-06, |
|
"loss": 0.04, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_accuracy": 0.9761946437948539, |
|
"eval_f1": 0.9859556473987333, |
|
"eval_loss": 0.03985821455717087, |
|
"eval_roc_auc": 0.9857539906157645, |
|
"eval_runtime": 1537.4136, |
|
"eval_samples_per_second": 33.444, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 4.142114129755101e-06, |
|
"loss": 0.0373, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_accuracy": 0.9766419666647218, |
|
"eval_f1": 0.9855267070141279, |
|
"eval_loss": 0.04419500008225441, |
|
"eval_roc_auc": 0.9854023838792458, |
|
"eval_runtime": 1536.3227, |
|
"eval_samples_per_second": 33.468, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 4.068548476087484e-06, |
|
"loss": 0.0357, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_accuracy": 0.9768948013302993, |
|
"eval_f1": 0.9859001025621362, |
|
"eval_loss": 0.04071442782878876, |
|
"eval_roc_auc": 0.9857602731082165, |
|
"eval_runtime": 1536.5165, |
|
"eval_samples_per_second": 33.463, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.994982822419869e-06, |
|
"loss": 0.0365, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_accuracy": 0.976252990256141, |
|
"eval_f1": 0.9857402622260503, |
|
"eval_loss": 0.04006611183285713, |
|
"eval_roc_auc": 0.9855463976313715, |
|
"eval_runtime": 1536.8448, |
|
"eval_samples_per_second": 33.456, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.921417168752253e-06, |
|
"loss": 0.0362, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_accuracy": 0.9752805492346889, |
|
"eval_f1": 0.9857657021203471, |
|
"eval_loss": 0.03810262680053711, |
|
"eval_roc_auc": 0.9854908677538746, |
|
"eval_runtime": 1536.4894, |
|
"eval_samples_per_second": 33.464, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 3.847851515084638e-06, |
|
"loss": 0.0367, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_accuracy": 0.9741719664702336, |
|
"eval_f1": 0.9848820340536, |
|
"eval_loss": 0.03949220851063728, |
|
"eval_roc_auc": 0.984581025607102, |
|
"eval_runtime": 1536.5636, |
|
"eval_samples_per_second": 33.462, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 3.7742858614170217e-06, |
|
"loss": 0.0366, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_accuracy": 0.9754166909776922, |
|
"eval_f1": 0.9858603603173542, |
|
"eval_loss": 0.03979261964559555, |
|
"eval_roc_auc": 0.9855892138192017, |
|
"eval_runtime": 1537.1787, |
|
"eval_samples_per_second": 33.449, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 3.7007202077494066e-06, |
|
"loss": 0.0354, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_accuracy": 0.9768559036894412, |
|
"eval_f1": 0.9861624145916668, |
|
"eval_loss": 0.039536893367767334, |
|
"eval_roc_auc": 0.9859729895960303, |
|
"eval_runtime": 1536.4855, |
|
"eval_samples_per_second": 33.464, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 3.6271545540817902e-06, |
|
"loss": 0.0365, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_accuracy": 0.9768364548690122, |
|
"eval_f1": 0.9863948830227645, |
|
"eval_loss": 0.04265940561890602, |
|
"eval_roc_auc": 0.9861797953532997, |
|
"eval_runtime": 1536.6035, |
|
"eval_samples_per_second": 33.461, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 3.553588900414175e-06, |
|
"loss": 0.0358, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"eval_accuracy": 0.9745609428788145, |
|
"eval_f1": 0.9850252764160049, |
|
"eval_loss": 0.039769496768713, |
|
"eval_roc_auc": 0.9847700991727389, |
|
"eval_runtime": 1536.7946, |
|
"eval_samples_per_second": 33.457, |
|
"step": 48000 |
|
} |
|
], |
|
"max_steps": 144610, |
|
"num_train_epochs": 5, |
|
"total_flos": 6.9866971594752e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|