|
{ |
|
"best_metric": 0.8179571663920923, |
|
"best_model_checkpoint": "./bigbird-base-health-fact/checkpoint-2452", |
|
"epoch": 3.0, |
|
"global_step": 3678, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.6630434782608698e-06, |
|
"loss": 1.2792, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 5.380434782608695e-06, |
|
"loss": 1.0546, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 8.097826086956523e-06, |
|
"loss": 0.8857, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 9.909365558912388e-06, |
|
"loss": 0.7958, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 9.60725075528701e-06, |
|
"loss": 0.7246, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 9.305135951661632e-06, |
|
"loss": 0.7629, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 9.003021148036256e-06, |
|
"loss": 0.6498, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 8.700906344410877e-06, |
|
"loss": 0.6201, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 8.398791540785499e-06, |
|
"loss": 0.5809, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.099697885196374e-06, |
|
"loss": 0.647, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 7.797583081570997e-06, |
|
"loss": 0.5814, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 7.4954682779456205e-06, |
|
"loss": 0.5563, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_false_f1": 0.7926023778071335, |
|
"eval_loss": 0.5019509196281433, |
|
"eval_macro_f1": 0.6062122975261963, |
|
"eval_micro_f1": 0.7948929159802306, |
|
"eval_mixture_f1": 0.4591194968553459, |
|
"eval_runtime": 39.3928, |
|
"eval_samples_per_second": 30.818, |
|
"eval_steps_per_second": 0.965, |
|
"eval_true_f1": 0.8986175115207373, |
|
"eval_unproven_f1": 0.2745098039215686, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 7.193353474320243e-06, |
|
"loss": 0.5271, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 6.891238670694864e-06, |
|
"loss": 0.4912, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 6.589123867069487e-06, |
|
"loss": 0.4844, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 6.287009063444109e-06, |
|
"loss": 0.4816, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 5.984894259818732e-06, |
|
"loss": 0.462, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5.682779456193354e-06, |
|
"loss": 0.4087, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 5.380664652567976e-06, |
|
"loss": 0.5065, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 5.078549848942599e-06, |
|
"loss": 0.4313, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 4.776435045317221e-06, |
|
"loss": 0.5098, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 4.4743202416918435e-06, |
|
"loss": 0.4699, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 4.172205438066466e-06, |
|
"loss": 0.4408, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 3.8700906344410875e-06, |
|
"loss": 0.5048, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_false_f1": 0.8201811125485123, |
|
"eval_loss": 0.4968700110912323, |
|
"eval_macro_f1": 0.684587518040316, |
|
"eval_micro_f1": 0.8179571663920923, |
|
"eval_mixture_f1": 0.43416370106761565, |
|
"eval_runtime": 39.4059, |
|
"eval_samples_per_second": 30.808, |
|
"eval_steps_per_second": 0.964, |
|
"eval_true_f1": 0.9125766871165646, |
|
"eval_unproven_f1": 0.5714285714285714, |
|
"step": 2452 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 3.5679758308157103e-06, |
|
"loss": 0.378, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 3.2658610271903322e-06, |
|
"loss": 0.3631, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 2.963746223564955e-06, |
|
"loss": 0.3475, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 2.661631419939577e-06, |
|
"loss": 0.3283, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 2.3595166163142e-06, |
|
"loss": 0.317, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 2.0604229607250755e-06, |
|
"loss": 0.3541, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 1.758308157099698e-06, |
|
"loss": 0.3818, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 1.4561933534743203e-06, |
|
"loss": 0.3467, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 1.1540785498489427e-06, |
|
"loss": 0.3048, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 8.51963746223565e-07, |
|
"loss": 0.3334, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 5.498489425981874e-07, |
|
"loss": 0.3855, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 2.477341389728097e-07, |
|
"loss": 0.3454, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_false_f1": 0.811443433029909, |
|
"eval_loss": 0.5863622426986694, |
|
"eval_macro_f1": 0.6874160790583576, |
|
"eval_micro_f1": 0.8130148270181219, |
|
"eval_mixture_f1": 0.4556962025316456, |
|
"eval_runtime": 39.6082, |
|
"eval_samples_per_second": 30.65, |
|
"eval_steps_per_second": 0.959, |
|
"eval_true_f1": 0.9153605015673981, |
|
"eval_unproven_f1": 0.5671641791044776, |
|
"step": 3678 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 3678, |
|
"total_flos": 2.106512041918464e+16, |
|
"train_loss": 0.5249485609128204, |
|
"train_runtime": 3291.1672, |
|
"train_samples_per_second": 8.937, |
|
"train_steps_per_second": 1.118 |
|
} |
|
], |
|
"max_steps": 3678, |
|
"num_train_epochs": 3, |
|
"total_flos": 2.106512041918464e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|