{
  "best_metric": 36.31328557707662,
  "best_model_checkpoint": "/root/turkic_qa/tr_uzn_models/orig_uzn_roberta_base_model/checkpoint-7310",
  "epoch": 10.0,
  "eval_steps": 500,
  "global_step": 7310,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "step": 731,
      "train_exact_match": 6.893106893106893,
      "train_f1": 13.147750410374615,
      "train_runtime": 13.1312,
      "train_samples_per_second": 113.851,
      "train_steps_per_second": 4.112
    },
    {
      "epoch": 1.0,
      "grad_norm": 12.239034652709961,
      "learning_rate": 5e-06,
      "loss": 4.418,
      "step": 731
    },
    {
      "epoch": 1.0,
      "eval_exact_match": 4.3125,
      "eval_f1": 11.281225208823463,
      "eval_runtime": 40.2248,
      "eval_samples_per_second": 113.935,
      "eval_steps_per_second": 4.077,
      "step": 731
    },
    {
      "epoch": 2.0,
      "step": 1462,
      "train_exact_match": 15.484515484515484,
      "train_f1": 23.00008673777926,
      "train_runtime": 12.8965,
      "train_samples_per_second": 113.364,
      "train_steps_per_second": 4.11
    },
    {
      "epoch": 2.0,
      "grad_norm": 13.470973014831543,
      "learning_rate": 1e-05,
      "loss": 3.3027,
      "step": 1462
    },
    {
      "epoch": 2.0,
      "eval_exact_match": 10.5,
      "eval_f1": 18.60249294088304,
      "eval_runtime": 40.2237,
      "eval_samples_per_second": 113.938,
      "eval_steps_per_second": 4.077,
      "step": 1462
    },
    {
      "epoch": 3.0,
      "step": 2193,
      "train_exact_match": 24.475524475524477,
      "train_f1": 33.75749140151479,
      "train_runtime": 12.4836,
      "train_samples_per_second": 113.83,
      "train_steps_per_second": 4.085
    },
    {
      "epoch": 3.0,
      "grad_norm": 11.94182014465332,
      "learning_rate": 8.750000000000001e-06,
      "loss": 2.8501,
      "step": 2193
    },
    {
      "epoch": 3.0,
      "eval_exact_match": 14.96875,
      "eval_f1": 23.95818931657813,
      "eval_runtime": 40.3346,
      "eval_samples_per_second": 113.625,
      "eval_steps_per_second": 4.066,
      "step": 2193
    },
    {
      "epoch": 4.0,
      "step": 2924,
      "train_exact_match": 32.56743256743257,
      "train_f1": 41.56263168632173,
      "train_runtime": 12.6655,
      "train_samples_per_second": 112.826,
      "train_steps_per_second": 4.106
    },
    {
      "epoch": 4.0,
      "grad_norm": 13.756119728088379,
      "learning_rate": 7.500000000000001e-06,
      "loss": 2.5536,
      "step": 2924
    },
    {
      "epoch": 4.0,
      "eval_exact_match": 17.875,
      "eval_f1": 27.247899577169083,
      "eval_runtime": 40.1853,
      "eval_samples_per_second": 114.047,
      "eval_steps_per_second": 4.081,
      "step": 2924
    },
    {
      "epoch": 5.0,
      "step": 3655,
      "train_exact_match": 36.76323676323676,
      "train_f1": 46.68974245865671,
      "train_runtime": 12.7455,
      "train_samples_per_second": 112.981,
      "train_steps_per_second": 4.08
    },
    {
      "epoch": 5.0,
      "grad_norm": 15.806905746459961,
      "learning_rate": 6.25e-06,
      "loss": 2.3273,
      "step": 3655
    },
    {
      "epoch": 5.0,
      "eval_exact_match": 19.75,
      "eval_f1": 29.976243167371322,
      "eval_runtime": 40.5485,
      "eval_samples_per_second": 113.025,
      "eval_steps_per_second": 4.045,
      "step": 3655
    },
    {
      "epoch": 6.0,
      "step": 4386,
      "train_exact_match": 45.45454545454545,
      "train_f1": 55.6255817809433,
      "train_runtime": 13.0466,
      "train_samples_per_second": 113.517,
      "train_steps_per_second": 4.062
    },
    {
      "epoch": 6.0,
      "grad_norm": 22.134902954101562,
      "learning_rate": 5e-06,
      "loss": 2.111,
      "step": 4386
    },
    {
      "epoch": 6.0,
      "eval_exact_match": 21.875,
      "eval_f1": 32.46367810578293,
      "eval_runtime": 40.5576,
      "eval_samples_per_second": 113.0,
      "eval_steps_per_second": 4.044,
      "step": 4386
    },
    {
      "epoch": 7.0,
      "step": 5117,
      "train_exact_match": 48.451548451548454,
      "train_f1": 59.11155707778988,
      "train_runtime": 12.7339,
      "train_samples_per_second": 112.456,
      "train_steps_per_second": 4.084
    },
    {
      "epoch": 7.0,
      "grad_norm": 18.71537971496582,
      "learning_rate": 3.7500000000000005e-06,
      "loss": 1.9313,
      "step": 5117
    },
    {
      "epoch": 7.0,
      "eval_exact_match": 23.34375,
      "eval_f1": 34.34464862509141,
      "eval_runtime": 40.5488,
      "eval_samples_per_second": 113.024,
      "eval_steps_per_second": 4.045,
      "step": 5117
    },
    {
      "epoch": 8.0,
      "step": 5848,
      "train_exact_match": 52.34765234765235,
      "train_f1": 61.4699737458471,
      "train_runtime": 12.2502,
      "train_samples_per_second": 112.978,
      "train_steps_per_second": 4.082
    },
    {
      "epoch": 8.0,
      "grad_norm": 21.410564422607422,
      "learning_rate": 2.5e-06,
      "loss": 1.7907,
      "step": 5848
    },
    {
      "epoch": 8.0,
      "eval_exact_match": 24.65625,
      "eval_f1": 35.76160110698192,
      "eval_runtime": 40.4412,
      "eval_samples_per_second": 113.325,
      "eval_steps_per_second": 4.055,
      "step": 5848
    },
    {
      "epoch": 9.0,
      "step": 6579,
      "train_exact_match": 56.34365634365634,
      "train_f1": 65.84711342159365,
      "train_runtime": 12.9755,
      "train_samples_per_second": 113.213,
      "train_steps_per_second": 4.085
    },
    {
      "epoch": 9.0,
      "grad_norm": 18.834157943725586,
      "learning_rate": 1.25e-06,
      "loss": 1.6986,
      "step": 6579
    },
    {
      "epoch": 9.0,
      "eval_exact_match": 24.8125,
      "eval_f1": 35.999416160714,
      "eval_runtime": 40.5359,
      "eval_samples_per_second": 113.06,
      "eval_steps_per_second": 4.046,
      "step": 6579
    },
    {
      "epoch": 10.0,
      "step": 7310,
      "train_exact_match": 56.64335664335665,
      "train_f1": 66.49918737685148,
      "train_runtime": 12.1264,
      "train_samples_per_second": 113.224,
      "train_steps_per_second": 4.123
    },
    {
      "epoch": 10.0,
      "grad_norm": 26.555368423461914,
      "learning_rate": 0.0,
      "loss": 1.6365,
      "step": 7310
    },
    {
      "epoch": 10.0,
      "eval_exact_match": 24.9375,
      "eval_f1": 36.31328557707662,
      "eval_runtime": 40.4168,
      "eval_samples_per_second": 113.394,
      "eval_steps_per_second": 4.058,
      "step": 7310
    },
    {
      "epoch": 10.0,
      "step": 7310,
      "total_flos": 2.004675840340992e+16,
      "train_loss": 2.4619675162715886,
      "train_runtime": 2688.9862,
      "train_samples_per_second": 76.081,
      "train_steps_per_second": 2.718
    }
  ],
  "logging_steps": 500,
  "max_steps": 7310,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "total_flos": 2.004675840340992e+16,
  "train_batch_size": 28,
  "trial_name": null,
  "trial_params": null
}