|
{ |
|
"best_metric": 0.6307053941908713, |
|
"best_model_checkpoint": "distilbert-base-multilingual-cased-hyper-matt/run-qtr0udgj/checkpoint-200", |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 200, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 2.049558639526367, |
|
"learning_rate": 8.243172962606281e-06, |
|
"loss": 0.663, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.8758213520050049, |
|
"learning_rate": 7.809321754048056e-06, |
|
"loss": 0.5872, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 1.6676493883132935, |
|
"learning_rate": 7.375470545489831e-06, |
|
"loss": 0.5806, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 2.160999298095703, |
|
"learning_rate": 6.941619336931606e-06, |
|
"loss": 0.5375, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 3.5581603050231934, |
|
"learning_rate": 6.507768128373381e-06, |
|
"loss": 0.5219, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 3.43050479888916, |
|
"learning_rate": 6.0739169198151545e-06, |
|
"loss": 0.4933, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 3.4380669593811035, |
|
"learning_rate": 5.64006571125693e-06, |
|
"loss": 0.4417, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 3.310192823410034, |
|
"learning_rate": 5.2062145026987045e-06, |
|
"loss": 0.484, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 5.5063323974609375, |
|
"learning_rate": 4.772363294140479e-06, |
|
"loss": 0.5157, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 2.589657783508301, |
|
"learning_rate": 4.3385120855822536e-06, |
|
"loss": 0.4096, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.755, |
|
"eval_f1": 0.5882352941176471, |
|
"eval_loss": 0.4866395890712738, |
|
"eval_precision": 0.6086956521739131, |
|
"eval_recall": 0.5691056910569106, |
|
"eval_runtime": 1.5211, |
|
"eval_samples_per_second": 262.961, |
|
"eval_steps_per_second": 16.435, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 3.2699267864227295, |
|
"learning_rate": 3.904660877024028e-06, |
|
"loss": 0.4491, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 3.8382904529571533, |
|
"learning_rate": 3.470809668465803e-06, |
|
"loss": 0.4236, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 3.1422176361083984, |
|
"learning_rate": 3.0369584599075772e-06, |
|
"loss": 0.4383, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 2.356064558029175, |
|
"learning_rate": 2.6031072513493522e-06, |
|
"loss": 0.4434, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 3.1500184535980225, |
|
"learning_rate": 2.1692560427911268e-06, |
|
"loss": 0.3509, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 2.9784765243530273, |
|
"learning_rate": 1.7354048342329016e-06, |
|
"loss": 0.4451, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 8.244532585144043, |
|
"learning_rate": 1.3015536256746761e-06, |
|
"loss": 0.381, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 8.15170669555664, |
|
"learning_rate": 8.677024171164508e-07, |
|
"loss": 0.4256, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 3.7399942874908447, |
|
"learning_rate": 4.338512085582254e-07, |
|
"loss": 0.3909, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.0128397941589355, |
|
"learning_rate": 0.0, |
|
"loss": 0.3897, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7775, |
|
"eval_f1": 0.6307053941908713, |
|
"eval_loss": 0.4450126588344574, |
|
"eval_precision": 0.6440677966101694, |
|
"eval_recall": 0.6178861788617886, |
|
"eval_runtime": 1.5626, |
|
"eval_samples_per_second": 255.985, |
|
"eval_steps_per_second": 15.999, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 423630740901888.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": { |
|
"_wandb": {}, |
|
"assignments": {}, |
|
"learning_rate": 8.677024171164507e-06, |
|
"metric": "eval/loss", |
|
"num_train_epochs": 2, |
|
"per_device_train_batch_size": 16, |
|
"seed": 37 |
|
} |
|
} |
|
|