|
{ |
|
"best_metric": 0.9975, |
|
"best_model_checkpoint": "./tr_results/checkpoint-1200", |
|
"epoch": 12.0, |
|
"eval_steps": 500, |
|
"global_step": 1800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.98, |
|
"eval_classification_report": { |
|
"accuracy": 0.98, |
|
"macro avg": { |
|
"f1-score": 0.9799979997999799, |
|
"precision": 0.9801920768307323, |
|
"recall": 0.98, |
|
"support": 800.0 |
|
}, |
|
"neural": { |
|
"f1-score": 0.9797979797979798, |
|
"precision": 0.9897959183673469, |
|
"recall": 0.97, |
|
"support": 400.0 |
|
}, |
|
"real": { |
|
"f1-score": 0.9801980198019802, |
|
"precision": 0.9705882352941176, |
|
"recall": 0.99, |
|
"support": 400.0 |
|
}, |
|
"weighted avg": { |
|
"f1-score": 0.97999799979998, |
|
"precision": 0.9801920768307323, |
|
"recall": 0.98, |
|
"support": 800.0 |
|
} |
|
}, |
|
"eval_loss": 0.08059845864772797, |
|
"eval_runtime": 44.1793, |
|
"eval_samples_per_second": 18.108, |
|
"eval_steps_per_second": 1.132, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9925, |
|
"eval_classification_report": { |
|
"accuracy": 0.9925, |
|
"macro avg": { |
|
"f1-score": 0.9924995781012682, |
|
"precision": 0.9926108374384237, |
|
"recall": 0.9924999999999999, |
|
"support": 800.0 |
|
}, |
|
"neural": { |
|
"f1-score": 0.9925558312655086, |
|
"precision": 0.9852216748768473, |
|
"recall": 1.0, |
|
"support": 400.0 |
|
}, |
|
"real": { |
|
"f1-score": 0.9924433249370278, |
|
"precision": 1.0, |
|
"recall": 0.985, |
|
"support": 400.0 |
|
}, |
|
"weighted avg": { |
|
"f1-score": 0.9924995781012683, |
|
"precision": 0.9926108374384236, |
|
"recall": 0.9925, |
|
"support": 800.0 |
|
} |
|
}, |
|
"eval_loss": 0.03137431666254997, |
|
"eval_runtime": 37.0926, |
|
"eval_samples_per_second": 21.568, |
|
"eval_steps_per_second": 1.348, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.93, |
|
"eval_classification_report": { |
|
"accuracy": 0.93, |
|
"macro avg": { |
|
"f1-score": 0.9296553110240177, |
|
"precision": 0.9385964912280702, |
|
"recall": 0.9299999999999999, |
|
"support": 800.0 |
|
}, |
|
"neural": { |
|
"f1-score": 0.924731182795699, |
|
"precision": 1.0, |
|
"recall": 0.86, |
|
"support": 400.0 |
|
}, |
|
"real": { |
|
"f1-score": 0.9345794392523363, |
|
"precision": 0.8771929824561403, |
|
"recall": 1.0, |
|
"support": 400.0 |
|
}, |
|
"weighted avg": { |
|
"f1-score": 0.9296553110240177, |
|
"precision": 0.9385964912280701, |
|
"recall": 0.93, |
|
"support": 800.0 |
|
} |
|
}, |
|
"eval_loss": 0.40115270018577576, |
|
"eval_runtime": 36.1218, |
|
"eval_samples_per_second": 22.147, |
|
"eval_steps_per_second": 1.384, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 5e-05, |
|
"loss": 0.1716, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9925, |
|
"eval_classification_report": { |
|
"accuracy": 0.9925, |
|
"macro avg": { |
|
"f1-score": 0.9924995781012682, |
|
"precision": 0.9926108374384237, |
|
"recall": 0.9924999999999999, |
|
"support": 800.0 |
|
}, |
|
"neural": { |
|
"f1-score": 0.9924433249370278, |
|
"precision": 1.0, |
|
"recall": 0.985, |
|
"support": 400.0 |
|
}, |
|
"real": { |
|
"f1-score": 0.9925558312655086, |
|
"precision": 0.9852216748768473, |
|
"recall": 1.0, |
|
"support": 400.0 |
|
}, |
|
"weighted avg": { |
|
"f1-score": 0.9924995781012683, |
|
"precision": 0.9926108374384236, |
|
"recall": 0.9925, |
|
"support": 800.0 |
|
} |
|
}, |
|
"eval_loss": 0.03166389465332031, |
|
"eval_runtime": 45.4896, |
|
"eval_samples_per_second": 17.586, |
|
"eval_steps_per_second": 1.099, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9825, |
|
"eval_classification_report": { |
|
"accuracy": 0.9825, |
|
"macro avg": { |
|
"f1-score": 0.9824946389831886, |
|
"precision": 0.9830917874396135, |
|
"recall": 0.9824999999999999, |
|
"support": 800.0 |
|
}, |
|
"neural": { |
|
"f1-score": 0.9828009828009828, |
|
"precision": 0.966183574879227, |
|
"recall": 1.0, |
|
"support": 400.0 |
|
}, |
|
"real": { |
|
"f1-score": 0.9821882951653944, |
|
"precision": 1.0, |
|
"recall": 0.965, |
|
"support": 400.0 |
|
}, |
|
"weighted avg": { |
|
"f1-score": 0.9824946389831887, |
|
"precision": 0.9830917874396136, |
|
"recall": 0.9825, |
|
"support": 800.0 |
|
} |
|
}, |
|
"eval_loss": 0.06639690697193146, |
|
"eval_runtime": 45.9078, |
|
"eval_samples_per_second": 17.426, |
|
"eval_steps_per_second": 1.089, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.995, |
|
"eval_classification_report": { |
|
"accuracy": 0.995, |
|
"macro avg": { |
|
"f1-score": 0.9949998749968749, |
|
"precision": 0.995049504950495, |
|
"recall": 0.995, |
|
"support": 800.0 |
|
}, |
|
"neural": { |
|
"f1-score": 0.9949748743718593, |
|
"precision": 1.0, |
|
"recall": 0.99, |
|
"support": 400.0 |
|
}, |
|
"real": { |
|
"f1-score": 0.9950248756218906, |
|
"precision": 0.9900990099009901, |
|
"recall": 1.0, |
|
"support": 400.0 |
|
}, |
|
"weighted avg": { |
|
"f1-score": 0.9949998749968749, |
|
"precision": 0.9950495049504952, |
|
"recall": 0.995, |
|
"support": 800.0 |
|
} |
|
}, |
|
"eval_loss": 0.01404611486941576, |
|
"eval_runtime": 44.8322, |
|
"eval_samples_per_second": 17.844, |
|
"eval_steps_per_second": 1.115, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 4.827586206896552e-05, |
|
"loss": 0.0304, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.98625, |
|
"eval_classification_report": { |
|
"accuracy": 0.98625, |
|
"macro avg": { |
|
"f1-score": 0.9862473998990433, |
|
"precision": 0.9866180048661801, |
|
"recall": 0.9862500000000001, |
|
"support": 800.0 |
|
}, |
|
"neural": { |
|
"f1-score": 0.9860583016476552, |
|
"precision": 1.0, |
|
"recall": 0.9725, |
|
"support": 400.0 |
|
}, |
|
"real": { |
|
"f1-score": 0.9864364981504316, |
|
"precision": 0.9732360097323601, |
|
"recall": 1.0, |
|
"support": 400.0 |
|
}, |
|
"weighted avg": { |
|
"f1-score": 0.9862473998990434, |
|
"precision": 0.98661800486618, |
|
"recall": 0.98625, |
|
"support": 800.0 |
|
} |
|
}, |
|
"eval_loss": 0.06141611561179161, |
|
"eval_runtime": 44.6418, |
|
"eval_samples_per_second": 17.92, |
|
"eval_steps_per_second": 1.12, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9975, |
|
"eval_classification_report": { |
|
"accuracy": 0.9975, |
|
"macro avg": { |
|
"f1-score": 0.9974999843749024, |
|
"precision": 0.9975124378109452, |
|
"recall": 0.9975, |
|
"support": 800.0 |
|
}, |
|
"neural": { |
|
"f1-score": 0.9974937343358395, |
|
"precision": 1.0, |
|
"recall": 0.995, |
|
"support": 400.0 |
|
}, |
|
"real": { |
|
"f1-score": 0.9975062344139651, |
|
"precision": 0.9950248756218906, |
|
"recall": 1.0, |
|
"support": 400.0 |
|
}, |
|
"weighted avg": { |
|
"f1-score": 0.9974999843749024, |
|
"precision": 0.9975124378109453, |
|
"recall": 0.9975, |
|
"support": 800.0 |
|
} |
|
}, |
|
"eval_loss": 0.010667502880096436, |
|
"eval_runtime": 35.7885, |
|
"eval_samples_per_second": 22.354, |
|
"eval_steps_per_second": 1.397, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9975, |
|
"eval_classification_report": { |
|
"accuracy": 0.9975, |
|
"macro avg": { |
|
"f1-score": 0.9974999843749024, |
|
"precision": 0.9975124378109452, |
|
"recall": 0.9975, |
|
"support": 800.0 |
|
}, |
|
"neural": { |
|
"f1-score": 0.9974937343358395, |
|
"precision": 1.0, |
|
"recall": 0.995, |
|
"support": 400.0 |
|
}, |
|
"real": { |
|
"f1-score": 0.9975062344139651, |
|
"precision": 0.9950248756218906, |
|
"recall": 1.0, |
|
"support": 400.0 |
|
}, |
|
"weighted avg": { |
|
"f1-score": 0.9974999843749024, |
|
"precision": 0.9975124378109453, |
|
"recall": 0.9975, |
|
"support": 800.0 |
|
} |
|
}, |
|
"eval_loss": 0.016876069828867912, |
|
"eval_runtime": 29.7468, |
|
"eval_samples_per_second": 26.894, |
|
"eval_steps_per_second": 1.681, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 4.655172413793104e-05, |
|
"loss": 0.0059, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9975, |
|
"eval_classification_report": { |
|
"accuracy": 0.9975, |
|
"macro avg": { |
|
"f1-score": 0.9974999843749024, |
|
"precision": 0.9975124378109452, |
|
"recall": 0.9975, |
|
"support": 800.0 |
|
}, |
|
"neural": { |
|
"f1-score": 0.9974937343358395, |
|
"precision": 1.0, |
|
"recall": 0.995, |
|
"support": 400.0 |
|
}, |
|
"real": { |
|
"f1-score": 0.9975062344139651, |
|
"precision": 0.9950248756218906, |
|
"recall": 1.0, |
|
"support": 400.0 |
|
}, |
|
"weighted avg": { |
|
"f1-score": 0.9974999843749024, |
|
"precision": 0.9975124378109453, |
|
"recall": 0.9975, |
|
"support": 800.0 |
|
} |
|
}, |
|
"eval_loss": 0.015976013615727425, |
|
"eval_runtime": 30.2383, |
|
"eval_samples_per_second": 26.457, |
|
"eval_steps_per_second": 1.654, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.9975, |
|
"eval_classification_report": { |
|
"accuracy": 0.9975, |
|
"macro avg": { |
|
"f1-score": 0.9974999843749024, |
|
"precision": 0.9975124378109452, |
|
"recall": 0.9975, |
|
"support": 800.0 |
|
}, |
|
"neural": { |
|
"f1-score": 0.9974937343358395, |
|
"precision": 1.0, |
|
"recall": 0.995, |
|
"support": 400.0 |
|
}, |
|
"real": { |
|
"f1-score": 0.9975062344139651, |
|
"precision": 0.9950248756218906, |
|
"recall": 1.0, |
|
"support": 400.0 |
|
}, |
|
"weighted avg": { |
|
"f1-score": 0.9974999843749024, |
|
"precision": 0.9975124378109453, |
|
"recall": 0.9975, |
|
"support": 800.0 |
|
} |
|
}, |
|
"eval_loss": 0.015417657792568207, |
|
"eval_runtime": 26.7067, |
|
"eval_samples_per_second": 29.955, |
|
"eval_steps_per_second": 1.872, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9975, |
|
"eval_classification_report": { |
|
"accuracy": 0.9975, |
|
"macro avg": { |
|
"f1-score": 0.9974999843749024, |
|
"precision": 0.9975124378109452, |
|
"recall": 0.9975, |
|
"support": 800.0 |
|
}, |
|
"neural": { |
|
"f1-score": 0.9974937343358395, |
|
"precision": 1.0, |
|
"recall": 0.995, |
|
"support": 400.0 |
|
}, |
|
"real": { |
|
"f1-score": 0.9975062344139651, |
|
"precision": 0.9950248756218906, |
|
"recall": 1.0, |
|
"support": 400.0 |
|
}, |
|
"weighted avg": { |
|
"f1-score": 0.9974999843749024, |
|
"precision": 0.9975124378109453, |
|
"recall": 0.9975, |
|
"support": 800.0 |
|
} |
|
}, |
|
"eval_loss": 0.017138773575425148, |
|
"eval_runtime": 28.9695, |
|
"eval_samples_per_second": 27.615, |
|
"eval_steps_per_second": 1.726, |
|
"step": 1800 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 15000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"total_flos": 7577598394368000.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|