|
{ |
|
"best_metric": 0.9714, |
|
"best_model_checkpoint": "models/distilcamembert-allocine\\checkpoint-5500", |
|
"epoch": 3.0, |
|
"global_step": 7500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.0000000000000001e-07, |
|
"loss": 0.7377, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.5782, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.1837, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.1665, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 5e-05, |
|
"loss": 0.1504, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.95545, |
|
"eval_f1": 0.9541784520442272, |
|
"eval_loss": 0.1289823055267334, |
|
"eval_precision": 0.9614467820499534, |
|
"eval_recall": 0.9470191915067374, |
|
"eval_runtime": 51.8793, |
|
"eval_samples_per_second": 385.51, |
|
"eval_steps_per_second": 24.094, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.910714285714286e-05, |
|
"loss": 0.1469, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.8214285714285716e-05, |
|
"loss": 0.1363, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.732142857142857e-05, |
|
"loss": 0.1332, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.642857142857143e-05, |
|
"loss": 0.1334, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_accuracy": 0.96235, |
|
"eval_f1": 0.9618984971917219, |
|
"eval_loss": 0.10494959354400635, |
|
"eval_precision": 0.9536470352162135, |
|
"eval_recall": 0.9702939975500204, |
|
"eval_runtime": 52.9168, |
|
"eval_samples_per_second": 377.951, |
|
"eval_steps_per_second": 23.622, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.5535714285714286e-05, |
|
"loss": 0.1286, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.464285714285715e-05, |
|
"loss": 0.1322, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.375e-05, |
|
"loss": 0.1214, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.2857142857142856e-05, |
|
"loss": 0.1158, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_accuracy": 0.963, |
|
"eval_f1": 0.962742926190716, |
|
"eval_loss": 0.10521914064884186, |
|
"eval_precision": 0.9498311146433539, |
|
"eval_recall": 0.9760106165781952, |
|
"eval_runtime": 52.1412, |
|
"eval_samples_per_second": 383.574, |
|
"eval_steps_per_second": 23.973, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.196428571428572e-05, |
|
"loss": 0.1146, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.107142857142857e-05, |
|
"loss": 0.1154, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.017857142857143e-05, |
|
"loss": 0.1246, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.928571428571429e-05, |
|
"loss": 0.1153, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_accuracy": 0.9661, |
|
"eval_f1": 0.9652770664754685, |
|
"eval_loss": 0.09492386132478714, |
|
"eval_precision": 0.9685508735868448, |
|
"eval_recall": 0.9620253164556962, |
|
"eval_runtime": 53.0859, |
|
"eval_samples_per_second": 376.748, |
|
"eval_steps_per_second": 23.547, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.839285714285715e-05, |
|
"loss": 0.1064, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.112, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.6607142857142853e-05, |
|
"loss": 0.1062, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.571428571428572e-05, |
|
"loss": 0.1053, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9666, |
|
"eval_f1": 0.9663374319693611, |
|
"eval_loss": 0.09356806427240372, |
|
"eval_precision": 0.95421974522293, |
|
"eval_recall": 0.9787668436096366, |
|
"eval_runtime": 52.3106, |
|
"eval_samples_per_second": 382.331, |
|
"eval_steps_per_second": 23.896, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.4821428571428574e-05, |
|
"loss": 0.0797, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.392857142857143e-05, |
|
"loss": 0.0684, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.303571428571429e-05, |
|
"loss": 0.0745, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.2142857142857144e-05, |
|
"loss": 0.0755, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_accuracy": 0.97, |
|
"eval_f1": 0.9695400548279013, |
|
"eval_loss": 0.09874136000871658, |
|
"eval_precision": 0.964350636235104, |
|
"eval_recall": 0.9747856267864434, |
|
"eval_runtime": 52.4682, |
|
"eval_samples_per_second": 381.183, |
|
"eval_steps_per_second": 23.824, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.125e-05, |
|
"loss": 0.0771, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.0357142857142857e-05, |
|
"loss": 0.0733, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.9464285714285718e-05, |
|
"loss": 0.069, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.857142857142857e-05, |
|
"loss": 0.0716, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_accuracy": 0.9688, |
|
"eval_f1": 0.9684370257966616, |
|
"eval_loss": 0.10781414806842804, |
|
"eval_precision": 0.9597954682173652, |
|
"eval_recall": 0.9772356063699469, |
|
"eval_runtime": 52.2793, |
|
"eval_samples_per_second": 382.561, |
|
"eval_steps_per_second": 23.91, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.767857142857143e-05, |
|
"loss": 0.0781, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.6785714285714288e-05, |
|
"loss": 0.0725, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.5892857142857148e-05, |
|
"loss": 0.0755, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.0688, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_accuracy": 0.9673, |
|
"eval_f1": 0.967032967032967, |
|
"eval_loss": 0.10506118088960648, |
|
"eval_precision": 0.9551882095200159, |
|
"eval_recall": 0.9791751735402205, |
|
"eval_runtime": 52.2828, |
|
"eval_samples_per_second": 382.535, |
|
"eval_steps_per_second": 23.908, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.4107142857142858e-05, |
|
"loss": 0.07, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.3214285714285715e-05, |
|
"loss": 0.0735, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.2321428571428575e-05, |
|
"loss": 0.0748, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.1428571428571428e-05, |
|
"loss": 0.0691, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_accuracy": 0.97095, |
|
"eval_f1": 0.9703949044585987, |
|
"eval_loss": 0.09402387589216232, |
|
"eval_precision": 0.9687658968358938, |
|
"eval_recall": 0.972029399755002, |
|
"eval_runtime": 52.5632, |
|
"eval_samples_per_second": 380.494, |
|
"eval_steps_per_second": 23.781, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.0535714285714285e-05, |
|
"loss": 0.0655, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.9642857142857145e-05, |
|
"loss": 0.0714, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.8750000000000002e-05, |
|
"loss": 0.0783, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.785714285714286e-05, |
|
"loss": 0.0733, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.96855, |
|
"eval_f1": 0.9683171309122047, |
|
"eval_loss": 0.10380826145410538, |
|
"eval_precision": 0.9557522123893806, |
|
"eval_recall": 0.9812168231931401, |
|
"eval_runtime": 52.269, |
|
"eval_samples_per_second": 382.636, |
|
"eval_steps_per_second": 23.915, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.6964285714285715e-05, |
|
"loss": 0.0461, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.6071428571428572e-05, |
|
"loss": 0.0451, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.5178571428571429e-05, |
|
"loss": 0.0502, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.4285714285714285e-05, |
|
"loss": 0.0476, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_accuracy": 0.9714, |
|
"eval_f1": 0.9709909727152854, |
|
"eval_loss": 0.10657692700624466, |
|
"eval_precision": 0.9648256399919372, |
|
"eval_recall": 0.9772356063699469, |
|
"eval_runtime": 51.6788, |
|
"eval_samples_per_second": 387.006, |
|
"eval_steps_per_second": 24.188, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.3392857142857144e-05, |
|
"loss": 0.0522, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.0476, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.1607142857142857e-05, |
|
"loss": 0.0404, |
|
"step": 5875 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.0714285714285714e-05, |
|
"loss": 0.047, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_accuracy": 0.96895, |
|
"eval_f1": 0.9686316108501288, |
|
"eval_loss": 0.10983184725046158, |
|
"eval_precision": 0.9587041295870413, |
|
"eval_recall": 0.9787668436096366, |
|
"eval_runtime": 52.0939, |
|
"eval_samples_per_second": 383.922, |
|
"eval_steps_per_second": 23.995, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 9.821428571428573e-06, |
|
"loss": 0.0426, |
|
"step": 6125 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 8.92857142857143e-06, |
|
"loss": 0.0396, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 8.035714285714286e-06, |
|
"loss": 0.0481, |
|
"step": 6375 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.142857142857143e-06, |
|
"loss": 0.0431, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"eval_accuracy": 0.9711, |
|
"eval_f1": 0.9706211243265224, |
|
"eval_loss": 0.11103978008031845, |
|
"eval_precision": 0.9665924276169265, |
|
"eval_recall": 0.9746835443037974, |
|
"eval_runtime": 54.7325, |
|
"eval_samples_per_second": 365.413, |
|
"eval_steps_per_second": 22.838, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 6.25e-06, |
|
"loss": 0.0396, |
|
"step": 6625 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.357142857142857e-06, |
|
"loss": 0.0446, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.464285714285715e-06, |
|
"loss": 0.0362, |
|
"step": 6875 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.5714285714285714e-06, |
|
"loss": 0.0464, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_accuracy": 0.9697, |
|
"eval_f1": 0.9693970306029694, |
|
"eval_loss": 0.11486733704805374, |
|
"eval_precision": 0.9592244653208075, |
|
"eval_recall": 0.9797876684360963, |
|
"eval_runtime": 52.2973, |
|
"eval_samples_per_second": 382.429, |
|
"eval_steps_per_second": 23.902, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.6785714285714285e-06, |
|
"loss": 0.0431, |
|
"step": 7125 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.7857142857142857e-06, |
|
"loss": 0.0343, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 8.928571428571428e-07, |
|
"loss": 0.0467, |
|
"step": 7375 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.0342, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9703, |
|
"eval_f1": 0.9699270959902794, |
|
"eval_loss": 0.11218974739313126, |
|
"eval_precision": 0.9621333869023704, |
|
"eval_recall": 0.9778481012658228, |
|
"eval_runtime": 52.6026, |
|
"eval_samples_per_second": 380.209, |
|
"eval_steps_per_second": 23.763, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 7500, |
|
"total_flos": 4.553211650587354e+16, |
|
"train_loss": 0.08937127710183461, |
|
"train_runtime": 4426.0374, |
|
"train_samples_per_second": 108.449, |
|
"train_steps_per_second": 1.695 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9714, |
|
"eval_f1": 0.9709909727152854, |
|
"eval_loss": 0.10657692700624466, |
|
"eval_precision": 0.9648256399919372, |
|
"eval_recall": 0.9772356063699469, |
|
"eval_runtime": 52.9584, |
|
"eval_samples_per_second": 377.655, |
|
"eval_steps_per_second": 23.603, |
|
"step": 7500 |
|
} |
|
], |
|
"max_steps": 7500, |
|
"num_train_epochs": 3, |
|
"total_flos": 4.553211650587354e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|