|
{ |
|
"best_metric": 0.7999999999999999, |
|
"best_model_checkpoint": "/home/bel3/content/model_folder//finetune/sst2/checkpoint-2000", |
|
"epoch": 8.10126582278481, |
|
"global_step": 6400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.25, |
|
"eval_accuracy": 0.7637795209884644, |
|
"eval_f1": 0.7894736842105263, |
|
"eval_loss": 0.5017877817153931, |
|
"eval_runtime": 0.8927, |
|
"eval_samples_per_second": 569.063, |
|
"eval_steps_per_second": 71.693, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_accuracy": 0.8070865869522095, |
|
"eval_f1": 0.8178438661710037, |
|
"eval_loss": 0.4368048310279846, |
|
"eval_runtime": 0.896, |
|
"eval_samples_per_second": 566.985, |
|
"eval_steps_per_second": 71.431, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.683544303797468e-05, |
|
"loss": 0.4948, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_accuracy": 0.789370059967041, |
|
"eval_f1": 0.7906066536203523, |
|
"eval_loss": 0.45398545265197754, |
|
"eval_runtime": 0.895, |
|
"eval_samples_per_second": 567.578, |
|
"eval_steps_per_second": 71.506, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_accuracy": 0.8011810779571533, |
|
"eval_f1": 0.7908902691511387, |
|
"eval_loss": 0.44997021555900574, |
|
"eval_runtime": 0.9042, |
|
"eval_samples_per_second": 561.806, |
|
"eval_steps_per_second": 70.779, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 4.367088607594937e-05, |
|
"loss": 0.3027, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"eval_accuracy": 0.8129921555519104, |
|
"eval_f1": 0.8155339805825244, |
|
"eval_loss": 0.4525600075721741, |
|
"eval_runtime": 0.833, |
|
"eval_samples_per_second": 609.864, |
|
"eval_steps_per_second": 76.833, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_accuracy": 0.8208661675453186, |
|
"eval_f1": 0.8253358925143954, |
|
"eval_loss": 0.4560699462890625, |
|
"eval_runtime": 0.8459, |
|
"eval_samples_per_second": 600.52, |
|
"eval_steps_per_second": 75.656, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_accuracy": 0.8149606585502625, |
|
"eval_f1": 0.812, |
|
"eval_loss": 0.4809434413909912, |
|
"eval_runtime": 0.8476, |
|
"eval_samples_per_second": 599.306, |
|
"eval_steps_per_second": 75.503, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 4.050632911392405e-05, |
|
"loss": 0.236, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_accuracy": 0.8169291615486145, |
|
"eval_f1": 0.8228571428571428, |
|
"eval_loss": 0.6252030730247498, |
|
"eval_runtime": 0.9277, |
|
"eval_samples_per_second": 547.599, |
|
"eval_steps_per_second": 68.989, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_accuracy": 0.8149606585502625, |
|
"eval_f1": 0.8142292490118577, |
|
"eval_loss": 0.6088564395904541, |
|
"eval_runtime": 0.8559, |
|
"eval_samples_per_second": 593.542, |
|
"eval_steps_per_second": 74.777, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 3.7341772151898736e-05, |
|
"loss": 0.1743, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_accuracy": 0.8070865869522095, |
|
"eval_f1": 0.7999999999999999, |
|
"eval_loss": 0.593163251876831, |
|
"eval_runtime": 0.8297, |
|
"eval_samples_per_second": 612.294, |
|
"eval_steps_per_second": 77.139, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"eval_accuracy": 0.8090550899505615, |
|
"eval_f1": 0.7991718426501037, |
|
"eval_loss": 0.5563398599624634, |
|
"eval_runtime": 0.8844, |
|
"eval_samples_per_second": 574.387, |
|
"eval_steps_per_second": 72.364, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"eval_accuracy": 0.7972440719604492, |
|
"eval_f1": 0.8052930056710774, |
|
"eval_loss": 0.5898112058639526, |
|
"eval_runtime": 0.9275, |
|
"eval_samples_per_second": 547.695, |
|
"eval_steps_per_second": 69.001, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 3.4177215189873416e-05, |
|
"loss": 0.1599, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"eval_accuracy": 0.8228346705436707, |
|
"eval_f1": 0.8228346456692914, |
|
"eval_loss": 0.538675844669342, |
|
"eval_runtime": 0.9213, |
|
"eval_samples_per_second": 551.421, |
|
"eval_steps_per_second": 69.47, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"eval_accuracy": 0.8110235929489136, |
|
"eval_f1": 0.8087649402390438, |
|
"eval_loss": 0.5058029890060425, |
|
"eval_runtime": 0.9251, |
|
"eval_samples_per_second": 549.151, |
|
"eval_steps_per_second": 69.184, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 3.10126582278481e-05, |
|
"loss": 0.1218, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"eval_accuracy": 0.8248031735420227, |
|
"eval_f1": 0.8216432865731464, |
|
"eval_loss": 0.6525737047195435, |
|
"eval_runtime": 0.8811, |
|
"eval_samples_per_second": 576.522, |
|
"eval_steps_per_second": 72.633, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"eval_accuracy": 0.8110235929489136, |
|
"eval_f1": 0.8102766798418972, |
|
"eval_loss": 0.8057999610900879, |
|
"eval_runtime": 0.8465, |
|
"eval_samples_per_second": 600.11, |
|
"eval_steps_per_second": 75.604, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"eval_accuracy": 0.8110235929489136, |
|
"eval_f1": 0.8032786885245902, |
|
"eval_loss": 0.7197728753089905, |
|
"eval_runtime": 0.8474, |
|
"eval_samples_per_second": 599.485, |
|
"eval_steps_per_second": 75.526, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 2.7848101265822786e-05, |
|
"loss": 0.0955, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"eval_accuracy": 0.8031495809555054, |
|
"eval_f1": 0.7967479674796747, |
|
"eval_loss": 0.7408320903778076, |
|
"eval_runtime": 0.8491, |
|
"eval_samples_per_second": 598.276, |
|
"eval_steps_per_second": 75.373, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"eval_accuracy": 0.8011810779571533, |
|
"eval_f1": 0.8112149532710279, |
|
"eval_loss": 0.7727562785148621, |
|
"eval_runtime": 0.8979, |
|
"eval_samples_per_second": 565.791, |
|
"eval_steps_per_second": 71.281, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 2.468354430379747e-05, |
|
"loss": 0.0858, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"eval_accuracy": 0.8051180839538574, |
|
"eval_f1": 0.800804828973843, |
|
"eval_loss": 0.8653830885887146, |
|
"eval_runtime": 0.8947, |
|
"eval_samples_per_second": 567.8, |
|
"eval_steps_per_second": 71.534, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"eval_accuracy": 0.789370059967041, |
|
"eval_f1": 0.8065099457504521, |
|
"eval_loss": 0.9238587021827698, |
|
"eval_runtime": 0.8912, |
|
"eval_samples_per_second": 570.005, |
|
"eval_steps_per_second": 71.812, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"eval_accuracy": 0.7814960479736328, |
|
"eval_f1": 0.7940630797773653, |
|
"eval_loss": 0.8553095459938049, |
|
"eval_runtime": 0.8957, |
|
"eval_samples_per_second": 567.124, |
|
"eval_steps_per_second": 71.449, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 2.1518987341772153e-05, |
|
"loss": 0.0634, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"eval_accuracy": 0.8110235929489136, |
|
"eval_f1": 0.817490494296578, |
|
"eval_loss": 0.6774270534515381, |
|
"eval_runtime": 0.8879, |
|
"eval_samples_per_second": 572.147, |
|
"eval_steps_per_second": 72.081, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"eval_accuracy": 0.7992125749588013, |
|
"eval_f1": 0.8038461538461538, |
|
"eval_loss": 0.9549906849861145, |
|
"eval_runtime": 0.9051, |
|
"eval_samples_per_second": 561.292, |
|
"eval_steps_per_second": 70.714, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 1.8354430379746836e-05, |
|
"loss": 0.0548, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"eval_accuracy": 0.8031495809555054, |
|
"eval_f1": 0.8091603053435114, |
|
"eval_loss": 1.1089578866958618, |
|
"eval_runtime": 0.8777, |
|
"eval_samples_per_second": 578.768, |
|
"eval_steps_per_second": 72.916, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"eval_accuracy": 0.8051180839538574, |
|
"eval_f1": 0.809248554913295, |
|
"eval_loss": 1.028864860534668, |
|
"eval_runtime": 0.8922, |
|
"eval_samples_per_second": 569.357, |
|
"eval_steps_per_second": 71.73, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"eval_accuracy": 0.7952755689620972, |
|
"eval_f1": 0.8066914498141264, |
|
"eval_loss": 0.9950660467147827, |
|
"eval_runtime": 0.894, |
|
"eval_samples_per_second": 568.236, |
|
"eval_steps_per_second": 71.589, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 1.5189873417721521e-05, |
|
"loss": 0.0473, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"eval_accuracy": 0.8110235929489136, |
|
"eval_f1": 0.8117647058823529, |
|
"eval_loss": 1.1159313917160034, |
|
"eval_runtime": 0.8468, |
|
"eval_samples_per_second": 599.932, |
|
"eval_steps_per_second": 75.582, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"eval_accuracy": 0.8090550899505615, |
|
"eval_f1": 0.8131021194605009, |
|
"eval_loss": 1.0203063488006592, |
|
"eval_runtime": 0.9245, |
|
"eval_samples_per_second": 549.472, |
|
"eval_steps_per_second": 69.225, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"learning_rate": 1.2025316455696203e-05, |
|
"loss": 0.036, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"eval_accuracy": 0.8149606585502625, |
|
"eval_f1": 0.815686274509804, |
|
"eval_loss": 1.0656845569610596, |
|
"eval_runtime": 0.9107, |
|
"eval_samples_per_second": 557.814, |
|
"eval_steps_per_second": 70.276, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"eval_accuracy": 0.8129921555519104, |
|
"eval_f1": 0.8140900195694716, |
|
"eval_loss": 1.0237640142440796, |
|
"eval_runtime": 0.85, |
|
"eval_samples_per_second": 597.63, |
|
"eval_steps_per_second": 75.292, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"eval_accuracy": 0.8208661675453186, |
|
"eval_f1": 0.8205128205128205, |
|
"eval_loss": 1.1597448587417603, |
|
"eval_runtime": 0.8983, |
|
"eval_samples_per_second": 565.501, |
|
"eval_steps_per_second": 71.244, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"step": 6400, |
|
"total_flos": 1.3556183708860416e+16, |
|
"train_loss": 0.14836265951395033, |
|
"train_runtime": 2029.2795, |
|
"train_samples_per_second": 248.995, |
|
"train_steps_per_second": 3.893 |
|
} |
|
], |
|
"max_steps": 7900, |
|
"num_train_epochs": 10, |
|
"total_flos": 1.3556183708860416e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|