{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.53257790368272,
  "global_step": 400,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.11,
      "learning_rate": 1.9974521146102535e-05,
      "loss": 1.0269,
      "step": 10
    },
    {
      "epoch": 0.23,
      "learning_rate": 1.989821441880933e-05,
      "loss": 0.9778,
      "step": 20
    },
    {
      "epoch": 0.34,
      "learning_rate": 1.9771468659711595e-05,
      "loss": 0.9404,
      "step": 30
    },
    {
      "epoch": 0.45,
      "learning_rate": 1.9594929736144978e-05,
      "loss": 0.8988,
      "step": 40
    },
    {
      "epoch": 0.57,
      "learning_rate": 1.936949724999762e-05,
      "loss": 0.8994,
      "step": 50
    },
    {
      "epoch": 0.68,
      "learning_rate": 1.9096319953545186e-05,
      "loss": 0.8785,
      "step": 60
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.8776789895672557e-05,
      "loss": 0.8423,
      "step": 70
    },
    {
      "epoch": 0.91,
      "learning_rate": 1.8412535328311813e-05,
      "loss": 0.8139,
      "step": 80
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.8005412409243604e-05,
      "loss": 0.8333,
      "step": 90
    },
    {
      "epoch": 1.13,
      "learning_rate": 1.7557495743542586e-05,
      "loss": 0.8056,
      "step": 100
    },
    {
      "epoch": 1.25,
      "learning_rate": 1.7071067811865477e-05,
      "loss": 0.8051,
      "step": 110
    },
    {
      "epoch": 1.36,
      "learning_rate": 1.6548607339452853e-05,
      "loss": 0.7923,
      "step": 120
    },
    {
      "epoch": 1.47,
      "learning_rate": 1.599277666511347e-05,
      "loss": 0.8327,
      "step": 130
    },
    {
      "epoch": 1.59,
      "learning_rate": 1.5406408174555978e-05,
      "loss": 0.7954,
      "step": 140
    },
    {
      "epoch": 1.7,
      "learning_rate": 1.479248986720057e-05,
      "loss": 0.797,
      "step": 150
    },
    {
      "epoch": 1.81,
      "learning_rate": 1.4154150130018867e-05,
      "loss": 0.7754,
      "step": 160
    },
    {
      "epoch": 1.93,
      "learning_rate": 1.3494641795990986e-05,
      "loss": 0.7847,
      "step": 170
    },
    {
      "epoch": 2.04,
      "learning_rate": 1.2817325568414299e-05,
      "loss": 0.7779,
      "step": 180
    },
    {
      "epoch": 2.15,
      "learning_rate": 1.2125652895529766e-05,
      "loss": 0.7569,
      "step": 190
    },
    {
      "epoch": 2.27,
      "learning_rate": 1.1423148382732854e-05,
      "loss": 0.7688,
      "step": 200
    },
    {
      "epoch": 2.38,
      "learning_rate": 1.0713391831992324e-05,
      "loss": 0.7734,
      "step": 210
    },
    {
      "epoch": 2.49,
      "learning_rate": 1e-05,
      "loss": 0.7668,
      "step": 220
    },
    {
      "epoch": 2.61,
      "learning_rate": 9.286608168007678e-06,
      "loss": 0.7951,
      "step": 230
    },
    {
      "epoch": 2.72,
      "learning_rate": 8.576851617267151e-06,
      "loss": 0.7616,
      "step": 240
    },
    {
      "epoch": 2.83,
      "learning_rate": 7.874347104470234e-06,
      "loss": 0.7619,
      "step": 250
    },
    {
      "epoch": 2.95,
      "learning_rate": 7.182674431585703e-06,
      "loss": 0.7701,
      "step": 260
    },
    {
      "epoch": 3.06,
      "learning_rate": 6.505358204009018e-06,
      "loss": 0.7985,
      "step": 270
    },
    {
      "epoch": 3.17,
      "learning_rate": 5.845849869981137e-06,
      "loss": 0.75,
      "step": 280
    },
    {
      "epoch": 3.29,
      "learning_rate": 5.207510132799436e-06,
      "loss": 0.7508,
      "step": 290
    },
    {
      "epoch": 3.4,
      "learning_rate": 4.593591825444028e-06,
      "loss": 0.7655,
      "step": 300
    },
    {
      "epoch": 3.51,
      "learning_rate": 4.007223334886531e-06,
      "loss": 0.7686,
      "step": 310
    },
    {
      "epoch": 3.63,
      "learning_rate": 3.4513926605471504e-06,
      "loss": 0.7551,
      "step": 320
    },
    {
      "epoch": 3.74,
      "learning_rate": 2.9289321881345257e-06,
      "loss": 0.7524,
      "step": 330
    },
    {
      "epoch": 3.85,
      "learning_rate": 2.4425042564574186e-06,
      "loss": 0.7722,
      "step": 340
    },
    {
      "epoch": 3.97,
      "learning_rate": 1.994587590756397e-06,
      "loss": 0.7471,
      "step": 350
    },
    {
      "epoch": 4.08,
      "learning_rate": 1.587464671688187e-06,
      "loss": 0.754,
      "step": 360
    },
    {
      "epoch": 4.19,
      "learning_rate": 1.2232101043274437e-06,
      "loss": 0.7549,
      "step": 370
    },
    {
      "epoch": 4.31,
      "learning_rate": 9.036800464548157e-07,
      "loss": 0.7678,
      "step": 380
    },
    {
      "epoch": 4.42,
      "learning_rate": 6.305027500023841e-07,
      "loss": 0.7584,
      "step": 390
    },
    {
      "epoch": 4.53,
      "learning_rate": 4.0507026385502747e-07,
      "loss": 0.7613,
      "step": 400
    }
  ],
  "max_steps": 440,
  "num_train_epochs": 5,
  "total_flos": 3.127602400495534e+17,
  "trial_name": null,
  "trial_params": null
}