{
  "best_metric": 0.6574238538742065,
  "best_model_checkpoint": "finetuned-fake-food/checkpoint-4000",
  "epoch": 10.0,
  "eval_steps": 100,
  "global_step": 4130,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.24213075060532688,
      "grad_norm": 0.15737777948379517,
      "learning_rate": 0.00019515738498789345,
      "loss": 0.6977,
      "step": 100
    },
    {
      "epoch": 0.24213075060532688,
      "eval_accuracy": 0.5821917808219178,
      "eval_loss": 0.6804767847061157,
      "eval_runtime": 5.525,
      "eval_samples_per_second": 26.425,
      "eval_steps_per_second": 3.439,
      "step": 100
    },
    {
      "epoch": 0.48426150121065376,
      "grad_norm": 0.03558634594082832,
      "learning_rate": 0.00019031476997578695,
      "loss": 0.6956,
      "step": 200
    },
    {
      "epoch": 0.48426150121065376,
      "eval_accuracy": 0.4178082191780822,
      "eval_loss": 0.6935968399047852,
      "eval_runtime": 5.2465,
      "eval_samples_per_second": 27.828,
      "eval_steps_per_second": 3.621,
      "step": 200
    },
    {
      "epoch": 0.7263922518159807,
      "grad_norm": 1.5009288787841797,
      "learning_rate": 0.0001854721549636804,
      "loss": 0.6795,
      "step": 300
    },
    {
      "epoch": 0.7263922518159807,
      "eval_accuracy": 0.6506849315068494,
      "eval_loss": 0.6734184622764587,
      "eval_runtime": 5.9372,
      "eval_samples_per_second": 24.591,
      "eval_steps_per_second": 3.2,
      "step": 300
    },
    {
      "epoch": 0.9685230024213075,
      "grad_norm": 0.23967677354812622,
      "learning_rate": 0.00018062953995157384,
      "loss": 0.7061,
      "step": 400
    },
    {
      "epoch": 0.9685230024213075,
      "eval_accuracy": 0.5821917808219178,
      "eval_loss": 0.6760488152503967,
      "eval_runtime": 5.7531,
      "eval_samples_per_second": 25.378,
      "eval_steps_per_second": 3.303,
      "step": 400
    },
    {
      "epoch": 1.2106537530266344,
      "grad_norm": 2.081388235092163,
      "learning_rate": 0.00017578692493946732,
      "loss": 0.6941,
      "step": 500
    },
    {
      "epoch": 1.2106537530266344,
      "eval_accuracy": 0.5821917808219178,
      "eval_loss": 0.6746240854263306,
      "eval_runtime": 5.812,
      "eval_samples_per_second": 25.121,
      "eval_steps_per_second": 3.269,
      "step": 500
    },
    {
      "epoch": 1.4527845036319613,
      "grad_norm": 0.4285804331302643,
      "learning_rate": 0.0001709443099273608,
      "loss": 0.6898,
      "step": 600
    },
    {
      "epoch": 1.4527845036319613,
      "eval_accuracy": 0.6027397260273972,
      "eval_loss": 0.6674954891204834,
      "eval_runtime": 6.1547,
      "eval_samples_per_second": 23.722,
      "eval_steps_per_second": 3.087,
      "step": 600
    },
    {
      "epoch": 1.694915254237288,
      "grad_norm": 0.12052281200885773,
      "learning_rate": 0.00016610169491525423,
      "loss": 0.6956,
      "step": 700
    },
    {
      "epoch": 1.694915254237288,
      "eval_accuracy": 0.5753424657534246,
      "eval_loss": 0.684603750705719,
      "eval_runtime": 6.0144,
      "eval_samples_per_second": 24.275,
      "eval_steps_per_second": 3.159,
      "step": 700
    },
    {
      "epoch": 1.937046004842615,
      "grad_norm": 0.3585425913333893,
      "learning_rate": 0.0001612590799031477,
      "loss": 0.6847,
      "step": 800
    },
    {
      "epoch": 1.937046004842615,
      "eval_accuracy": 0.5821917808219178,
      "eval_loss": 0.6745873093605042,
      "eval_runtime": 5.8427,
      "eval_samples_per_second": 24.988,
      "eval_steps_per_second": 3.252,
      "step": 800
    },
    {
      "epoch": 2.179176755447942,
      "grad_norm": 1.2445541620254517,
      "learning_rate": 0.00015641646489104115,
      "loss": 0.6949,
      "step": 900
    },
    {
      "epoch": 2.179176755447942,
      "eval_accuracy": 0.589041095890411,
      "eval_loss": 0.6779718399047852,
      "eval_runtime": 4.8307,
      "eval_samples_per_second": 30.223,
      "eval_steps_per_second": 3.933,
      "step": 900
    },
    {
      "epoch": 2.4213075060532687,
      "grad_norm": 1.429865837097168,
      "learning_rate": 0.00015157384987893465,
      "loss": 0.703,
      "step": 1000
    },
    {
      "epoch": 2.4213075060532687,
      "eval_accuracy": 0.5753424657534246,
      "eval_loss": 0.6894732117652893,
      "eval_runtime": 5.0834,
      "eval_samples_per_second": 28.721,
      "eval_steps_per_second": 3.738,
      "step": 1000
    },
    {
      "epoch": 2.663438256658596,
      "grad_norm": 1.2485073804855347,
      "learning_rate": 0.0001467312348668281,
      "loss": 0.6851,
      "step": 1100
    },
    {
      "epoch": 2.663438256658596,
      "eval_accuracy": 0.5821917808219178,
      "eval_loss": 0.6741558909416199,
      "eval_runtime": 6.0858,
      "eval_samples_per_second": 23.99,
      "eval_steps_per_second": 3.122,
      "step": 1100
    },
    {
      "epoch": 2.9055690072639226,
      "grad_norm": 0.1997382789850235,
      "learning_rate": 0.00014188861985472154,
      "loss": 0.6878,
      "step": 1200
    },
    {
      "epoch": 2.9055690072639226,
      "eval_accuracy": 0.6301369863013698,
      "eval_loss": 0.674239456653595,
      "eval_runtime": 6.1499,
      "eval_samples_per_second": 23.74,
      "eval_steps_per_second": 3.089,
      "step": 1200
    },
    {
      "epoch": 3.1476997578692494,
      "grad_norm": 0.4324168860912323,
      "learning_rate": 0.00013704600484261504,
      "loss": 0.68,
      "step": 1300
    },
    {
      "epoch": 3.1476997578692494,
      "eval_accuracy": 0.5821917808219178,
      "eval_loss": 0.6712663173675537,
      "eval_runtime": 6.0266,
      "eval_samples_per_second": 24.226,
      "eval_steps_per_second": 3.153,
      "step": 1300
    },
    {
      "epoch": 3.389830508474576,
      "grad_norm": 0.9948041439056396,
      "learning_rate": 0.00013220338983050849,
      "loss": 0.6728,
      "step": 1400
    },
    {
      "epoch": 3.389830508474576,
      "eval_accuracy": 0.5958904109589042,
      "eval_loss": 0.6838211417198181,
      "eval_runtime": 6.1631,
      "eval_samples_per_second": 23.689,
      "eval_steps_per_second": 3.083,
      "step": 1400
    },
    {
      "epoch": 3.6319612590799033,
      "grad_norm": 1.2490299940109253,
      "learning_rate": 0.00012736077481840193,
      "loss": 0.698,
      "step": 1500
    },
    {
      "epoch": 3.6319612590799033,
      "eval_accuracy": 0.5821917808219178,
      "eval_loss": 0.6774668097496033,
      "eval_runtime": 6.0857,
      "eval_samples_per_second": 23.991,
      "eval_steps_per_second": 3.122,
      "step": 1500
    },
    {
      "epoch": 3.87409200968523,
      "grad_norm": 0.2908919155597687,
      "learning_rate": 0.0001225181598062954,
      "loss": 0.7033,
      "step": 1600
    },
    {
      "epoch": 3.87409200968523,
      "eval_accuracy": 0.5821917808219178,
      "eval_loss": 0.6734635829925537,
      "eval_runtime": 5.783,
      "eval_samples_per_second": 25.247,
      "eval_steps_per_second": 3.286,
      "step": 1600
    },
    {
      "epoch": 4.116222760290557,
      "grad_norm": 0.21098549664020538,
      "learning_rate": 0.00011767554479418887,
      "loss": 0.6973,
      "step": 1700
    },
    {
      "epoch": 4.116222760290557,
      "eval_accuracy": 0.6232876712328768,
      "eval_loss": 0.6803831458091736,
      "eval_runtime": 4.6265,
      "eval_samples_per_second": 31.557,
      "eval_steps_per_second": 4.107,
      "step": 1700
    },
    {
      "epoch": 4.358353510895884,
      "grad_norm": 0.03869936615228653,
      "learning_rate": 0.00011283292978208233,
      "loss": 0.6822,
      "step": 1800
    },
    {
      "epoch": 4.358353510895884,
      "eval_accuracy": 0.6027397260273972,
      "eval_loss": 0.6847726702690125,
      "eval_runtime": 4.6717,
      "eval_samples_per_second": 31.252,
      "eval_steps_per_second": 4.067,
      "step": 1800
    },
    {
      "epoch": 4.600484261501211,
      "grad_norm": 0.13196176290512085,
      "learning_rate": 0.00010799031476997579,
      "loss": 0.6896,
      "step": 1900
    },
    {
      "epoch": 4.600484261501211,
      "eval_accuracy": 0.541095890410959,
      "eval_loss": 0.6835151314735413,
      "eval_runtime": 5.191,
      "eval_samples_per_second": 28.126,
      "eval_steps_per_second": 3.66,
      "step": 1900
    },
    {
      "epoch": 4.842615012106537,
      "grad_norm": 0.5055987238883972,
      "learning_rate": 0.00010314769975786926,
      "loss": 0.6772,
      "step": 2000
    },
    {
      "epoch": 4.842615012106537,
      "eval_accuracy": 0.6095890410958904,
      "eval_loss": 0.6753013134002686,
      "eval_runtime": 6.1231,
      "eval_samples_per_second": 23.844,
      "eval_steps_per_second": 3.103,
      "step": 2000
    },
    {
      "epoch": 5.084745762711864,
      "grad_norm": 0.4209335148334503,
      "learning_rate": 9.835351089588378e-05,
      "loss": 0.6843,
      "step": 2100
    },
    {
      "epoch": 5.084745762711864,
      "eval_accuracy": 0.589041095890411,
      "eval_loss": 0.6667279601097107,
      "eval_runtime": 4.6413,
      "eval_samples_per_second": 31.457,
      "eval_steps_per_second": 4.094,
      "step": 2100
    },
    {
      "epoch": 5.326876513317191,
      "grad_norm": 1.4965670108795166,
      "learning_rate": 9.351089588377724e-05,
      "loss": 0.6898,
      "step": 2200
    },
    {
      "epoch": 5.326876513317191,
      "eval_accuracy": 0.5821917808219178,
      "eval_loss": 0.6725605726242065,
      "eval_runtime": 6.034,
      "eval_samples_per_second": 24.196,
      "eval_steps_per_second": 3.149,
      "step": 2200
    },
    {
      "epoch": 5.5690072639225185,
      "grad_norm": 0.4163062572479248,
      "learning_rate": 8.86682808716707e-05,
      "loss": 0.6868,
      "step": 2300
    },
    {
      "epoch": 5.5690072639225185,
      "eval_accuracy": 0.5616438356164384,
      "eval_loss": 0.6784049272537231,
      "eval_runtime": 6.0533,
      "eval_samples_per_second": 24.119,
      "eval_steps_per_second": 3.139,
      "step": 2300
    },
    {
      "epoch": 5.811138014527845,
      "grad_norm": 1.2287280559539795,
      "learning_rate": 8.382566585956417e-05,
      "loss": 0.6636,
      "step": 2400
    },
    {
      "epoch": 5.811138014527845,
      "eval_accuracy": 0.6301369863013698,
      "eval_loss": 0.6639688611030579,
      "eval_runtime": 5.3104,
      "eval_samples_per_second": 27.493,
      "eval_steps_per_second": 3.578,
      "step": 2400
    },
    {
      "epoch": 6.053268765133172,
      "grad_norm": 0.8932170867919922,
      "learning_rate": 7.898305084745763e-05,
      "loss": 0.6833,
      "step": 2500
    },
    {
      "epoch": 6.053268765133172,
      "eval_accuracy": 0.5136986301369864,
      "eval_loss": 0.676824688911438,
      "eval_runtime": 4.6631,
      "eval_samples_per_second": 31.309,
      "eval_steps_per_second": 4.074,
      "step": 2500
    },
    {
      "epoch": 6.295399515738499,
      "grad_norm": 1.1837154626846313,
      "learning_rate": 7.414043583535109e-05,
      "loss": 0.678,
      "step": 2600
    },
    {
      "epoch": 6.295399515738499,
      "eval_accuracy": 0.6232876712328768,
      "eval_loss": 0.6652230024337769,
      "eval_runtime": 4.933,
      "eval_samples_per_second": 29.597,
      "eval_steps_per_second": 3.852,
      "step": 2600
    },
    {
      "epoch": 6.5375302663438255,
      "grad_norm": 1.4030615091323853,
      "learning_rate": 6.929782082324455e-05,
      "loss": 0.6672,
      "step": 2700
    },
    {
      "epoch": 6.5375302663438255,
      "eval_accuracy": 0.547945205479452,
      "eval_loss": 0.6735221147537231,
      "eval_runtime": 6.0106,
      "eval_samples_per_second": 24.29,
      "eval_steps_per_second": 3.161,
      "step": 2700
    },
    {
      "epoch": 6.779661016949152,
      "grad_norm": 0.6782599687576294,
      "learning_rate": 6.445520581113802e-05,
      "loss": 0.6975,
      "step": 2800
    },
    {
      "epoch": 6.779661016949152,
      "eval_accuracy": 0.589041095890411,
      "eval_loss": 0.6686810851097107,
      "eval_runtime": 4.7582,
      "eval_samples_per_second": 30.684,
      "eval_steps_per_second": 3.993,
      "step": 2800
    },
    {
      "epoch": 7.021791767554479,
      "grad_norm": 0.4288092255592346,
      "learning_rate": 5.961259079903147e-05,
      "loss": 0.6858,
      "step": 2900
    },
    {
      "epoch": 7.021791767554479,
      "eval_accuracy": 0.6027397260273972,
      "eval_loss": 0.6672346591949463,
      "eval_runtime": 4.7612,
      "eval_samples_per_second": 30.665,
      "eval_steps_per_second": 3.991,
      "step": 2900
    },
    {
      "epoch": 7.263922518159807,
      "grad_norm": 1.373633861541748,
      "learning_rate": 5.4769975786924946e-05,
      "loss": 0.6687,
      "step": 3000
    },
    {
      "epoch": 7.263922518159807,
      "eval_accuracy": 0.5753424657534246,
      "eval_loss": 0.6647915840148926,
      "eval_runtime": 4.681,
      "eval_samples_per_second": 31.19,
      "eval_steps_per_second": 4.059,
      "step": 3000
    },
    {
      "epoch": 7.506053268765133,
      "grad_norm": 0.4883480668067932,
      "learning_rate": 4.9927360774818404e-05,
      "loss": 0.6636,
      "step": 3100
    },
    {
      "epoch": 7.506053268765133,
      "eval_accuracy": 0.5684931506849316,
      "eval_loss": 0.6673935055732727,
      "eval_runtime": 4.6832,
      "eval_samples_per_second": 31.175,
      "eval_steps_per_second": 4.057,
      "step": 3100
    },
    {
      "epoch": 7.74818401937046,
      "grad_norm": 0.2553524672985077,
      "learning_rate": 4.508474576271187e-05,
      "loss": 0.6904,
      "step": 3200
    },
    {
      "epoch": 7.74818401937046,
      "eval_accuracy": 0.5342465753424658,
      "eval_loss": 0.6751775741577148,
      "eval_runtime": 4.7128,
      "eval_samples_per_second": 30.979,
      "eval_steps_per_second": 4.032,
      "step": 3200
    },
    {
      "epoch": 7.990314769975787,
      "grad_norm": 0.5203524827957153,
      "learning_rate": 4.024213075060533e-05,
      "loss": 0.6585,
      "step": 3300
    },
    {
      "epoch": 7.990314769975787,
      "eval_accuracy": 0.5958904109589042,
      "eval_loss": 0.7023173570632935,
      "eval_runtime": 5.9291,
      "eval_samples_per_second": 24.624,
      "eval_steps_per_second": 3.205,
      "step": 3300
    },
    {
      "epoch": 8.232445520581114,
      "grad_norm": 1.3221914768218994,
      "learning_rate": 3.539951573849879e-05,
      "loss": 0.6874,
      "step": 3400
    },
    {
      "epoch": 8.232445520581114,
      "eval_accuracy": 0.5753424657534246,
      "eval_loss": 0.6615224480628967,
      "eval_runtime": 5.9877,
      "eval_samples_per_second": 24.383,
      "eval_steps_per_second": 3.173,
      "step": 3400
    },
    {
      "epoch": 8.474576271186441,
      "grad_norm": 0.6332941651344299,
      "learning_rate": 3.055690072639225e-05,
      "loss": 0.6444,
      "step": 3500
    },
    {
      "epoch": 8.474576271186441,
      "eval_accuracy": 0.5205479452054794,
      "eval_loss": 0.772119402885437,
      "eval_runtime": 5.8976,
      "eval_samples_per_second": 24.756,
      "eval_steps_per_second": 3.222,
      "step": 3500
    },
    {
      "epoch": 8.716707021791768,
      "grad_norm": 1.8709771633148193,
      "learning_rate": 2.5714285714285714e-05,
      "loss": 0.6803,
      "step": 3600
    },
    {
      "epoch": 8.716707021791768,
      "eval_accuracy": 0.5821917808219178,
      "eval_loss": 0.6808822751045227,
      "eval_runtime": 4.743,
      "eval_samples_per_second": 30.782,
      "eval_steps_per_second": 4.006,
      "step": 3600
    },
    {
      "epoch": 8.958837772397095,
      "grad_norm": 1.1251460313796997,
      "learning_rate": 2.0871670702179177e-05,
      "loss": 0.6782,
      "step": 3700
    },
    {
      "epoch": 8.958837772397095,
      "eval_accuracy": 0.5821917808219178,
      "eval_loss": 0.6638409495353699,
      "eval_runtime": 5.3184,
      "eval_samples_per_second": 27.452,
      "eval_steps_per_second": 3.573,
      "step": 3700
    },
    {
      "epoch": 9.200968523002421,
      "grad_norm": 0.5918538570404053,
      "learning_rate": 1.602905569007264e-05,
      "loss": 0.6536,
      "step": 3800
    },
    {
      "epoch": 9.200968523002421,
      "eval_accuracy": 0.6232876712328768,
      "eval_loss": 0.6606671214103699,
      "eval_runtime": 4.6821,
      "eval_samples_per_second": 31.183,
      "eval_steps_per_second": 4.058,
      "step": 3800
    },
    {
      "epoch": 9.443099273607748,
      "grad_norm": 1.1931524276733398,
      "learning_rate": 1.1186440677966102e-05,
      "loss": 0.6188,
      "step": 3900
    },
    {
      "epoch": 9.443099273607748,
      "eval_accuracy": 0.5684931506849316,
      "eval_loss": 0.7090215682983398,
      "eval_runtime": 4.6915,
      "eval_samples_per_second": 31.12,
      "eval_steps_per_second": 4.05,
      "step": 3900
    },
    {
      "epoch": 9.685230024213075,
      "grad_norm": 2.177264928817749,
      "learning_rate": 6.3438256658595635e-06,
      "loss": 0.7026,
      "step": 4000
    },
    {
      "epoch": 9.685230024213075,
      "eval_accuracy": 0.6164383561643836,
      "eval_loss": 0.6574238538742065,
      "eval_runtime": 4.7502,
      "eval_samples_per_second": 30.736,
      "eval_steps_per_second": 4.0,
      "step": 4000
    },
    {
      "epoch": 9.927360774818402,
      "grad_norm": 0.5541784167289734,
      "learning_rate": 1.549636803874092e-06,
      "loss": 0.7008,
      "step": 4100
    },
    {
      "epoch": 9.927360774818402,
      "eval_accuracy": 0.6095890410958904,
      "eval_loss": 0.6576805710792542,
      "eval_runtime": 4.7337,
      "eval_samples_per_second": 30.843,
      "eval_steps_per_second": 4.014,
      "step": 4100
    },
    {
      "epoch": 10.0,
      "step": 4130,
      "total_flos": 6.400838342165299e+17,
      "train_loss": 0.6831480086282725,
      "train_runtime": 1157.4221,
      "train_samples_per_second": 7.137,
      "train_steps_per_second": 3.568
    }
  ],
  "logging_steps": 100,
  "max_steps": 4130,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 6.400838342165299e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}
|