|
{ |
|
"best_metric": 0.8958781361579895, |
|
"best_model_checkpoint": "Llama-3.1-8B-medquad-V2/checkpoint-180", |
|
"epoch": 2.630937880633374, |
|
"eval_steps": 10, |
|
"global_step": 180, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.146163215590743, |
|
"grad_norm": 0.3197868764400482, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2503, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.146163215590743, |
|
"eval_loss": 1.1359219551086426, |
|
"eval_runtime": 294.7811, |
|
"eval_samples_per_second": 5.567, |
|
"eval_steps_per_second": 0.699, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.292326431181486, |
|
"grad_norm": 0.24248264729976654, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1182, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.292326431181486, |
|
"eval_loss": 1.0198906660079956, |
|
"eval_runtime": 294.7536, |
|
"eval_samples_per_second": 5.567, |
|
"eval_steps_per_second": 0.699, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.438489646772229, |
|
"grad_norm": 0.21424424648284912, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0864, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.438489646772229, |
|
"eval_loss": 0.9855759143829346, |
|
"eval_runtime": 294.9371, |
|
"eval_samples_per_second": 5.564, |
|
"eval_steps_per_second": 0.698, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.584652862362972, |
|
"grad_norm": 0.1644379198551178, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9031, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.584652862362972, |
|
"eval_loss": 0.9680554866790771, |
|
"eval_runtime": 294.8273, |
|
"eval_samples_per_second": 5.566, |
|
"eval_steps_per_second": 0.699, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.730816077953715, |
|
"grad_norm": 0.3253229260444641, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0773, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.730816077953715, |
|
"eval_loss": 0.9498887062072754, |
|
"eval_runtime": 294.9458, |
|
"eval_samples_per_second": 5.564, |
|
"eval_steps_per_second": 0.698, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.876979293544458, |
|
"grad_norm": 0.17308901250362396, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9575, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.876979293544458, |
|
"eval_loss": 0.9426676630973816, |
|
"eval_runtime": 294.8574, |
|
"eval_samples_per_second": 5.565, |
|
"eval_steps_per_second": 0.699, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.0231425091352009, |
|
"grad_norm": 0.16445040702819824, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9768, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.0231425091352009, |
|
"eval_loss": 0.9452133178710938, |
|
"eval_runtime": 294.8787, |
|
"eval_samples_per_second": 5.565, |
|
"eval_steps_per_second": 0.699, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.169305724725944, |
|
"grad_norm": 0.16264739632606506, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9673, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.169305724725944, |
|
"eval_loss": 0.9263980984687805, |
|
"eval_runtime": 294.9078, |
|
"eval_samples_per_second": 5.564, |
|
"eval_steps_per_second": 0.699, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.315468940316687, |
|
"grad_norm": 0.1224495992064476, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8541, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.315468940316687, |
|
"eval_loss": 0.9281975626945496, |
|
"eval_runtime": 294.9471, |
|
"eval_samples_per_second": 5.564, |
|
"eval_steps_per_second": 0.698, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.46163215590743, |
|
"grad_norm": 0.20104917883872986, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9772, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.46163215590743, |
|
"eval_loss": 0.918040931224823, |
|
"eval_runtime": 294.8931, |
|
"eval_samples_per_second": 5.565, |
|
"eval_steps_per_second": 0.699, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.607795371498173, |
|
"grad_norm": 0.1838410645723343, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8427, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.607795371498173, |
|
"eval_loss": 0.9211308360099792, |
|
"eval_runtime": 294.9367, |
|
"eval_samples_per_second": 5.564, |
|
"eval_steps_per_second": 0.698, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.753958587088916, |
|
"grad_norm": 0.2129116952419281, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9317, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.753958587088916, |
|
"eval_loss": 0.9142090678215027, |
|
"eval_runtime": 294.8478, |
|
"eval_samples_per_second": 5.566, |
|
"eval_steps_per_second": 0.699, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.900121802679659, |
|
"grad_norm": 0.15467825531959534, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9498, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.900121802679659, |
|
"eval_loss": 0.901090681552887, |
|
"eval_runtime": 294.95, |
|
"eval_samples_per_second": 5.564, |
|
"eval_steps_per_second": 0.698, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.0462850182704018, |
|
"grad_norm": 0.13313396275043488, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8412, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.0462850182704018, |
|
"eval_loss": 0.9035805463790894, |
|
"eval_runtime": 294.9282, |
|
"eval_samples_per_second": 5.564, |
|
"eval_steps_per_second": 0.698, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.192448233861145, |
|
"grad_norm": 0.18854761123657227, |
|
"learning_rate": 0.0002, |
|
"loss": 0.899, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.192448233861145, |
|
"eval_loss": 0.9030548334121704, |
|
"eval_runtime": 294.8581, |
|
"eval_samples_per_second": 5.565, |
|
"eval_steps_per_second": 0.699, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.338611449451888, |
|
"grad_norm": 0.14641670882701874, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7488, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.338611449451888, |
|
"eval_loss": 0.8989503383636475, |
|
"eval_runtime": 294.8764, |
|
"eval_samples_per_second": 5.565, |
|
"eval_steps_per_second": 0.699, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.484774665042631, |
|
"grad_norm": 0.24440337717533112, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8824, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.484774665042631, |
|
"eval_loss": 0.9033491611480713, |
|
"eval_runtime": 294.8116, |
|
"eval_samples_per_second": 5.566, |
|
"eval_steps_per_second": 0.699, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.630937880633374, |
|
"grad_norm": 0.19349223375320435, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8334, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.630937880633374, |
|
"eval_loss": 0.8958781361579895, |
|
"eval_runtime": 294.9602, |
|
"eval_samples_per_second": 5.563, |
|
"eval_steps_per_second": 0.698, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.630937880633374, |
|
"step": 180, |
|
"total_flos": 3.658397647269151e+17, |
|
"train_loss": 0.9498361190160115, |
|
"train_runtime": 17857.7741, |
|
"train_samples_per_second": 5.145, |
|
"train_steps_per_second": 0.027 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 476, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 7, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.01 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 5 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.658397647269151e+17, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|