|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.3269309358398038, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0016346546791990192, |
|
"eval_loss": 2.2051539421081543, |
|
"eval_runtime": 12.0166, |
|
"eval_samples_per_second": 21.47, |
|
"eval_steps_per_second": 10.735, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.016346546791990192, |
|
"grad_norm": 2.336392641067505, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5648, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.032693093583980384, |
|
"grad_norm": 2.5723531246185303, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5003, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.049039640375970577, |
|
"grad_norm": 2.1780683994293213, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2044, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06538618716796077, |
|
"grad_norm": 2.092310667037964, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3239, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.08173273395995095, |
|
"grad_norm": 1.9188865423202515, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2587, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.08173273395995095, |
|
"eval_loss": 1.308348298072815, |
|
"eval_runtime": 10.9588, |
|
"eval_samples_per_second": 23.543, |
|
"eval_steps_per_second": 11.771, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.09807928075194115, |
|
"grad_norm": 1.4257837533950806, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1839, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.11442582754393134, |
|
"grad_norm": 2.2847580909729004, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2293, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.13077237433592154, |
|
"grad_norm": 1.782516598701477, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2192, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.14711892112791172, |
|
"grad_norm": 1.4419081211090088, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2495, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1634654679199019, |
|
"grad_norm": 1.44297456741333, |
|
"learning_rate": 0.0002, |
|
"loss": 1.296, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1634654679199019, |
|
"eval_loss": 1.2778656482696533, |
|
"eval_runtime": 10.9625, |
|
"eval_samples_per_second": 23.535, |
|
"eval_steps_per_second": 11.767, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.17981201471189212, |
|
"grad_norm": 1.9191259145736694, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2289, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.1961585615038823, |
|
"grad_norm": 1.459952473640442, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2749, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2125051082958725, |
|
"grad_norm": 1.7037264108657837, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2096, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.22885165508786268, |
|
"grad_norm": 2.016850709915161, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2308, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.2451982018798529, |
|
"grad_norm": 2.183500051498413, |
|
"learning_rate": 0.0002, |
|
"loss": 1.295, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.2451982018798529, |
|
"eval_loss": 1.2615169286727905, |
|
"eval_runtime": 10.9704, |
|
"eval_samples_per_second": 23.518, |
|
"eval_steps_per_second": 11.759, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.2615447486718431, |
|
"grad_norm": 1.81687331199646, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2538, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.2778912954638333, |
|
"grad_norm": 1.7752971649169922, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0968, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.29423784225582345, |
|
"grad_norm": 1.5799192190170288, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1737, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.31058438904781366, |
|
"grad_norm": 1.6626818180084229, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2231, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.3269309358398038, |
|
"grad_norm": 1.9949339628219604, |
|
"learning_rate": 0.0002, |
|
"loss": 1.249, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.3269309358398038, |
|
"eval_loss": 1.239590048789978, |
|
"eval_runtime": 10.9647, |
|
"eval_samples_per_second": 23.53, |
|
"eval_steps_per_second": 11.765, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.72600880873472e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|