|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.991123701605288, |
|
"global_step": 164, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00030000000000000003, |
|
"loss": 2.5934, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0003998458072481446, |
|
"loss": 2.5887, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0003990369453344394, |
|
"loss": 2.5383, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00039753766811902755, |
|
"loss": 2.5382, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0003953531762641745, |
|
"loss": 2.5684, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00039249104729072946, |
|
"loss": 2.5694, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00038896120929337566, |
|
"loss": 2.5685, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0003847759065022574, |
|
"loss": 2.5249, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00037994965681044433, |
|
"loss": 2.5835, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00037449920141455944, |
|
"loss": 2.5326, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00036844344674325733, |
|
"loss": 2.564, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0003618033988749895, |
|
"loss": 2.5717, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0003546020906725474, |
|
"loss": 2.5149, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0003468645018871371, |
|
"loss": 2.5695, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0003386174725091272, |
|
"loss": 2.5374, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0003298896096660367, |
|
"loss": 2.5413, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0003207111883907143, |
|
"loss": 2.509, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00031111404660392046, |
|
"loss": 2.5628, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00030113147467559695, |
|
"loss": 2.5069, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00029079809994790937, |
|
"loss": 2.5331, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0002801497666206282, |
|
"loss": 2.5527, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0002692234114154986, |
|
"loss": 2.6179, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00025805693545089247, |
|
"loss": 2.5411, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00024668907277118114, |
|
"loss": 2.5583, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00023515925598687094, |
|
"loss": 2.534, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00022350747949156756, |
|
"loss": 2.5433, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.0002117741607302378, |
|
"loss": 2.5487, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.0002, |
|
"loss": 3.1324, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00018822583926976218, |
|
"loss": 2.4641, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00017649252050843252, |
|
"loss": 2.4806, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.0001648407440131291, |
|
"loss": 2.4693, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.000153310927228819, |
|
"loss": 2.5011, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00014194306454910757, |
|
"loss": 2.4595, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00013077658858450138, |
|
"loss": 2.4893, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00011985023337937184, |
|
"loss": 2.4932, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00010920190005209065, |
|
"loss": 2.4871, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 9.886852532440312e-05, |
|
"loss": 2.4672, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 8.888595339607961e-05, |
|
"loss": 2.4597, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 7.928881160928572e-05, |
|
"loss": 2.4531, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 7.011039033396329e-05, |
|
"loss": 2.4749, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 6.138252749087286e-05, |
|
"loss": 2.4551, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 5.313549811286293e-05, |
|
"loss": 2.4796, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 4.53979093274526e-05, |
|
"loss": 2.4705, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 3.819660112501053e-05, |
|
"loss": 2.4915, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 3.1556553256742713e-05, |
|
"loss": 2.4958, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.5500798585440567e-05, |
|
"loss": 2.4814, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.0050343189555743e-05, |
|
"loss": 2.5034, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 1.5224093497742653e-05, |
|
"loss": 2.4671, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 1.1038790706624391e-05, |
|
"loss": 2.4987, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 7.508952709270567e-06, |
|
"loss": 2.4618, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 4.646823735825523e-06, |
|
"loss": 2.4815, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.462331880972468e-06, |
|
"loss": 2.4595, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 9.630546655606364e-07, |
|
"loss": 2.4925, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.5419275185541982e-07, |
|
"loss": 2.4333, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"step": 164, |
|
"total_flos": 4.620604962546647e+17, |
|
"train_loss": 2.525294606278582, |
|
"train_runtime": 28715.0205, |
|
"train_samples_per_second": 0.738, |
|
"train_steps_per_second": 0.006 |
|
} |
|
], |
|
"max_steps": 164, |
|
"num_train_epochs": 2, |
|
"total_flos": 4.620604962546647e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|