|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 3, |
|
"global_step": 141, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06382978723404255, |
|
"grad_norm": 134.82960510253906, |
|
"learning_rate": 3.4524225370179614e-06, |
|
"loss": 2.0515, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.06382978723404255, |
|
"eval_loss": 1.1583281755447388, |
|
"eval_runtime": 97.0528, |
|
"eval_samples_per_second": 3.091, |
|
"eval_steps_per_second": 0.103, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.1276595744680851, |
|
"grad_norm": 141.0943145751953, |
|
"learning_rate": 3.377369873169745e-06, |
|
"loss": 1.5004, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.1276595744680851, |
|
"eval_loss": 1.1162742376327515, |
|
"eval_runtime": 60.2202, |
|
"eval_samples_per_second": 4.982, |
|
"eval_steps_per_second": 0.166, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.19148936170212766, |
|
"grad_norm": 134.9931640625, |
|
"learning_rate": 3.3023172093215284e-06, |
|
"loss": 1.5627, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.19148936170212766, |
|
"eval_loss": 1.0802254676818848, |
|
"eval_runtime": 41.4969, |
|
"eval_samples_per_second": 7.229, |
|
"eval_steps_per_second": 0.241, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.2553191489361702, |
|
"grad_norm": 113.67478942871094, |
|
"learning_rate": 3.227264545473312e-06, |
|
"loss": 1.3625, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.2553191489361702, |
|
"eval_loss": 1.0606831312179565, |
|
"eval_runtime": 41.3217, |
|
"eval_samples_per_second": 7.26, |
|
"eval_steps_per_second": 0.242, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.3191489361702128, |
|
"grad_norm": 105.70118713378906, |
|
"learning_rate": 3.152211881625095e-06, |
|
"loss": 1.6648, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.3191489361702128, |
|
"eval_loss": 1.0375689268112183, |
|
"eval_runtime": 48.7659, |
|
"eval_samples_per_second": 6.152, |
|
"eval_steps_per_second": 0.205, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.3829787234042553, |
|
"grad_norm": 96.33596801757812, |
|
"learning_rate": 3.077159217776879e-06, |
|
"loss": 1.3305, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.3829787234042553, |
|
"eval_loss": 1.0117995738983154, |
|
"eval_runtime": 41.5605, |
|
"eval_samples_per_second": 7.218, |
|
"eval_steps_per_second": 0.241, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.44680851063829785, |
|
"grad_norm": 105.89424133300781, |
|
"learning_rate": 3.002106553928662e-06, |
|
"loss": 1.2195, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.44680851063829785, |
|
"eval_loss": 0.9878906011581421, |
|
"eval_runtime": 44.9672, |
|
"eval_samples_per_second": 6.672, |
|
"eval_steps_per_second": 0.222, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.5106382978723404, |
|
"grad_norm": 96.90633392333984, |
|
"learning_rate": 2.9270538900804454e-06, |
|
"loss": 1.4076, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.5106382978723404, |
|
"eval_loss": 0.9666847586631775, |
|
"eval_runtime": 52.2922, |
|
"eval_samples_per_second": 5.737, |
|
"eval_steps_per_second": 0.191, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.574468085106383, |
|
"grad_norm": 101.37423706054688, |
|
"learning_rate": 2.852001226232229e-06, |
|
"loss": 1.5353, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.574468085106383, |
|
"eval_loss": 0.9519509077072144, |
|
"eval_runtime": 41.7527, |
|
"eval_samples_per_second": 7.185, |
|
"eval_steps_per_second": 0.24, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.6382978723404256, |
|
"grad_norm": 92.97295379638672, |
|
"learning_rate": 2.7769485623840124e-06, |
|
"loss": 1.2954, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.6382978723404256, |
|
"eval_loss": 0.937315046787262, |
|
"eval_runtime": 41.2373, |
|
"eval_samples_per_second": 7.275, |
|
"eval_steps_per_second": 0.242, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.7021276595744681, |
|
"grad_norm": 110.28973388671875, |
|
"learning_rate": 2.701895898535796e-06, |
|
"loss": 1.489, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.7021276595744681, |
|
"eval_loss": 0.9187784790992737, |
|
"eval_runtime": 42.0237, |
|
"eval_samples_per_second": 7.139, |
|
"eval_steps_per_second": 0.238, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.7659574468085106, |
|
"grad_norm": 93.82958984375, |
|
"learning_rate": 2.6268432346875793e-06, |
|
"loss": 1.5987, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.7659574468085106, |
|
"eval_loss": 0.9012949466705322, |
|
"eval_runtime": 41.6757, |
|
"eval_samples_per_second": 7.198, |
|
"eval_steps_per_second": 0.24, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.8297872340425532, |
|
"grad_norm": 74.33155822753906, |
|
"learning_rate": 2.551790570839363e-06, |
|
"loss": 1.3207, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.8297872340425532, |
|
"eval_loss": 0.8868340849876404, |
|
"eval_runtime": 41.5644, |
|
"eval_samples_per_second": 7.218, |
|
"eval_steps_per_second": 0.241, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.8936170212765957, |
|
"grad_norm": 100.4666519165039, |
|
"learning_rate": 2.4767379069911463e-06, |
|
"loss": 1.3121, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.8936170212765957, |
|
"eval_loss": 0.8769957423210144, |
|
"eval_runtime": 51.3924, |
|
"eval_samples_per_second": 5.837, |
|
"eval_steps_per_second": 0.195, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.9574468085106383, |
|
"grad_norm": 78.03990173339844, |
|
"learning_rate": 2.4016852431429298e-06, |
|
"loss": 1.198, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.9574468085106383, |
|
"eval_loss": 0.8720031976699829, |
|
"eval_runtime": 41.262, |
|
"eval_samples_per_second": 7.271, |
|
"eval_steps_per_second": 0.242, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.0212765957446808, |
|
"grad_norm": 48.33544158935547, |
|
"learning_rate": 2.326632579294713e-06, |
|
"loss": 1.0786, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 1.0212765957446808, |
|
"eval_loss": 0.8655586838722229, |
|
"eval_runtime": 41.5498, |
|
"eval_samples_per_second": 7.22, |
|
"eval_steps_per_second": 0.241, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 1.0851063829787233, |
|
"grad_norm": 35.39493179321289, |
|
"learning_rate": 2.2515799154464967e-06, |
|
"loss": 0.7367, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 1.0851063829787233, |
|
"eval_loss": 0.8586752414703369, |
|
"eval_runtime": 47.8744, |
|
"eval_samples_per_second": 6.266, |
|
"eval_steps_per_second": 0.209, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 1.148936170212766, |
|
"grad_norm": 38.954620361328125, |
|
"learning_rate": 2.17652725159828e-06, |
|
"loss": 0.5906, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 1.148936170212766, |
|
"eval_loss": 0.854831874370575, |
|
"eval_runtime": 41.7553, |
|
"eval_samples_per_second": 7.185, |
|
"eval_steps_per_second": 0.239, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 1.2127659574468086, |
|
"grad_norm": 61.59217071533203, |
|
"learning_rate": 2.1014745877500633e-06, |
|
"loss": 0.794, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 1.2127659574468086, |
|
"eval_loss": 0.8518243432044983, |
|
"eval_runtime": 41.3107, |
|
"eval_samples_per_second": 7.262, |
|
"eval_steps_per_second": 0.242, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 1.2765957446808511, |
|
"grad_norm": 43.460899353027344, |
|
"learning_rate": 2.026421923901847e-06, |
|
"loss": 0.6699, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.2765957446808511, |
|
"eval_loss": 0.849520742893219, |
|
"eval_runtime": 41.3487, |
|
"eval_samples_per_second": 7.255, |
|
"eval_steps_per_second": 0.242, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.3404255319148937, |
|
"grad_norm": 40.51012420654297, |
|
"learning_rate": 1.9513692600536303e-06, |
|
"loss": 0.5351, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 1.3404255319148937, |
|
"eval_loss": 0.8519204258918762, |
|
"eval_runtime": 41.2821, |
|
"eval_samples_per_second": 7.267, |
|
"eval_steps_per_second": 0.242, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 1.4042553191489362, |
|
"grad_norm": 55.731903076171875, |
|
"learning_rate": 1.8763165962054137e-06, |
|
"loss": 0.6526, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 1.4042553191489362, |
|
"eval_loss": 0.85467129945755, |
|
"eval_runtime": 41.1797, |
|
"eval_samples_per_second": 7.285, |
|
"eval_steps_per_second": 0.243, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 1.4680851063829787, |
|
"grad_norm": 55.70009231567383, |
|
"learning_rate": 1.801263932357197e-06, |
|
"loss": 0.7753, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 1.4680851063829787, |
|
"eval_loss": 0.8556016087532043, |
|
"eval_runtime": 41.2215, |
|
"eval_samples_per_second": 7.278, |
|
"eval_steps_per_second": 0.243, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 1.5319148936170213, |
|
"grad_norm": 51.552974700927734, |
|
"learning_rate": 1.7262112685089807e-06, |
|
"loss": 0.6996, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.5319148936170213, |
|
"eval_loss": 0.8541069626808167, |
|
"eval_runtime": 41.8446, |
|
"eval_samples_per_second": 7.169, |
|
"eval_steps_per_second": 0.239, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.5957446808510638, |
|
"grad_norm": 61.95724105834961, |
|
"learning_rate": 1.6511586046607642e-06, |
|
"loss": 0.8078, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.5957446808510638, |
|
"eval_loss": 0.8522682785987854, |
|
"eval_runtime": 45.1234, |
|
"eval_samples_per_second": 6.648, |
|
"eval_steps_per_second": 0.222, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.6595744680851063, |
|
"grad_norm": 51.09059524536133, |
|
"learning_rate": 1.5761059408125475e-06, |
|
"loss": 0.6704, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 1.6595744680851063, |
|
"eval_loss": 0.8499072194099426, |
|
"eval_runtime": 41.6072, |
|
"eval_samples_per_second": 7.21, |
|
"eval_steps_per_second": 0.24, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 1.7234042553191489, |
|
"grad_norm": 57.01670455932617, |
|
"learning_rate": 1.501053276964331e-06, |
|
"loss": 0.5976, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 1.7234042553191489, |
|
"eval_loss": 0.8474313616752625, |
|
"eval_runtime": 41.405, |
|
"eval_samples_per_second": 7.246, |
|
"eval_steps_per_second": 0.242, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 1.7872340425531914, |
|
"grad_norm": 58.575130462646484, |
|
"learning_rate": 1.4260006131161144e-06, |
|
"loss": 0.6588, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 1.7872340425531914, |
|
"eval_loss": 0.8449164032936096, |
|
"eval_runtime": 41.6188, |
|
"eval_samples_per_second": 7.208, |
|
"eval_steps_per_second": 0.24, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 1.851063829787234, |
|
"grad_norm": 60.291622161865234, |
|
"learning_rate": 1.350947949267898e-06, |
|
"loss": 0.7081, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 1.851063829787234, |
|
"eval_loss": 0.8420330882072449, |
|
"eval_runtime": 41.9537, |
|
"eval_samples_per_second": 7.151, |
|
"eval_steps_per_second": 0.238, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 1.9148936170212765, |
|
"grad_norm": 50.23271942138672, |
|
"learning_rate": 1.2758952854196814e-06, |
|
"loss": 0.5632, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.9148936170212765, |
|
"eval_loss": 0.8388514518737793, |
|
"eval_runtime": 41.6015, |
|
"eval_samples_per_second": 7.211, |
|
"eval_steps_per_second": 0.24, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.978723404255319, |
|
"grad_norm": 52.8415412902832, |
|
"learning_rate": 1.2008426215714649e-06, |
|
"loss": 0.6249, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 1.978723404255319, |
|
"eval_loss": 0.8353903293609619, |
|
"eval_runtime": 41.6241, |
|
"eval_samples_per_second": 7.207, |
|
"eval_steps_per_second": 0.24, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 2.0425531914893615, |
|
"grad_norm": 55.50529098510742, |
|
"learning_rate": 1.1257899577232484e-06, |
|
"loss": 0.612, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 2.0425531914893615, |
|
"eval_loss": 0.8322966694831848, |
|
"eval_runtime": 41.6827, |
|
"eval_samples_per_second": 7.197, |
|
"eval_steps_per_second": 0.24, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 2.106382978723404, |
|
"grad_norm": 33.535972595214844, |
|
"learning_rate": 1.0507372938750316e-06, |
|
"loss": 0.4171, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 2.106382978723404, |
|
"eval_loss": 0.8299477100372314, |
|
"eval_runtime": 41.6454, |
|
"eval_samples_per_second": 7.204, |
|
"eval_steps_per_second": 0.24, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 2.1702127659574466, |
|
"grad_norm": 33.902915954589844, |
|
"learning_rate": 9.756846300268151e-07, |
|
"loss": 0.3473, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 2.1702127659574466, |
|
"eval_loss": 0.8301065564155579, |
|
"eval_runtime": 41.3799, |
|
"eval_samples_per_second": 7.25, |
|
"eval_steps_per_second": 0.242, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 2.2340425531914896, |
|
"grad_norm": 54.96790313720703, |
|
"learning_rate": 9.006319661785985e-07, |
|
"loss": 0.4751, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 2.2340425531914896, |
|
"eval_loss": 0.8314597606658936, |
|
"eval_runtime": 41.4767, |
|
"eval_samples_per_second": 7.233, |
|
"eval_steps_per_second": 0.241, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 2.297872340425532, |
|
"grad_norm": 43.61612319946289, |
|
"learning_rate": 8.255793023303821e-07, |
|
"loss": 0.4088, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 2.297872340425532, |
|
"eval_loss": 0.8334099054336548, |
|
"eval_runtime": 60.1786, |
|
"eval_samples_per_second": 4.985, |
|
"eval_steps_per_second": 0.166, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 2.3617021276595747, |
|
"grad_norm": 41.3808708190918, |
|
"learning_rate": 7.505266384821655e-07, |
|
"loss": 0.3174, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 2.3617021276595747, |
|
"eval_loss": 0.8363153338432312, |
|
"eval_runtime": 41.6562, |
|
"eval_samples_per_second": 7.202, |
|
"eval_steps_per_second": 0.24, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 2.425531914893617, |
|
"grad_norm": 39.414100646972656, |
|
"learning_rate": 6.75473974633949e-07, |
|
"loss": 0.3267, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 2.425531914893617, |
|
"eval_loss": 0.8386973142623901, |
|
"eval_runtime": 41.5441, |
|
"eval_samples_per_second": 7.221, |
|
"eval_steps_per_second": 0.241, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 2.4893617021276597, |
|
"grad_norm": 41.55443572998047, |
|
"learning_rate": 6.004213107857324e-07, |
|
"loss": 0.3142, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 2.4893617021276597, |
|
"eval_loss": 0.8412825465202332, |
|
"eval_runtime": 41.7129, |
|
"eval_samples_per_second": 7.192, |
|
"eval_steps_per_second": 0.24, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 2.5531914893617023, |
|
"grad_norm": 45.54193115234375, |
|
"learning_rate": 5.253686469375158e-07, |
|
"loss": 0.4253, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.5531914893617023, |
|
"eval_loss": 0.8432453274726868, |
|
"eval_runtime": 41.5872, |
|
"eval_samples_per_second": 7.214, |
|
"eval_steps_per_second": 0.24, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.617021276595745, |
|
"grad_norm": 58.93795394897461, |
|
"learning_rate": 4.5031598308929925e-07, |
|
"loss": 0.4269, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 2.617021276595745, |
|
"eval_loss": 0.8454075455665588, |
|
"eval_runtime": 41.7045, |
|
"eval_samples_per_second": 7.193, |
|
"eval_steps_per_second": 0.24, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 2.6808510638297873, |
|
"grad_norm": 30.54688835144043, |
|
"learning_rate": 3.7526331924108274e-07, |
|
"loss": 0.2765, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 2.6808510638297873, |
|
"eval_loss": 0.8469324707984924, |
|
"eval_runtime": 41.6321, |
|
"eval_samples_per_second": 7.206, |
|
"eval_steps_per_second": 0.24, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 2.74468085106383, |
|
"grad_norm": 46.656986236572266, |
|
"learning_rate": 3.002106553928662e-07, |
|
"loss": 0.3071, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 2.74468085106383, |
|
"eval_loss": 0.848118782043457, |
|
"eval_runtime": 41.3133, |
|
"eval_samples_per_second": 7.262, |
|
"eval_steps_per_second": 0.242, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 2.8085106382978724, |
|
"grad_norm": 41.179161071777344, |
|
"learning_rate": 2.2515799154464963e-07, |
|
"loss": 0.3277, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 2.8085106382978724, |
|
"eval_loss": 0.8486995100975037, |
|
"eval_runtime": 41.4208, |
|
"eval_samples_per_second": 7.243, |
|
"eval_steps_per_second": 0.241, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 2.872340425531915, |
|
"grad_norm": 35.64809036254883, |
|
"learning_rate": 1.501053276964331e-07, |
|
"loss": 0.3206, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 2.872340425531915, |
|
"eval_loss": 0.8487841486930847, |
|
"eval_runtime": 41.4922, |
|
"eval_samples_per_second": 7.23, |
|
"eval_steps_per_second": 0.241, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 2.9361702127659575, |
|
"grad_norm": 39.29479217529297, |
|
"learning_rate": 7.505266384821656e-08, |
|
"loss": 0.4095, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 2.9361702127659575, |
|
"eval_loss": 0.848578929901123, |
|
"eval_runtime": 41.4984, |
|
"eval_samples_per_second": 7.229, |
|
"eval_steps_per_second": 0.241, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 37.54413604736328, |
|
"learning_rate": 0.0, |
|
"loss": 0.2987, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.8485209941864014, |
|
"eval_runtime": 41.5472, |
|
"eval_samples_per_second": 7.221, |
|
"eval_steps_per_second": 0.241, |
|
"step": 141 |
|
} |
|
], |
|
"logging_steps": 3, |
|
"max_steps": 141, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 523328480700102.0, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": { |
|
"_wandb": {}, |
|
"assignments": {}, |
|
"decay": 0.1, |
|
"learning_rate": 3.527475200866178e-06, |
|
"metric": "eval/loss", |
|
"per_device_train_batch_size": 64 |
|
} |
|
} |
|
|