{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.1662510390689942,
  "eval_steps": 9,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0016625103906899418,
      "grad_norm": 0.04670165851712227,
      "learning_rate": 1e-05,
      "loss": 11.7674,
      "step": 1
    },
    {
      "epoch": 0.0016625103906899418,
      "eval_loss": 11.760279655456543,
      "eval_runtime": 10.8414,
      "eval_samples_per_second": 46.765,
      "eval_steps_per_second": 5.903,
      "step": 1
    },
    {
      "epoch": 0.0033250207813798837,
      "grad_norm": 0.04368332028388977,
      "learning_rate": 2e-05,
      "loss": 11.7606,
      "step": 2
    },
    {
      "epoch": 0.004987531172069825,
      "grad_norm": 0.044506169855594635,
      "learning_rate": 3e-05,
      "loss": 11.7567,
      "step": 3
    },
    {
      "epoch": 0.006650041562759767,
      "grad_norm": 0.0508490614593029,
      "learning_rate": 4e-05,
      "loss": 11.7595,
      "step": 4
    },
    {
      "epoch": 0.00831255195344971,
      "grad_norm": 0.044127244502305984,
      "learning_rate": 5e-05,
      "loss": 11.7587,
      "step": 5
    },
    {
      "epoch": 0.00997506234413965,
      "grad_norm": 0.05494147166609764,
      "learning_rate": 6e-05,
      "loss": 11.7636,
      "step": 6
    },
    {
      "epoch": 0.011637572734829594,
      "grad_norm": 0.044881321489810944,
      "learning_rate": 7e-05,
      "loss": 11.7567,
      "step": 7
    },
    {
      "epoch": 0.013300083125519535,
      "grad_norm": 0.05256935581564903,
      "learning_rate": 8e-05,
      "loss": 11.7558,
      "step": 8
    },
    {
      "epoch": 0.014962593516209476,
      "grad_norm": 0.05547723546624184,
      "learning_rate": 9e-05,
      "loss": 11.7578,
      "step": 9
    },
    {
      "epoch": 0.014962593516209476,
      "eval_loss": 11.760008811950684,
      "eval_runtime": 10.8532,
      "eval_samples_per_second": 46.714,
      "eval_steps_per_second": 5.897,
      "step": 9
    },
    {
      "epoch": 0.01662510390689942,
      "grad_norm": 0.050355251878499985,
      "learning_rate": 0.0001,
      "loss": 11.7521,
      "step": 10
    },
    {
      "epoch": 0.01828761429758936,
      "grad_norm": 0.05273785442113876,
      "learning_rate": 9.99695413509548e-05,
      "loss": 11.7576,
      "step": 11
    },
    {
      "epoch": 0.0199501246882793,
      "grad_norm": 0.047757700085639954,
      "learning_rate": 9.987820251299122e-05,
      "loss": 11.7637,
      "step": 12
    },
    {
      "epoch": 0.021612635078969242,
      "grad_norm": 0.04381760209798813,
      "learning_rate": 9.972609476841367e-05,
      "loss": 11.7615,
      "step": 13
    },
    {
      "epoch": 0.023275145469659187,
      "grad_norm": 0.05205358564853668,
      "learning_rate": 9.951340343707852e-05,
      "loss": 11.7624,
      "step": 14
    },
    {
      "epoch": 0.02493765586034913,
      "grad_norm": 0.05228843539953232,
      "learning_rate": 9.924038765061042e-05,
      "loss": 11.7613,
      "step": 15
    },
    {
      "epoch": 0.02660016625103907,
      "grad_norm": 0.04606500267982483,
      "learning_rate": 9.890738003669029e-05,
      "loss": 11.7586,
      "step": 16
    },
    {
      "epoch": 0.02826267664172901,
      "grad_norm": 0.05653437227010727,
      "learning_rate": 9.851478631379982e-05,
      "loss": 11.7601,
      "step": 17
    },
    {
      "epoch": 0.029925187032418952,
      "grad_norm": 0.049704670906066895,
      "learning_rate": 9.806308479691595e-05,
      "loss": 11.7634,
      "step": 18
    },
    {
      "epoch": 0.029925187032418952,
      "eval_loss": 11.759297370910645,
      "eval_runtime": 10.8593,
      "eval_samples_per_second": 46.688,
      "eval_steps_per_second": 5.894,
      "step": 18
    },
    {
      "epoch": 0.03158769742310889,
      "grad_norm": 0.06516430526971817,
      "learning_rate": 9.755282581475769e-05,
      "loss": 11.7569,
      "step": 19
    },
    {
      "epoch": 0.03325020781379884,
      "grad_norm": 0.05210546776652336,
      "learning_rate": 9.698463103929542e-05,
      "loss": 11.7583,
      "step": 20
    },
    {
      "epoch": 0.034912718204488775,
      "grad_norm": 0.05196945369243622,
      "learning_rate": 9.635919272833938e-05,
      "loss": 11.7641,
      "step": 21
    },
    {
      "epoch": 0.03657522859517872,
      "grad_norm": 0.052862655371427536,
      "learning_rate": 9.567727288213005e-05,
      "loss": 11.7536,
      "step": 22
    },
    {
      "epoch": 0.038237738985868665,
      "grad_norm": 0.05532236397266388,
      "learning_rate": 9.493970231495835e-05,
      "loss": 11.756,
      "step": 23
    },
    {
      "epoch": 0.0399002493765586,
      "grad_norm": 0.05812180042266846,
      "learning_rate": 9.414737964294636e-05,
      "loss": 11.7572,
      "step": 24
    },
    {
      "epoch": 0.04156275976724855,
      "grad_norm": 0.06880798190832138,
      "learning_rate": 9.330127018922194e-05,
      "loss": 11.7558,
      "step": 25
    },
    {
      "epoch": 0.043225270157938485,
      "grad_norm": 0.06178494170308113,
      "learning_rate": 9.24024048078213e-05,
      "loss": 11.7574,
      "step": 26
    },
    {
      "epoch": 0.04488778054862843,
      "grad_norm": 0.055603306740522385,
      "learning_rate": 9.145187862775209e-05,
      "loss": 11.7611,
      "step": 27
    },
    {
      "epoch": 0.04488778054862843,
      "eval_loss": 11.758519172668457,
      "eval_runtime": 10.6894,
      "eval_samples_per_second": 47.43,
      "eval_steps_per_second": 5.987,
      "step": 27
    },
    {
      "epoch": 0.046550290939318374,
      "grad_norm": 0.047771960496902466,
      "learning_rate": 9.045084971874738e-05,
      "loss": 11.7565,
      "step": 28
    },
    {
      "epoch": 0.04821280133000831,
      "grad_norm": 0.061024993658065796,
      "learning_rate": 8.940053768033609e-05,
      "loss": 11.7541,
      "step": 29
    },
    {
      "epoch": 0.04987531172069826,
      "grad_norm": 0.06483989953994751,
      "learning_rate": 8.83022221559489e-05,
      "loss": 11.7603,
      "step": 30
    },
    {
      "epoch": 0.051537822111388194,
      "grad_norm": 0.05010339245200157,
      "learning_rate": 8.715724127386972e-05,
      "loss": 11.7539,
      "step": 31
    },
    {
      "epoch": 0.05320033250207814,
      "grad_norm": 0.04719735309481621,
      "learning_rate": 8.596699001693255e-05,
      "loss": 11.7539,
      "step": 32
    },
    {
      "epoch": 0.05486284289276808,
      "grad_norm": 0.05507144331932068,
      "learning_rate": 8.473291852294987e-05,
      "loss": 11.7593,
      "step": 33
    },
    {
      "epoch": 0.05652535328345802,
      "grad_norm": 0.0661148950457573,
      "learning_rate": 8.345653031794292e-05,
      "loss": 11.7532,
      "step": 34
    },
    {
      "epoch": 0.058187863674147966,
      "grad_norm": 0.08241010457277298,
      "learning_rate": 8.213938048432697e-05,
      "loss": 11.756,
      "step": 35
    },
    {
      "epoch": 0.059850374064837904,
      "grad_norm": 0.06910108774900436,
      "learning_rate": 8.07830737662829e-05,
      "loss": 11.7557,
      "step": 36
    },
    {
      "epoch": 0.059850374064837904,
      "eval_loss": 11.757696151733398,
      "eval_runtime": 10.6836,
      "eval_samples_per_second": 47.456,
      "eval_steps_per_second": 5.99,
      "step": 36
    },
    {
      "epoch": 0.06151288445552785,
      "grad_norm": 0.058035943657159805,
      "learning_rate": 7.938926261462366e-05,
      "loss": 11.7563,
      "step": 37
    },
    {
      "epoch": 0.06317539484621779,
      "grad_norm": 0.06133880466222763,
      "learning_rate": 7.795964517353735e-05,
      "loss": 11.757,
      "step": 38
    },
    {
      "epoch": 0.06483790523690773,
      "grad_norm": 0.05848357081413269,
      "learning_rate": 7.649596321166024e-05,
      "loss": 11.752,
      "step": 39
    },
    {
      "epoch": 0.06650041562759768,
      "grad_norm": 0.05439987778663635,
      "learning_rate": 7.500000000000001e-05,
      "loss": 11.7565,
      "step": 40
    },
    {
      "epoch": 0.06816292601828762,
      "grad_norm": 0.06417480856180191,
      "learning_rate": 7.347357813929454e-05,
      "loss": 11.7488,
      "step": 41
    },
    {
      "epoch": 0.06982543640897755,
      "grad_norm": 0.061617761850357056,
      "learning_rate": 7.191855733945387e-05,
      "loss": 11.7599,
      "step": 42
    },
    {
      "epoch": 0.0714879467996675,
      "grad_norm": 0.051751330494880676,
      "learning_rate": 7.033683215379002e-05,
      "loss": 11.7565,
      "step": 43
    },
    {
      "epoch": 0.07315045719035744,
      "grad_norm": 0.0564432330429554,
      "learning_rate": 6.873032967079561e-05,
      "loss": 11.7629,
      "step": 44
    },
    {
      "epoch": 0.07481296758104738,
      "grad_norm": 0.05816029757261276,
      "learning_rate": 6.710100716628344e-05,
      "loss": 11.7613,
      "step": 45
    },
    {
      "epoch": 0.07481296758104738,
      "eval_loss": 11.756857872009277,
      "eval_runtime": 10.9655,
      "eval_samples_per_second": 46.236,
      "eval_steps_per_second": 5.836,
      "step": 45
    },
    {
      "epoch": 0.07647547797173733,
      "grad_norm": 0.08202195912599564,
      "learning_rate": 6.545084971874738e-05,
      "loss": 11.7502,
      "step": 46
    },
    {
      "epoch": 0.07813798836242726,
      "grad_norm": 0.06265095621347427,
      "learning_rate": 6.378186779084995e-05,
      "loss": 11.7583,
      "step": 47
    },
    {
      "epoch": 0.0798004987531172,
      "grad_norm": 0.09170009940862656,
      "learning_rate": 6.209609477998338e-05,
      "loss": 11.7566,
      "step": 48
    },
    {
      "epoch": 0.08146300914380715,
      "grad_norm": 0.06977274268865585,
      "learning_rate": 6.0395584540887963e-05,
      "loss": 11.7544,
      "step": 49
    },
    {
      "epoch": 0.0831255195344971,
      "grad_norm": 0.060967620462179184,
      "learning_rate": 5.868240888334653e-05,
      "loss": 11.754,
      "step": 50
    },
    {
      "epoch": 0.08478802992518704,
      "grad_norm": 0.0709136351943016,
      "learning_rate": 5.695865504800327e-05,
      "loss": 11.756,
      "step": 51
    },
    {
      "epoch": 0.08645054031587697,
      "grad_norm": 0.05604271590709686,
      "learning_rate": 5.522642316338268e-05,
      "loss": 11.7583,
      "step": 52
    },
    {
      "epoch": 0.08811305070656691,
      "grad_norm": 0.06510225683450699,
      "learning_rate": 5.348782368720626e-05,
      "loss": 11.7568,
      "step": 53
    },
    {
      "epoch": 0.08977556109725686,
      "grad_norm": 0.06891396641731262,
      "learning_rate": 5.174497483512506e-05,
      "loss": 11.7463,
      "step": 54
    },
    {
      "epoch": 0.08977556109725686,
      "eval_loss": 11.756059646606445,
      "eval_runtime": 10.8526,
      "eval_samples_per_second": 46.717,
      "eval_steps_per_second": 5.897,
      "step": 54
    },
    {
      "epoch": 0.0914380714879468,
      "grad_norm": 0.05632815137505531,
      "learning_rate": 5e-05,
      "loss": 11.7531,
      "step": 55
    },
    {
      "epoch": 0.09310058187863675,
      "grad_norm": 0.0708206370472908,
      "learning_rate": 4.825502516487497e-05,
      "loss": 11.7558,
      "step": 56
    },
    {
      "epoch": 0.09476309226932668,
      "grad_norm": 0.08333998173475266,
      "learning_rate": 4.6512176312793736e-05,
      "loss": 11.7516,
      "step": 57
    },
    {
      "epoch": 0.09642560266001662,
      "grad_norm": 0.06372737884521484,
      "learning_rate": 4.477357683661734e-05,
      "loss": 11.7488,
      "step": 58
    },
    {
      "epoch": 0.09808811305070657,
      "grad_norm": 0.05615927651524544,
      "learning_rate": 4.3041344951996746e-05,
      "loss": 11.7482,
      "step": 59
    },
    {
      "epoch": 0.09975062344139651,
      "grad_norm": 0.0624222457408905,
      "learning_rate": 4.131759111665349e-05,
      "loss": 11.7565,
      "step": 60
    },
    {
      "epoch": 0.10141313383208644,
      "grad_norm": 0.06704821437597275,
      "learning_rate": 3.960441545911204e-05,
      "loss": 11.7547,
      "step": 61
    },
    {
      "epoch": 0.10307564422277639,
      "grad_norm": 0.09131279587745667,
      "learning_rate": 3.790390522001662e-05,
      "loss": 11.7494,
      "step": 62
    },
    {
      "epoch": 0.10473815461346633,
      "grad_norm": 0.06280036270618439,
      "learning_rate": 3.6218132209150045e-05,
      "loss": 11.7585,
      "step": 63
    },
    {
      "epoch": 0.10473815461346633,
      "eval_loss": 11.755424499511719,
      "eval_runtime": 10.8532,
      "eval_samples_per_second": 46.714,
      "eval_steps_per_second": 5.897,
      "step": 63
    },
    {
      "epoch": 0.10640066500415628,
      "grad_norm": 0.07713477313518524,
      "learning_rate": 3.4549150281252636e-05,
      "loss": 11.7581,
      "step": 64
    },
    {
      "epoch": 0.10806317539484622,
      "grad_norm": 0.080855593085289,
      "learning_rate": 3.289899283371657e-05,
      "loss": 11.7548,
      "step": 65
    },
    {
      "epoch": 0.10972568578553615,
      "grad_norm": 0.06689275801181793,
      "learning_rate": 3.12696703292044e-05,
      "loss": 11.7557,
      "step": 66
    },
    {
      "epoch": 0.1113881961762261,
      "grad_norm": 0.0768292099237442,
      "learning_rate": 2.9663167846209998e-05,
      "loss": 11.7577,
      "step": 67
    },
    {
      "epoch": 0.11305070656691604,
      "grad_norm": 0.052165545523166656,
      "learning_rate": 2.8081442660546125e-05,
      "loss": 11.7578,
      "step": 68
    },
    {
      "epoch": 0.11471321695760599,
      "grad_norm": 0.07575516402721405,
      "learning_rate": 2.6526421860705473e-05,
      "loss": 11.7549,
      "step": 69
    },
    {
      "epoch": 0.11637572734829593,
      "grad_norm": 0.05817292258143425,
      "learning_rate": 2.500000000000001e-05,
      "loss": 11.7565,
      "step": 70
    },
    {
      "epoch": 0.11803823773898586,
      "grad_norm": 0.07991883158683777,
      "learning_rate": 2.350403678833976e-05,
      "loss": 11.7516,
      "step": 71
    },
    {
      "epoch": 0.11970074812967581,
      "grad_norm": 0.06573547422885895,
      "learning_rate": 2.2040354826462668e-05,
      "loss": 11.7566,
      "step": 72
    },
    {
      "epoch": 0.11970074812967581,
      "eval_loss": 11.754947662353516,
      "eval_runtime": 10.8431,
      "eval_samples_per_second": 46.758,
      "eval_steps_per_second": 5.902,
      "step": 72
    },
    {
      "epoch": 0.12136325852036575,
      "grad_norm": 0.07040063291788101,
      "learning_rate": 2.061073738537635e-05,
      "loss": 11.7524,
      "step": 73
    },
    {
      "epoch": 0.1230257689110557,
      "grad_norm": 0.05799221619963646,
      "learning_rate": 1.9216926233717085e-05,
      "loss": 11.7536,
      "step": 74
    },
    {
      "epoch": 0.12468827930174564,
      "grad_norm": 0.06470559537410736,
      "learning_rate": 1.7860619515673033e-05,
      "loss": 11.7533,
      "step": 75
    },
    {
      "epoch": 0.12635078969243557,
      "grad_norm": 0.0871320515871048,
      "learning_rate": 1.6543469682057106e-05,
      "loss": 11.759,
      "step": 76
    },
    {
      "epoch": 0.12801330008312553,
      "grad_norm": 0.09251094609498978,
      "learning_rate": 1.526708147705013e-05,
      "loss": 11.7559,
      "step": 77
    },
    {
      "epoch": 0.12967581047381546,
      "grad_norm": 0.08235142379999161,
      "learning_rate": 1.4033009983067452e-05,
      "loss": 11.7498,
      "step": 78
    },
    {
      "epoch": 0.1313383208645054,
      "grad_norm": 0.08003734797239304,
      "learning_rate": 1.2842758726130283e-05,
      "loss": 11.7497,
      "step": 79
    },
    {
      "epoch": 0.13300083125519535,
      "grad_norm": 0.06140708178281784,
      "learning_rate": 1.1697777844051105e-05,
      "loss": 11.7542,
      "step": 80
    },
    {
      "epoch": 0.13466334164588528,
      "grad_norm": 0.06575233489274979,
      "learning_rate": 1.0599462319663905e-05,
      "loss": 11.7592,
      "step": 81
    },
    {
      "epoch": 0.13466334164588528,
      "eval_loss": 11.754680633544922,
      "eval_runtime": 10.852,
      "eval_samples_per_second": 46.719,
      "eval_steps_per_second": 5.898,
      "step": 81
    },
    {
      "epoch": 0.13632585203657524,
      "grad_norm": 0.06072350591421127,
      "learning_rate": 9.549150281252633e-06,
      "loss": 11.7557,
      "step": 82
    },
    {
      "epoch": 0.13798836242726517,
      "grad_norm": 0.07811221480369568,
      "learning_rate": 8.548121372247918e-06,
      "loss": 11.7565,
      "step": 83
    },
    {
      "epoch": 0.1396508728179551,
      "grad_norm": 0.09204588085412979,
      "learning_rate": 7.597595192178702e-06,
      "loss": 11.7634,
      "step": 84
    },
    {
      "epoch": 0.14131338320864506,
      "grad_norm": 0.07153452932834625,
      "learning_rate": 6.698729810778065e-06,
      "loss": 11.7585,
      "step": 85
    },
    {
      "epoch": 0.142975893599335,
      "grad_norm": 0.08605170249938965,
      "learning_rate": 5.852620357053651e-06,
      "loss": 11.7568,
      "step": 86
    },
    {
      "epoch": 0.14463840399002495,
      "grad_norm": 0.08406588435173035,
      "learning_rate": 5.060297685041659e-06,
      "loss": 11.7558,
      "step": 87
    },
    {
      "epoch": 0.14630091438071488,
      "grad_norm": 0.07880561798810959,
      "learning_rate": 4.322727117869951e-06,
      "loss": 11.7556,
      "step": 88
    },
    {
      "epoch": 0.1479634247714048,
      "grad_norm": 0.05538419634103775,
      "learning_rate": 3.6408072716606346e-06,
      "loss": 11.7538,
      "step": 89
    },
    {
      "epoch": 0.14962593516209477,
      "grad_norm": 0.07911358028650284,
      "learning_rate": 3.0153689607045845e-06,
      "loss": 11.7514,
      "step": 90
    },
    {
      "epoch": 0.14962593516209477,
      "eval_loss": 11.754559516906738,
      "eval_runtime": 10.6897,
      "eval_samples_per_second": 47.429,
      "eval_steps_per_second": 5.987,
      "step": 90
    },
    {
      "epoch": 0.1512884455527847,
      "grad_norm": 0.07232284545898438,
      "learning_rate": 2.4471741852423237e-06,
      "loss": 11.7607,
      "step": 91
    },
    {
      "epoch": 0.15295095594347466,
      "grad_norm": 0.09192491322755814,
      "learning_rate": 1.9369152030840556e-06,
      "loss": 11.7541,
      "step": 92
    },
    {
      "epoch": 0.1546134663341646,
      "grad_norm": 0.07534570246934891,
      "learning_rate": 1.4852136862001764e-06,
      "loss": 11.7526,
      "step": 93
    },
    {
      "epoch": 0.15627597672485452,
      "grad_norm": 0.09265202283859253,
      "learning_rate": 1.0926199633097157e-06,
      "loss": 11.757,
      "step": 94
    },
    {
      "epoch": 0.15793848711554448,
      "grad_norm": 0.10842297971248627,
      "learning_rate": 7.596123493895991e-07,
      "loss": 11.7572,
      "step": 95
    },
    {
      "epoch": 0.1596009975062344,
      "grad_norm": 0.06330010294914246,
      "learning_rate": 4.865965629214819e-07,
      "loss": 11.7508,
      "step": 96
    },
    {
      "epoch": 0.16126350789692437,
      "grad_norm": 0.08400006592273712,
      "learning_rate": 2.7390523158633554e-07,
      "loss": 11.7589,
      "step": 97
    },
    {
      "epoch": 0.1629260182876143,
      "grad_norm": 0.08327385783195496,
      "learning_rate": 1.2179748700879012e-07,
      "loss": 11.7501,
      "step": 98
    },
    {
      "epoch": 0.16458852867830423,
      "grad_norm": 0.08100760728120804,
      "learning_rate": 3.04586490452119e-08,
      "loss": 11.7602,
      "step": 99
    },
    {
      "epoch": 0.16458852867830423,
      "eval_loss": 11.754537582397461,
      "eval_runtime": 10.6869,
      "eval_samples_per_second": 47.441,
      "eval_steps_per_second": 5.989,
      "step": 99
    },
    {
      "epoch": 0.1662510390689942,
      "grad_norm": 0.07855262607336044,
      "learning_rate": 0.0,
      "loss": 11.7542,
      "step": 100
    }
  ],
  "logging_steps": 1,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 25,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 20370004377600.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}