|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.1092896174863388, |
|
"eval_steps": 9, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.001092896174863388, |
|
"grad_norm": 1.52214777469635, |
|
"learning_rate": 1e-05, |
|
"loss": 5.7187, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.001092896174863388, |
|
"eval_loss": 5.797797203063965, |
|
"eval_runtime": 55.6063, |
|
"eval_samples_per_second": 55.407, |
|
"eval_steps_per_second": 1.744, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.002185792349726776, |
|
"grad_norm": 1.465293049812317, |
|
"learning_rate": 2e-05, |
|
"loss": 5.7381, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.003278688524590164, |
|
"grad_norm": 1.5233027935028076, |
|
"learning_rate": 3e-05, |
|
"loss": 5.7964, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.004371584699453552, |
|
"grad_norm": 1.3974599838256836, |
|
"learning_rate": 4e-05, |
|
"loss": 5.7245, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.00546448087431694, |
|
"grad_norm": 1.4633687734603882, |
|
"learning_rate": 5e-05, |
|
"loss": 5.683, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.006557377049180328, |
|
"grad_norm": 1.150214672088623, |
|
"learning_rate": 6e-05, |
|
"loss": 5.6083, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.007650273224043716, |
|
"grad_norm": 0.8514282703399658, |
|
"learning_rate": 7e-05, |
|
"loss": 5.4842, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.008743169398907104, |
|
"grad_norm": 0.7441701292991638, |
|
"learning_rate": 8e-05, |
|
"loss": 5.5433, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.009836065573770493, |
|
"grad_norm": 0.8355361819267273, |
|
"learning_rate": 9e-05, |
|
"loss": 5.4061, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.009836065573770493, |
|
"eval_loss": 5.372408390045166, |
|
"eval_runtime": 55.7059, |
|
"eval_samples_per_second": 55.308, |
|
"eval_steps_per_second": 1.741, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.01092896174863388, |
|
"grad_norm": 0.7220072746276855, |
|
"learning_rate": 0.0001, |
|
"loss": 5.2304, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.012021857923497269, |
|
"grad_norm": 0.9673782587051392, |
|
"learning_rate": 9.99695413509548e-05, |
|
"loss": 5.28, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.013114754098360656, |
|
"grad_norm": 0.8410780429840088, |
|
"learning_rate": 9.987820251299122e-05, |
|
"loss": 5.3055, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.014207650273224045, |
|
"grad_norm": 1.2083158493041992, |
|
"learning_rate": 9.972609476841367e-05, |
|
"loss": 5.2112, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.015300546448087432, |
|
"grad_norm": 0.7996488213539124, |
|
"learning_rate": 9.951340343707852e-05, |
|
"loss": 5.2837, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.01639344262295082, |
|
"grad_norm": 0.7078868746757507, |
|
"learning_rate": 9.924038765061042e-05, |
|
"loss": 5.1988, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.017486338797814208, |
|
"grad_norm": 1.7895820140838623, |
|
"learning_rate": 9.890738003669029e-05, |
|
"loss": 5.1302, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.018579234972677595, |
|
"grad_norm": 1.1687918901443481, |
|
"learning_rate": 9.851478631379982e-05, |
|
"loss": 5.2009, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.019672131147540985, |
|
"grad_norm": 0.8420019149780273, |
|
"learning_rate": 9.806308479691595e-05, |
|
"loss": 5.2331, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.019672131147540985, |
|
"eval_loss": 5.167764186859131, |
|
"eval_runtime": 55.7371, |
|
"eval_samples_per_second": 55.277, |
|
"eval_steps_per_second": 1.74, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.020765027322404372, |
|
"grad_norm": 1.4209792613983154, |
|
"learning_rate": 9.755282581475769e-05, |
|
"loss": 5.1434, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.02185792349726776, |
|
"grad_norm": 0.8728744387626648, |
|
"learning_rate": 9.698463103929542e-05, |
|
"loss": 5.1567, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.022950819672131147, |
|
"grad_norm": 1.877350926399231, |
|
"learning_rate": 9.635919272833938e-05, |
|
"loss": 5.1842, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.024043715846994537, |
|
"grad_norm": 0.6153156757354736, |
|
"learning_rate": 9.567727288213005e-05, |
|
"loss": 5.172, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.025136612021857924, |
|
"grad_norm": 0.973233163356781, |
|
"learning_rate": 9.493970231495835e-05, |
|
"loss": 5.1466, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.02622950819672131, |
|
"grad_norm": 0.9793280363082886, |
|
"learning_rate": 9.414737964294636e-05, |
|
"loss": 5.1597, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0273224043715847, |
|
"grad_norm": 0.954116940498352, |
|
"learning_rate": 9.330127018922194e-05, |
|
"loss": 5.0359, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.02841530054644809, |
|
"grad_norm": 0.7521523237228394, |
|
"learning_rate": 9.24024048078213e-05, |
|
"loss": 5.1146, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.029508196721311476, |
|
"grad_norm": 0.5011205673217773, |
|
"learning_rate": 9.145187862775209e-05, |
|
"loss": 5.0219, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.029508196721311476, |
|
"eval_loss": 5.060585975646973, |
|
"eval_runtime": 55.7106, |
|
"eval_samples_per_second": 55.304, |
|
"eval_steps_per_second": 1.741, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.030601092896174863, |
|
"grad_norm": 0.7214334011077881, |
|
"learning_rate": 9.045084971874738e-05, |
|
"loss": 4.9877, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.03169398907103825, |
|
"grad_norm": 2.0999269485473633, |
|
"learning_rate": 8.940053768033609e-05, |
|
"loss": 5.0469, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.03278688524590164, |
|
"grad_norm": 0.9483531713485718, |
|
"learning_rate": 8.83022221559489e-05, |
|
"loss": 4.9721, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.033879781420765025, |
|
"grad_norm": 0.9655952453613281, |
|
"learning_rate": 8.715724127386972e-05, |
|
"loss": 5.1057, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.034972677595628415, |
|
"grad_norm": 0.6362683176994324, |
|
"learning_rate": 8.596699001693255e-05, |
|
"loss": 4.9924, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.036065573770491806, |
|
"grad_norm": 0.835968554019928, |
|
"learning_rate": 8.473291852294987e-05, |
|
"loss": 4.9999, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.03715846994535519, |
|
"grad_norm": 2.468137741088867, |
|
"learning_rate": 8.345653031794292e-05, |
|
"loss": 5.0356, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.03825136612021858, |
|
"grad_norm": 1.0865174531936646, |
|
"learning_rate": 8.213938048432697e-05, |
|
"loss": 4.9696, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.03934426229508197, |
|
"grad_norm": 1.0075640678405762, |
|
"learning_rate": 8.07830737662829e-05, |
|
"loss": 4.9541, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.03934426229508197, |
|
"eval_loss": 4.962901592254639, |
|
"eval_runtime": 55.6825, |
|
"eval_samples_per_second": 55.332, |
|
"eval_steps_per_second": 1.742, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.040437158469945354, |
|
"grad_norm": 1.3719457387924194, |
|
"learning_rate": 7.938926261462366e-05, |
|
"loss": 4.9748, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.041530054644808745, |
|
"grad_norm": 1.7131140232086182, |
|
"learning_rate": 7.795964517353735e-05, |
|
"loss": 4.9173, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.04262295081967213, |
|
"grad_norm": 0.6441985964775085, |
|
"learning_rate": 7.649596321166024e-05, |
|
"loss": 4.9594, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.04371584699453552, |
|
"grad_norm": 2.106320381164551, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 4.8943, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.04480874316939891, |
|
"grad_norm": 3.2894480228424072, |
|
"learning_rate": 7.347357813929454e-05, |
|
"loss": 4.9183, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.04590163934426229, |
|
"grad_norm": 1.4625245332717896, |
|
"learning_rate": 7.191855733945387e-05, |
|
"loss": 4.8716, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.046994535519125684, |
|
"grad_norm": 4.140956878662109, |
|
"learning_rate": 7.033683215379002e-05, |
|
"loss": 4.9576, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.048087431693989074, |
|
"grad_norm": 4.324865341186523, |
|
"learning_rate": 6.873032967079561e-05, |
|
"loss": 4.9355, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.04918032786885246, |
|
"grad_norm": 1.2742332220077515, |
|
"learning_rate": 6.710100716628344e-05, |
|
"loss": 4.9136, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.04918032786885246, |
|
"eval_loss": 4.958442211151123, |
|
"eval_runtime": 55.6763, |
|
"eval_samples_per_second": 55.338, |
|
"eval_steps_per_second": 1.742, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.05027322404371585, |
|
"grad_norm": 2.4827942848205566, |
|
"learning_rate": 6.545084971874738e-05, |
|
"loss": 4.9411, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.05136612021857923, |
|
"grad_norm": 2.5973329544067383, |
|
"learning_rate": 6.378186779084995e-05, |
|
"loss": 4.9952, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.05245901639344262, |
|
"grad_norm": 0.8299262523651123, |
|
"learning_rate": 6.209609477998338e-05, |
|
"loss": 4.9138, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.053551912568306013, |
|
"grad_norm": 4.389142990112305, |
|
"learning_rate": 6.0395584540887963e-05, |
|
"loss": 4.9635, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.0546448087431694, |
|
"grad_norm": 5.450596809387207, |
|
"learning_rate": 5.868240888334653e-05, |
|
"loss": 5.0935, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.05573770491803279, |
|
"grad_norm": 2.6530776023864746, |
|
"learning_rate": 5.695865504800327e-05, |
|
"loss": 4.9609, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.05683060109289618, |
|
"grad_norm": 1.504117727279663, |
|
"learning_rate": 5.522642316338268e-05, |
|
"loss": 4.8891, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.05792349726775956, |
|
"grad_norm": 1.8544734716415405, |
|
"learning_rate": 5.348782368720626e-05, |
|
"loss": 4.9346, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.05901639344262295, |
|
"grad_norm": 1.908761739730835, |
|
"learning_rate": 5.174497483512506e-05, |
|
"loss": 4.9979, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.05901639344262295, |
|
"eval_loss": 4.928157329559326, |
|
"eval_runtime": 55.6535, |
|
"eval_samples_per_second": 55.36, |
|
"eval_steps_per_second": 1.743, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.060109289617486336, |
|
"grad_norm": 0.9097221493721008, |
|
"learning_rate": 5e-05, |
|
"loss": 4.9558, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.06120218579234973, |
|
"grad_norm": 1.899079442024231, |
|
"learning_rate": 4.825502516487497e-05, |
|
"loss": 4.9715, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.06229508196721312, |
|
"grad_norm": 1.7221193313598633, |
|
"learning_rate": 4.6512176312793736e-05, |
|
"loss": 4.9064, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.0633879781420765, |
|
"grad_norm": 0.7375671863555908, |
|
"learning_rate": 4.477357683661734e-05, |
|
"loss": 4.8998, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.06448087431693988, |
|
"grad_norm": 1.5496197938919067, |
|
"learning_rate": 4.3041344951996746e-05, |
|
"loss": 4.9465, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.06557377049180328, |
|
"grad_norm": 1.029213309288025, |
|
"learning_rate": 4.131759111665349e-05, |
|
"loss": 4.9669, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.06666666666666667, |
|
"grad_norm": 0.6538008451461792, |
|
"learning_rate": 3.960441545911204e-05, |
|
"loss": 4.8936, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.06775956284153005, |
|
"grad_norm": 1.0834981203079224, |
|
"learning_rate": 3.790390522001662e-05, |
|
"loss": 4.9531, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.06885245901639345, |
|
"grad_norm": 0.691550612449646, |
|
"learning_rate": 3.6218132209150045e-05, |
|
"loss": 4.9323, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.06885245901639345, |
|
"eval_loss": 4.916704177856445, |
|
"eval_runtime": 55.705, |
|
"eval_samples_per_second": 55.309, |
|
"eval_steps_per_second": 1.741, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.06994535519125683, |
|
"grad_norm": 0.45348408818244934, |
|
"learning_rate": 3.4549150281252636e-05, |
|
"loss": 4.8709, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.07103825136612021, |
|
"grad_norm": 1.04542875289917, |
|
"learning_rate": 3.289899283371657e-05, |
|
"loss": 4.952, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.07213114754098361, |
|
"grad_norm": 0.697375476360321, |
|
"learning_rate": 3.12696703292044e-05, |
|
"loss": 4.9087, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.073224043715847, |
|
"grad_norm": 0.8204128742218018, |
|
"learning_rate": 2.9663167846209998e-05, |
|
"loss": 4.9664, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.07431693989071038, |
|
"grad_norm": 1.083094835281372, |
|
"learning_rate": 2.8081442660546125e-05, |
|
"loss": 4.9942, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.07540983606557378, |
|
"grad_norm": 0.5777742862701416, |
|
"learning_rate": 2.6526421860705473e-05, |
|
"loss": 4.9119, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.07650273224043716, |
|
"grad_norm": 0.9902992248535156, |
|
"learning_rate": 2.500000000000001e-05, |
|
"loss": 4.8564, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.07759562841530054, |
|
"grad_norm": 0.69707190990448, |
|
"learning_rate": 2.350403678833976e-05, |
|
"loss": 4.8472, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.07868852459016394, |
|
"grad_norm": 0.5401630997657776, |
|
"learning_rate": 2.2040354826462668e-05, |
|
"loss": 4.946, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.07868852459016394, |
|
"eval_loss": 4.9106550216674805, |
|
"eval_runtime": 55.7387, |
|
"eval_samples_per_second": 55.276, |
|
"eval_steps_per_second": 1.74, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.07978142076502732, |
|
"grad_norm": 0.41800597310066223, |
|
"learning_rate": 2.061073738537635e-05, |
|
"loss": 4.9306, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.08087431693989071, |
|
"grad_norm": 0.9597459435462952, |
|
"learning_rate": 1.9216926233717085e-05, |
|
"loss": 4.9114, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.08196721311475409, |
|
"grad_norm": 0.4304816722869873, |
|
"learning_rate": 1.7860619515673033e-05, |
|
"loss": 4.8847, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.08306010928961749, |
|
"grad_norm": 0.7232154011726379, |
|
"learning_rate": 1.6543469682057106e-05, |
|
"loss": 4.9066, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.08415300546448087, |
|
"grad_norm": 0.6170682907104492, |
|
"learning_rate": 1.526708147705013e-05, |
|
"loss": 4.923, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.08524590163934426, |
|
"grad_norm": 0.49551859498023987, |
|
"learning_rate": 1.4033009983067452e-05, |
|
"loss": 4.9564, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.08633879781420765, |
|
"grad_norm": 0.46370407938957214, |
|
"learning_rate": 1.2842758726130283e-05, |
|
"loss": 4.9055, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.08743169398907104, |
|
"grad_norm": 0.5205159783363342, |
|
"learning_rate": 1.1697777844051105e-05, |
|
"loss": 4.8709, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.08852459016393442, |
|
"grad_norm": 0.6012383699417114, |
|
"learning_rate": 1.0599462319663905e-05, |
|
"loss": 4.9006, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.08852459016393442, |
|
"eval_loss": 4.908712387084961, |
|
"eval_runtime": 55.644, |
|
"eval_samples_per_second": 55.37, |
|
"eval_steps_per_second": 1.743, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.08961748633879782, |
|
"grad_norm": 0.6838329434394836, |
|
"learning_rate": 9.549150281252633e-06, |
|
"loss": 5.003, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.0907103825136612, |
|
"grad_norm": 0.6821178793907166, |
|
"learning_rate": 8.548121372247918e-06, |
|
"loss": 4.9449, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.09180327868852459, |
|
"grad_norm": 0.5011351108551025, |
|
"learning_rate": 7.597595192178702e-06, |
|
"loss": 4.8949, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.09289617486338798, |
|
"grad_norm": 0.6648871898651123, |
|
"learning_rate": 6.698729810778065e-06, |
|
"loss": 4.8848, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.09398907103825137, |
|
"grad_norm": 0.5797678828239441, |
|
"learning_rate": 5.852620357053651e-06, |
|
"loss": 4.8899, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.09508196721311475, |
|
"grad_norm": 0.9921677112579346, |
|
"learning_rate": 5.060297685041659e-06, |
|
"loss": 4.984, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.09617486338797815, |
|
"grad_norm": 0.5813995599746704, |
|
"learning_rate": 4.322727117869951e-06, |
|
"loss": 4.8188, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.09726775956284153, |
|
"grad_norm": 0.5218381285667419, |
|
"learning_rate": 3.6408072716606346e-06, |
|
"loss": 4.8906, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.09836065573770492, |
|
"grad_norm": 0.8028129935264587, |
|
"learning_rate": 3.0153689607045845e-06, |
|
"loss": 4.8837, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.09836065573770492, |
|
"eval_loss": 4.907496929168701, |
|
"eval_runtime": 55.6919, |
|
"eval_samples_per_second": 55.322, |
|
"eval_steps_per_second": 1.742, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.0994535519125683, |
|
"grad_norm": 0.45788004994392395, |
|
"learning_rate": 2.4471741852423237e-06, |
|
"loss": 4.8955, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.1005464480874317, |
|
"grad_norm": 0.4592156708240509, |
|
"learning_rate": 1.9369152030840556e-06, |
|
"loss": 4.8768, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.10163934426229508, |
|
"grad_norm": 0.5255284309387207, |
|
"learning_rate": 1.4852136862001764e-06, |
|
"loss": 4.9354, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.10273224043715846, |
|
"grad_norm": 0.7076261043548584, |
|
"learning_rate": 1.0926199633097157e-06, |
|
"loss": 4.8474, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.10382513661202186, |
|
"grad_norm": 0.6252774000167847, |
|
"learning_rate": 7.596123493895991e-07, |
|
"loss": 4.8941, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.10491803278688525, |
|
"grad_norm": 0.45233267545700073, |
|
"learning_rate": 4.865965629214819e-07, |
|
"loss": 4.8658, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.10601092896174863, |
|
"grad_norm": 0.4194464683532715, |
|
"learning_rate": 2.7390523158633554e-07, |
|
"loss": 4.8726, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.10710382513661203, |
|
"grad_norm": 0.4344501495361328, |
|
"learning_rate": 1.2179748700879012e-07, |
|
"loss": 4.8811, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.10819672131147541, |
|
"grad_norm": 0.7105288505554199, |
|
"learning_rate": 3.04586490452119e-08, |
|
"loss": 4.962, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.10819672131147541, |
|
"eval_loss": 4.906777858734131, |
|
"eval_runtime": 55.621, |
|
"eval_samples_per_second": 55.393, |
|
"eval_steps_per_second": 1.744, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.1092896174863388, |
|
"grad_norm": 0.42179667949676514, |
|
"learning_rate": 0.0, |
|
"loss": 4.9524, |
|
"step": 100 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 25, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.34220225606058e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|