{
  "best_metric": 3.3320932388305664,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
  "epoch": 0.37261294829995345,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0018630647414997672,
      "grad_norm": 1.2902519702911377,
      "learning_rate": 5e-06,
      "loss": 3.6391,
      "step": 1
    },
    {
      "epoch": 0.0018630647414997672,
      "eval_loss": 5.516392230987549,
      "eval_runtime": 32.1445,
      "eval_samples_per_second": 28.154,
      "eval_steps_per_second": 14.093,
      "step": 1
    },
    {
      "epoch": 0.0037261294829995344,
      "grad_norm": 1.502539873123169,
      "learning_rate": 1e-05,
      "loss": 4.285,
      "step": 2
    },
    {
      "epoch": 0.0055891942244993015,
      "grad_norm": 1.6728174686431885,
      "learning_rate": 1.5e-05,
      "loss": 4.3833,
      "step": 3
    },
    {
      "epoch": 0.007452258965999069,
      "grad_norm": 1.809103012084961,
      "learning_rate": 2e-05,
      "loss": 4.5885,
      "step": 4
    },
    {
      "epoch": 0.009315323707498836,
      "grad_norm": 1.8019917011260986,
      "learning_rate": 2.5e-05,
      "loss": 4.6271,
      "step": 5
    },
    {
      "epoch": 0.011178388448998603,
      "grad_norm": 1.8416504859924316,
      "learning_rate": 3e-05,
      "loss": 4.6321,
      "step": 6
    },
    {
      "epoch": 0.01304145319049837,
      "grad_norm": 1.9281914234161377,
      "learning_rate": 3.5e-05,
      "loss": 4.3472,
      "step": 7
    },
    {
      "epoch": 0.014904517931998137,
      "grad_norm": 2.1244757175445557,
      "learning_rate": 4e-05,
      "loss": 4.8937,
      "step": 8
    },
    {
      "epoch": 0.016767582673497903,
      "grad_norm": 1.9028316736221313,
      "learning_rate": 4.5e-05,
      "loss": 5.0692,
      "step": 9
    },
    {
      "epoch": 0.018630647414997672,
      "grad_norm": 2.2906155586242676,
      "learning_rate": 5e-05,
      "loss": 4.3372,
      "step": 10
    },
    {
      "epoch": 0.020493712156497437,
      "grad_norm": 2.0029475688934326,
      "learning_rate": 5.500000000000001e-05,
      "loss": 4.4519,
      "step": 11
    },
    {
      "epoch": 0.022356776897997206,
      "grad_norm": 1.9513241052627563,
      "learning_rate": 6e-05,
      "loss": 4.3201,
      "step": 12
    },
    {
      "epoch": 0.02421984163949697,
      "grad_norm": 1.7343864440917969,
      "learning_rate": 6.500000000000001e-05,
      "loss": 4.0463,
      "step": 13
    },
    {
      "epoch": 0.02608290638099674,
      "grad_norm": 1.9337594509124756,
      "learning_rate": 7e-05,
      "loss": 4.0561,
      "step": 14
    },
    {
      "epoch": 0.027945971122496506,
      "grad_norm": 1.6187396049499512,
      "learning_rate": 7.500000000000001e-05,
      "loss": 3.5715,
      "step": 15
    },
    {
      "epoch": 0.029809035863996275,
      "grad_norm": 1.5559985637664795,
      "learning_rate": 8e-05,
      "loss": 4.0944,
      "step": 16
    },
    {
      "epoch": 0.031672100605496044,
      "grad_norm": 1.6261332035064697,
      "learning_rate": 8.5e-05,
      "loss": 3.9513,
      "step": 17
    },
    {
      "epoch": 0.033535165346995806,
      "grad_norm": 1.8501555919647217,
      "learning_rate": 9e-05,
      "loss": 3.9689,
      "step": 18
    },
    {
      "epoch": 0.035398230088495575,
      "grad_norm": 2.036672592163086,
      "learning_rate": 9.5e-05,
      "loss": 3.9789,
      "step": 19
    },
    {
      "epoch": 0.037261294829995344,
      "grad_norm": 2.047884702682495,
      "learning_rate": 0.0001,
      "loss": 3.663,
      "step": 20
    },
    {
      "epoch": 0.03912435957149511,
      "grad_norm": 1.90353262424469,
      "learning_rate": 9.999238475781957e-05,
      "loss": 3.5892,
      "step": 21
    },
    {
      "epoch": 0.040987424312994875,
      "grad_norm": 1.863972783088684,
      "learning_rate": 9.99695413509548e-05,
      "loss": 3.9369,
      "step": 22
    },
    {
      "epoch": 0.04285048905449464,
      "grad_norm": 1.4511252641677856,
      "learning_rate": 9.99314767377287e-05,
      "loss": 3.1318,
      "step": 23
    },
    {
      "epoch": 0.04471355379599441,
      "grad_norm": 1.4355757236480713,
      "learning_rate": 9.987820251299122e-05,
      "loss": 3.983,
      "step": 24
    },
    {
      "epoch": 0.04657661853749418,
      "grad_norm": 1.4540237188339233,
      "learning_rate": 9.980973490458728e-05,
      "loss": 4.1114,
      "step": 25
    },
    {
      "epoch": 0.04843968327899394,
      "grad_norm": 1.5578112602233887,
      "learning_rate": 9.972609476841367e-05,
      "loss": 3.8437,
      "step": 26
    },
    {
      "epoch": 0.05030274802049371,
      "grad_norm": 1.8616338968276978,
      "learning_rate": 9.962730758206611e-05,
      "loss": 3.516,
      "step": 27
    },
    {
      "epoch": 0.05216581276199348,
      "grad_norm": 1.6210098266601562,
      "learning_rate": 9.951340343707852e-05,
      "loss": 3.3105,
      "step": 28
    },
    {
      "epoch": 0.05402887750349324,
      "grad_norm": 1.6511797904968262,
      "learning_rate": 9.938441702975689e-05,
      "loss": 3.9478,
      "step": 29
    },
    {
      "epoch": 0.05589194224499301,
      "grad_norm": 1.6636693477630615,
      "learning_rate": 9.924038765061042e-05,
      "loss": 3.6779,
      "step": 30
    },
    {
      "epoch": 0.05775500698649278,
      "grad_norm": 1.4905117750167847,
      "learning_rate": 9.908135917238321e-05,
      "loss": 3.2574,
      "step": 31
    },
    {
      "epoch": 0.05961807172799255,
      "grad_norm": 1.6063334941864014,
      "learning_rate": 9.890738003669029e-05,
      "loss": 3.9851,
      "step": 32
    },
    {
      "epoch": 0.06148113646949231,
      "grad_norm": 1.7283437252044678,
      "learning_rate": 9.871850323926177e-05,
      "loss": 3.705,
      "step": 33
    },
    {
      "epoch": 0.06334420121099209,
      "grad_norm": 1.5343276262283325,
      "learning_rate": 9.851478631379982e-05,
      "loss": 3.5288,
      "step": 34
    },
    {
      "epoch": 0.06520726595249185,
      "grad_norm": 1.7014079093933105,
      "learning_rate": 9.829629131445342e-05,
      "loss": 3.8288,
      "step": 35
    },
    {
      "epoch": 0.06707033069399161,
      "grad_norm": 1.7944095134735107,
      "learning_rate": 9.806308479691595e-05,
      "loss": 3.8894,
      "step": 36
    },
    {
      "epoch": 0.06893339543549139,
      "grad_norm": 1.5314217805862427,
      "learning_rate": 9.781523779815179e-05,
      "loss": 3.2699,
      "step": 37
    },
    {
      "epoch": 0.07079646017699115,
      "grad_norm": 1.6168136596679688,
      "learning_rate": 9.755282581475769e-05,
      "loss": 3.8024,
      "step": 38
    },
    {
      "epoch": 0.07265952491849091,
      "grad_norm": 1.3477504253387451,
      "learning_rate": 9.727592877996585e-05,
      "loss": 3.0357,
      "step": 39
    },
    {
      "epoch": 0.07452258965999069,
      "grad_norm": 2.0698187351226807,
      "learning_rate": 9.698463103929542e-05,
      "loss": 3.7317,
      "step": 40
    },
    {
      "epoch": 0.07638565440149045,
      "grad_norm": 1.6671048402786255,
      "learning_rate": 9.667902132486009e-05,
      "loss": 3.4916,
      "step": 41
    },
    {
      "epoch": 0.07824871914299023,
      "grad_norm": 1.8396328687667847,
      "learning_rate": 9.635919272833938e-05,
      "loss": 3.5342,
      "step": 42
    },
    {
      "epoch": 0.08011178388448999,
      "grad_norm": 1.5536775588989258,
      "learning_rate": 9.602524267262203e-05,
      "loss": 3.8207,
      "step": 43
    },
    {
      "epoch": 0.08197484862598975,
      "grad_norm": 1.5929099321365356,
      "learning_rate": 9.567727288213005e-05,
      "loss": 3.8526,
      "step": 44
    },
    {
      "epoch": 0.08383791336748952,
      "grad_norm": 2.140622854232788,
      "learning_rate": 9.53153893518325e-05,
      "loss": 3.9536,
      "step": 45
    },
    {
      "epoch": 0.08570097810898929,
      "grad_norm": 2.204415798187256,
      "learning_rate": 9.493970231495835e-05,
      "loss": 3.8141,
      "step": 46
    },
    {
      "epoch": 0.08756404285048905,
      "grad_norm": 2.16359806060791,
      "learning_rate": 9.45503262094184e-05,
      "loss": 4.0383,
      "step": 47
    },
    {
      "epoch": 0.08942710759198882,
      "grad_norm": 2.2646803855895996,
      "learning_rate": 9.414737964294636e-05,
      "loss": 3.9917,
      "step": 48
    },
    {
      "epoch": 0.09129017233348859,
      "grad_norm": 2.4855337142944336,
      "learning_rate": 9.373098535696979e-05,
      "loss": 3.8354,
      "step": 49
    },
    {
      "epoch": 0.09315323707498836,
      "grad_norm": 2.349766254425049,
      "learning_rate": 9.330127018922194e-05,
      "loss": 3.9016,
      "step": 50
    },
    {
      "epoch": 0.09315323707498836,
      "eval_loss": 3.6060919761657715,
      "eval_runtime": 32.1835,
      "eval_samples_per_second": 28.12,
      "eval_steps_per_second": 14.076,
      "step": 50
    },
    {
      "epoch": 0.09501630181648812,
      "grad_norm": 2.1820530891418457,
      "learning_rate": 9.285836503510562e-05,
      "loss": 2.9137,
      "step": 51
    },
    {
      "epoch": 0.09687936655798789,
      "grad_norm": 2.1806118488311768,
      "learning_rate": 9.24024048078213e-05,
      "loss": 3.1755,
      "step": 52
    },
    {
      "epoch": 0.09874243129948766,
      "grad_norm": 1.8996943235397339,
      "learning_rate": 9.193352839727121e-05,
      "loss": 3.3476,
      "step": 53
    },
    {
      "epoch": 0.10060549604098742,
      "grad_norm": 1.8744741678237915,
      "learning_rate": 9.145187862775209e-05,
      "loss": 3.196,
      "step": 54
    },
    {
      "epoch": 0.10246856078248719,
      "grad_norm": 1.70234215259552,
      "learning_rate": 9.09576022144496e-05,
      "loss": 3.3386,
      "step": 55
    },
    {
      "epoch": 0.10433162552398696,
      "grad_norm": 1.7539249658584595,
      "learning_rate": 9.045084971874738e-05,
      "loss": 3.4391,
      "step": 56
    },
    {
      "epoch": 0.10619469026548672,
      "grad_norm": 1.4793843030929565,
      "learning_rate": 8.993177550236464e-05,
      "loss": 3.9207,
      "step": 57
    },
    {
      "epoch": 0.10805775500698649,
      "grad_norm": 1.1978991031646729,
      "learning_rate": 8.940053768033609e-05,
      "loss": 3.2176,
      "step": 58
    },
    {
      "epoch": 0.10992081974848626,
      "grad_norm": 1.1151164770126343,
      "learning_rate": 8.885729807284856e-05,
      "loss": 3.0396,
      "step": 59
    },
    {
      "epoch": 0.11178388448998602,
      "grad_norm": 1.0803781747817993,
      "learning_rate": 8.83022221559489e-05,
      "loss": 2.9356,
      "step": 60
    },
    {
      "epoch": 0.1136469492314858,
      "grad_norm": 1.1919324398040771,
      "learning_rate": 8.773547901113862e-05,
      "loss": 3.0124,
      "step": 61
    },
    {
      "epoch": 0.11551001397298556,
      "grad_norm": 1.0512971878051758,
      "learning_rate": 8.715724127386972e-05,
      "loss": 3.2844,
      "step": 62
    },
    {
      "epoch": 0.11737307871448532,
      "grad_norm": 1.3854892253875732,
      "learning_rate": 8.656768508095853e-05,
      "loss": 3.3558,
      "step": 63
    },
    {
      "epoch": 0.1192361434559851,
      "grad_norm": 1.2489628791809082,
      "learning_rate": 8.596699001693255e-05,
      "loss": 3.2082,
      "step": 64
    },
    {
      "epoch": 0.12109920819748486,
      "grad_norm": 1.0986685752868652,
      "learning_rate": 8.535533905932738e-05,
      "loss": 3.0937,
      "step": 65
    },
    {
      "epoch": 0.12296227293898462,
      "grad_norm": 1.2437379360198975,
      "learning_rate": 8.473291852294987e-05,
      "loss": 3.2174,
      "step": 66
    },
    {
      "epoch": 0.1248253376804844,
      "grad_norm": 1.1477092504501343,
      "learning_rate": 8.409991800312493e-05,
      "loss": 3.2221,
      "step": 67
    },
    {
      "epoch": 0.12668840242198418,
      "grad_norm": 1.4317594766616821,
      "learning_rate": 8.345653031794292e-05,
      "loss": 3.2315,
      "step": 68
    },
    {
      "epoch": 0.12855146716348392,
      "grad_norm": 1.2513046264648438,
      "learning_rate": 8.280295144952536e-05,
      "loss": 3.4117,
      "step": 69
    },
    {
      "epoch": 0.1304145319049837,
      "grad_norm": 1.2725088596343994,
      "learning_rate": 8.213938048432697e-05,
      "loss": 3.4154,
      "step": 70
    },
    {
      "epoch": 0.13227759664648348,
      "grad_norm": 1.301317572593689,
      "learning_rate": 8.146601955249188e-05,
      "loss": 3.2894,
      "step": 71
    },
    {
      "epoch": 0.13414066138798322,
      "grad_norm": 1.2711513042449951,
      "learning_rate": 8.07830737662829e-05,
      "loss": 3.352,
      "step": 72
    },
    {
      "epoch": 0.136003726129483,
      "grad_norm": 1.2225326299667358,
      "learning_rate": 8.009075115760243e-05,
      "loss": 3.2548,
      "step": 73
    },
    {
      "epoch": 0.13786679087098277,
      "grad_norm": 1.2163617610931396,
      "learning_rate": 7.938926261462366e-05,
      "loss": 3.5588,
      "step": 74
    },
    {
      "epoch": 0.13972985561248252,
      "grad_norm": 2.666508436203003,
      "learning_rate": 7.86788218175523e-05,
      "loss": 3.3621,
      "step": 75
    },
    {
      "epoch": 0.1415929203539823,
      "grad_norm": 1.1624727249145508,
      "learning_rate": 7.795964517353735e-05,
      "loss": 3.3753,
      "step": 76
    },
    {
      "epoch": 0.14345598509548207,
      "grad_norm": 1.2207733392715454,
      "learning_rate": 7.723195175075136e-05,
      "loss": 3.0322,
      "step": 77
    },
    {
      "epoch": 0.14531904983698182,
      "grad_norm": 1.153766393661499,
      "learning_rate": 7.649596321166024e-05,
      "loss": 3.3144,
      "step": 78
    },
    {
      "epoch": 0.1471821145784816,
      "grad_norm": 1.0764729976654053,
      "learning_rate": 7.575190374550272e-05,
      "loss": 2.8701,
      "step": 79
    },
    {
      "epoch": 0.14904517931998137,
      "grad_norm": 1.2744287252426147,
      "learning_rate": 7.500000000000001e-05,
      "loss": 3.5407,
      "step": 80
    },
    {
      "epoch": 0.15090824406148112,
      "grad_norm": 1.4500547647476196,
      "learning_rate": 7.424048101231686e-05,
      "loss": 3.3808,
      "step": 81
    },
    {
      "epoch": 0.1527713088029809,
      "grad_norm": 2.205737352371216,
      "learning_rate": 7.347357813929454e-05,
      "loss": 3.2512,
      "step": 82
    },
    {
      "epoch": 0.15463437354448067,
      "grad_norm": 1.1358613967895508,
      "learning_rate": 7.269952498697734e-05,
      "loss": 3.4559,
      "step": 83
    },
    {
      "epoch": 0.15649743828598045,
      "grad_norm": 1.6348944902420044,
      "learning_rate": 7.191855733945387e-05,
      "loss": 3.9512,
      "step": 84
    },
    {
      "epoch": 0.1583605030274802,
      "grad_norm": 1.229777455329895,
      "learning_rate": 7.113091308703498e-05,
      "loss": 2.9064,
      "step": 85
    },
    {
      "epoch": 0.16022356776897997,
      "grad_norm": 1.1657277345657349,
      "learning_rate": 7.033683215379002e-05,
      "loss": 3.0757,
      "step": 86
    },
    {
      "epoch": 0.16208663251047975,
      "grad_norm": 1.4670532941818237,
      "learning_rate": 6.953655642446368e-05,
      "loss": 3.4218,
      "step": 87
    },
    {
      "epoch": 0.1639496972519795,
      "grad_norm": 1.5080535411834717,
      "learning_rate": 6.873032967079561e-05,
      "loss": 3.3886,
      "step": 88
    },
    {
      "epoch": 0.16581276199347927,
      "grad_norm": 1.4212788343429565,
      "learning_rate": 6.7918397477265e-05,
      "loss": 3.6124,
      "step": 89
    },
    {
      "epoch": 0.16767582673497905,
      "grad_norm": 1.553883671760559,
      "learning_rate": 6.710100716628344e-05,
      "loss": 3.7264,
      "step": 90
    },
    {
      "epoch": 0.1695388914764788,
      "grad_norm": 1.4848854541778564,
      "learning_rate": 6.627840772285784e-05,
      "loss": 3.1516,
      "step": 91
    },
    {
      "epoch": 0.17140195621797857,
      "grad_norm": 1.2597936391830444,
      "learning_rate": 6.545084971874738e-05,
      "loss": 3.3816,
      "step": 92
    },
    {
      "epoch": 0.17326502095947835,
      "grad_norm": 1.4741157293319702,
      "learning_rate": 6.461858523613684e-05,
      "loss": 3.4027,
      "step": 93
    },
    {
      "epoch": 0.1751280857009781,
      "grad_norm": 1.5933159589767456,
      "learning_rate": 6.378186779084995e-05,
      "loss": 3.4609,
      "step": 94
    },
    {
      "epoch": 0.17699115044247787,
      "grad_norm": 1.6153674125671387,
      "learning_rate": 6.294095225512603e-05,
      "loss": 3.7559,
      "step": 95
    },
    {
      "epoch": 0.17885421518397765,
      "grad_norm": 1.5263915061950684,
      "learning_rate": 6.209609477998338e-05,
      "loss": 3.5021,
      "step": 96
    },
    {
      "epoch": 0.1807172799254774,
      "grad_norm": 2.1421830654144287,
      "learning_rate": 6.124755271719325e-05,
      "loss": 4.2071,
      "step": 97
    },
    {
      "epoch": 0.18258034466697717,
      "grad_norm": 1.6600545644760132,
      "learning_rate": 6.0395584540887963e-05,
      "loss": 3.4893,
      "step": 98
    },
    {
      "epoch": 0.18444340940847695,
      "grad_norm": 2.4432666301727295,
      "learning_rate": 5.9540449768827246e-05,
      "loss": 3.889,
      "step": 99
    },
    {
      "epoch": 0.18630647414997673,
      "grad_norm": 3.4110183715820312,
      "learning_rate": 5.868240888334653e-05,
      "loss": 4.2759,
      "step": 100
    },
    {
      "epoch": 0.18630647414997673,
      "eval_loss": 3.3999626636505127,
      "eval_runtime": 32.1259,
      "eval_samples_per_second": 28.17,
      "eval_steps_per_second": 14.101,
      "step": 100
    },
    {
      "epoch": 0.18816953889147647,
      "grad_norm": 1.373574137687683,
      "learning_rate": 5.782172325201155e-05,
      "loss": 3.0844,
      "step": 101
    },
    {
      "epoch": 0.19003260363297625,
      "grad_norm": 1.2612931728363037,
      "learning_rate": 5.695865504800327e-05,
      "loss": 3.4433,
      "step": 102
    },
    {
      "epoch": 0.19189566837447602,
      "grad_norm": 1.346120834350586,
      "learning_rate": 5.6093467170257374e-05,
      "loss": 3.3105,
      "step": 103
    },
    {
      "epoch": 0.19375873311597577,
      "grad_norm": 1.3103049993515015,
      "learning_rate": 5.522642316338268e-05,
      "loss": 3.7383,
      "step": 104
    },
    {
      "epoch": 0.19562179785747555,
      "grad_norm": 1.2798504829406738,
      "learning_rate": 5.435778713738292e-05,
      "loss": 3.0195,
      "step": 105
    },
    {
      "epoch": 0.19748486259897532,
      "grad_norm": 1.2848749160766602,
      "learning_rate": 5.348782368720626e-05,
      "loss": 2.9208,
      "step": 106
    },
    {
      "epoch": 0.19934792734047507,
      "grad_norm": 1.1921271085739136,
      "learning_rate": 5.26167978121472e-05,
      "loss": 3.238,
      "step": 107
    },
    {
      "epoch": 0.20121099208197485,
      "grad_norm": 1.2079963684082031,
      "learning_rate": 5.174497483512506e-05,
      "loss": 3.2653,
      "step": 108
    },
    {
      "epoch": 0.20307405682347462,
      "grad_norm": 1.150623083114624,
      "learning_rate": 5.0872620321864185e-05,
      "loss": 3.3047,
      "step": 109
    },
    {
      "epoch": 0.20493712156497437,
      "grad_norm": 1.12302565574646,
      "learning_rate": 5e-05,
      "loss": 2.9365,
      "step": 110
    },
    {
      "epoch": 0.20680018630647415,
      "grad_norm": 0.8901563882827759,
      "learning_rate": 4.912737967813583e-05,
      "loss": 2.5866,
      "step": 111
    },
    {
      "epoch": 0.20866325104797392,
      "grad_norm": 1.5094679594039917,
      "learning_rate": 4.825502516487497e-05,
      "loss": 3.5975,
      "step": 112
    },
    {
      "epoch": 0.21052631578947367,
      "grad_norm": 1.1921519041061401,
      "learning_rate": 4.738320218785281e-05,
      "loss": 3.0366,
      "step": 113
    },
    {
      "epoch": 0.21238938053097345,
      "grad_norm": 1.2302180528640747,
      "learning_rate": 4.6512176312793736e-05,
      "loss": 3.1477,
      "step": 114
    },
    {
      "epoch": 0.21425244527247322,
      "grad_norm": 0.9722185730934143,
      "learning_rate": 4.564221286261709e-05,
      "loss": 2.9138,
      "step": 115
    },
    {
      "epoch": 0.21611551001397297,
      "grad_norm": 1.2122098207473755,
      "learning_rate": 4.477357683661734e-05,
      "loss": 3.3952,
      "step": 116
    },
    {
      "epoch": 0.21797857475547275,
      "grad_norm": 1.0260534286499023,
      "learning_rate": 4.390653282974264e-05,
      "loss": 3.0228,
      "step": 117
    },
    {
      "epoch": 0.21984163949697252,
      "grad_norm": 1.134804606437683,
      "learning_rate": 4.3041344951996746e-05,
      "loss": 3.2426,
      "step": 118
    },
    {
      "epoch": 0.2217047042384723,
      "grad_norm": 1.2314471006393433,
      "learning_rate": 4.2178276747988446e-05,
      "loss": 3.1156,
      "step": 119
    },
    {
      "epoch": 0.22356776897997205,
      "grad_norm": 1.9310802221298218,
      "learning_rate": 4.131759111665349e-05,
      "loss": 3.2605,
      "step": 120
    },
    {
      "epoch": 0.22543083372147182,
      "grad_norm": 1.244099497795105,
      "learning_rate": 4.045955023117276e-05,
      "loss": 3.3355,
      "step": 121
    },
    {
      "epoch": 0.2272938984629716,
      "grad_norm": 1.1614549160003662,
      "learning_rate": 3.960441545911204e-05,
      "loss": 2.9461,
      "step": 122
    },
    {
      "epoch": 0.22915696320447135,
      "grad_norm": 1.1840636730194092,
      "learning_rate": 3.875244728280676e-05,
      "loss": 3.1649,
      "step": 123
    },
    {
      "epoch": 0.23102002794597112,
      "grad_norm": 1.1917698383331299,
      "learning_rate": 3.790390522001662e-05,
      "loss": 3.2872,
      "step": 124
    },
    {
      "epoch": 0.2328830926874709,
      "grad_norm": 1.3176980018615723,
      "learning_rate": 3.705904774487396e-05,
      "loss": 3.4799,
      "step": 125
    },
    {
      "epoch": 0.23474615742897065,
      "grad_norm": 1.1151647567749023,
      "learning_rate": 3.6218132209150045e-05,
      "loss": 2.9211,
      "step": 126
    },
    {
      "epoch": 0.23660922217047042,
      "grad_norm": 1.0221425294876099,
      "learning_rate": 3.5381414763863166e-05,
      "loss": 2.8233,
      "step": 127
    },
    {
      "epoch": 0.2384722869119702,
      "grad_norm": 1.9823639392852783,
      "learning_rate": 3.4549150281252636e-05,
      "loss": 3.0263,
      "step": 128
    },
    {
      "epoch": 0.24033535165346995,
      "grad_norm": 1.2429327964782715,
      "learning_rate": 3.372159227714218e-05,
      "loss": 3.3876,
      "step": 129
    },
    {
      "epoch": 0.24219841639496972,
      "grad_norm": 1.3118374347686768,
      "learning_rate": 3.289899283371657e-05,
      "loss": 3.3716,
      "step": 130
    },
    {
      "epoch": 0.2440614811364695,
      "grad_norm": 1.3129647970199585,
      "learning_rate": 3.2081602522734986e-05,
      "loss": 3.2332,
      "step": 131
    },
    {
      "epoch": 0.24592454587796925,
      "grad_norm": 1.2996779680252075,
      "learning_rate": 3.12696703292044e-05,
      "loss": 3.4357,
      "step": 132
    },
    {
      "epoch": 0.24778761061946902,
      "grad_norm": 1.2652643918991089,
      "learning_rate": 3.046344357553632e-05,
      "loss": 3.3381,
      "step": 133
    },
    {
      "epoch": 0.2496506753609688,
      "grad_norm": 1.2161061763763428,
      "learning_rate": 2.9663167846209998e-05,
      "loss": 3.4872,
      "step": 134
    },
    {
      "epoch": 0.2515137401024686,
      "grad_norm": 1.251867651939392,
      "learning_rate": 2.886908691296504e-05,
      "loss": 3.2131,
      "step": 135
    },
    {
      "epoch": 0.25337680484396835,
      "grad_norm": 1.3443360328674316,
      "learning_rate": 2.8081442660546125e-05,
      "loss": 3.1788,
      "step": 136
    },
    {
      "epoch": 0.25523986958546807,
      "grad_norm": 1.188568115234375,
      "learning_rate": 2.7300475013022663e-05,
      "loss": 3.4101,
      "step": 137
    },
    {
      "epoch": 0.25710293432696785,
      "grad_norm": 1.5125073194503784,
      "learning_rate": 2.6526421860705473e-05,
      "loss": 3.6851,
      "step": 138
    },
    {
      "epoch": 0.2589659990684676,
      "grad_norm": 1.3027081489562988,
      "learning_rate": 2.575951898768315e-05,
      "loss": 3.3351,
      "step": 139
    },
    {
      "epoch": 0.2608290638099674,
      "grad_norm": 1.2359957695007324,
      "learning_rate": 2.500000000000001e-05,
      "loss": 3.2338,
      "step": 140
    },
    {
      "epoch": 0.2626921285514672,
      "grad_norm": 1.3322489261627197,
      "learning_rate": 2.4248096254497288e-05,
      "loss": 2.8686,
      "step": 141
    },
    {
      "epoch": 0.26455519329296695,
      "grad_norm": 2.0788941383361816,
      "learning_rate": 2.350403678833976e-05,
      "loss": 3.6646,
      "step": 142
    },
    {
      "epoch": 0.26641825803446667,
      "grad_norm": 1.3164119720458984,
      "learning_rate": 2.2768048249248648e-05,
      "loss": 3.4545,
      "step": 143
    },
    {
      "epoch": 0.26828132277596645,
      "grad_norm": 1.652212381362915,
      "learning_rate": 2.2040354826462668e-05,
      "loss": 3.5121,
      "step": 144
    },
    {
      "epoch": 0.2701443875174662,
      "grad_norm": 1.5663607120513916,
      "learning_rate": 2.132117818244771e-05,
      "loss": 3.7725,
      "step": 145
    },
    {
      "epoch": 0.272007452258966,
      "grad_norm": 1.5914528369903564,
      "learning_rate": 2.061073738537635e-05,
      "loss": 3.7862,
      "step": 146
    },
    {
      "epoch": 0.2738705170004658,
      "grad_norm": 1.9115759134292603,
      "learning_rate": 1.9909248842397584e-05,
      "loss": 4.0243,
      "step": 147
    },
    {
      "epoch": 0.27573358174196555,
      "grad_norm": 1.703169584274292,
      "learning_rate": 1.9216926233717085e-05,
      "loss": 3.3129,
      "step": 148
    },
    {
      "epoch": 0.2775966464834653,
      "grad_norm": 1.8172022104263306,
      "learning_rate": 1.8533980447508137e-05,
      "loss": 3.9249,
      "step": 149
    },
    {
      "epoch": 0.27945971122496505,
      "grad_norm": 2.922093391418457,
      "learning_rate": 1.7860619515673033e-05,
      "loss": 4.2185,
      "step": 150
    },
    {
      "epoch": 0.27945971122496505,
      "eval_loss": 3.3465943336486816,
      "eval_runtime": 32.1695,
      "eval_samples_per_second": 28.132,
      "eval_steps_per_second": 14.082,
      "step": 150
    },
    {
      "epoch": 0.2813227759664648,
      "grad_norm": 1.0994431972503662,
      "learning_rate": 1.7197048550474643e-05,
      "loss": 2.7072,
      "step": 151
    },
    {
      "epoch": 0.2831858407079646,
      "grad_norm": 1.0657951831817627,
      "learning_rate": 1.6543469682057106e-05,
      "loss": 3.1659,
      "step": 152
    },
    {
      "epoch": 0.2850489054494644,
      "grad_norm": 1.3122929334640503,
      "learning_rate": 1.5900081996875083e-05,
      "loss": 3.1423,
      "step": 153
    },
    {
      "epoch": 0.28691197019096415,
      "grad_norm": 1.0391207933425903,
      "learning_rate": 1.526708147705013e-05,
      "loss": 2.8446,
      "step": 154
    },
    {
      "epoch": 0.2887750349324639,
      "grad_norm": 1.1318707466125488,
      "learning_rate": 1.4644660940672627e-05,
      "loss": 3.1408,
      "step": 155
    },
    {
      "epoch": 0.29063809967396365,
      "grad_norm": 1.162157416343689,
      "learning_rate": 1.4033009983067452e-05,
      "loss": 2.852,
      "step": 156
    },
    {
      "epoch": 0.2925011644154634,
      "grad_norm": 1.4153108596801758,
      "learning_rate": 1.3432314919041478e-05,
      "loss": 3.2088,
      "step": 157
    },
    {
      "epoch": 0.2943642291569632,
      "grad_norm": 1.2663054466247559,
      "learning_rate": 1.2842758726130283e-05,
      "loss": 3.3267,
      "step": 158
    },
    {
      "epoch": 0.296227293898463,
      "grad_norm": 1.0257394313812256,
      "learning_rate": 1.22645209888614e-05,
      "loss": 3.1659,
      "step": 159
    },
    {
      "epoch": 0.29809035863996275,
      "grad_norm": 1.4004676342010498,
      "learning_rate": 1.1697777844051105e-05,
      "loss": 3.1246,
      "step": 160
    },
    {
      "epoch": 0.2999534233814625,
      "grad_norm": 1.103905439376831,
      "learning_rate": 1.1142701927151456e-05,
      "loss": 3.2662,
      "step": 161
    },
    {
      "epoch": 0.30181648812296225,
      "grad_norm": 1.0684690475463867,
      "learning_rate": 1.0599462319663905e-05,
      "loss": 3.0609,
      "step": 162
    },
    {
      "epoch": 0.303679552864462,
      "grad_norm": 1.1447290182113647,
      "learning_rate": 1.006822449763537e-05,
      "loss": 2.9505,
      "step": 163
    },
    {
      "epoch": 0.3055426176059618,
      "grad_norm": 1.2237478494644165,
      "learning_rate": 9.549150281252633e-06,
      "loss": 3.2494,
      "step": 164
    },
    {
      "epoch": 0.3074056823474616,
      "grad_norm": 1.0637820959091187,
      "learning_rate": 9.042397785550405e-06,
      "loss": 2.9076,
      "step": 165
    },
    {
      "epoch": 0.30926874708896135,
      "grad_norm": 1.2343932390213013,
      "learning_rate": 8.548121372247918e-06,
      "loss": 3.3097,
      "step": 166
    },
    {
      "epoch": 0.3111318118304611,
      "grad_norm": 1.0267237424850464,
      "learning_rate": 8.066471602728803e-06,
      "loss": 2.92,
      "step": 167
    },
    {
      "epoch": 0.3129948765719609,
      "grad_norm": 1.1801772117614746,
      "learning_rate": 7.597595192178702e-06,
      "loss": 3.0281,
      "step": 168
    },
    {
      "epoch": 0.3148579413134606,
      "grad_norm": 1.3864883184432983,
      "learning_rate": 7.1416349648943894e-06,
      "loss": 3.2964,
      "step": 169
    },
    {
      "epoch": 0.3167210060549604,
      "grad_norm": 1.212416410446167,
      "learning_rate": 6.698729810778065e-06,
      "loss": 3.1213,
      "step": 170
    },
    {
      "epoch": 0.3185840707964602,
      "grad_norm": 1.1211936473846436,
      "learning_rate": 6.269014643030213e-06,
      "loss": 2.8045,
      "step": 171
    },
    {
      "epoch": 0.32044713553795995,
      "grad_norm": 1.1482990980148315,
      "learning_rate": 5.852620357053651e-06,
      "loss": 3.0468,
      "step": 172
    },
    {
      "epoch": 0.3223102002794597,
      "grad_norm": 1.3345028162002563,
      "learning_rate": 5.449673790581611e-06,
      "loss": 2.9481,
      "step": 173
    },
    {
      "epoch": 0.3241732650209595,
      "grad_norm": 1.0090028047561646,
      "learning_rate": 5.060297685041659e-06,
      "loss": 3.0541,
      "step": 174
    },
    {
      "epoch": 0.3260363297624592,
      "grad_norm": 1.2291425466537476,
      "learning_rate": 4.684610648167503e-06,
      "loss": 3.4524,
      "step": 175
    },
    {
      "epoch": 0.327899394503959,
      "grad_norm": 1.4188092947006226,
      "learning_rate": 4.322727117869951e-06,
      "loss": 3.387,
      "step": 176
    },
    {
      "epoch": 0.32976245924545877,
      "grad_norm": 1.2818740606307983,
      "learning_rate": 3.974757327377981e-06,
      "loss": 3.5364,
      "step": 177
    },
    {
      "epoch": 0.33162552398695855,
      "grad_norm": 1.3271952867507935,
      "learning_rate": 3.6408072716606346e-06,
      "loss": 3.1821,
      "step": 178
    },
    {
      "epoch": 0.3334885887284583,
      "grad_norm": 1.2895276546478271,
      "learning_rate": 3.3209786751399187e-06,
      "loss": 3.0432,
      "step": 179
    },
    {
      "epoch": 0.3353516534699581,
      "grad_norm": 1.2879928350448608,
      "learning_rate": 3.0153689607045845e-06,
      "loss": 3.367,
      "step": 180
    },
    {
      "epoch": 0.3372147182114579,
      "grad_norm": 1.3287880420684814,
      "learning_rate": 2.724071220034158e-06,
      "loss": 3.2295,
      "step": 181
    },
    {
      "epoch": 0.3390777829529576,
      "grad_norm": 1.314154863357544,
      "learning_rate": 2.4471741852423237e-06,
      "loss": 3.3232,
      "step": 182
    },
    {
      "epoch": 0.34094084769445737,
      "grad_norm": 1.3569366931915283,
      "learning_rate": 2.1847622018482283e-06,
      "loss": 2.9797,
      "step": 183
    },
    {
      "epoch": 0.34280391243595715,
      "grad_norm": 1.5291014909744263,
      "learning_rate": 1.9369152030840556e-06,
      "loss": 3.2835,
      "step": 184
    },
    {
      "epoch": 0.3446669771774569,
      "grad_norm": 1.3267407417297363,
      "learning_rate": 1.70370868554659e-06,
      "loss": 3.1526,
      "step": 185
    },
    {
      "epoch": 0.3465300419189567,
      "grad_norm": 1.36220383644104,
      "learning_rate": 1.4852136862001764e-06,
      "loss": 3.7465,
      "step": 186
    },
    {
      "epoch": 0.3483931066604565,
      "grad_norm": 1.2978960275650024,
      "learning_rate": 1.2814967607382432e-06,
      "loss": 3.7277,
      "step": 187
    },
    {
      "epoch": 0.3502561714019562,
      "grad_norm": 1.3940683603286743,
      "learning_rate": 1.0926199633097157e-06,
      "loss": 3.4685,
      "step": 188
    },
    {
      "epoch": 0.35211923614345597,
      "grad_norm": 1.4953304529190063,
      "learning_rate": 9.186408276168013e-07,
      "loss": 3.5472,
      "step": 189
    },
    {
      "epoch": 0.35398230088495575,
      "grad_norm": 1.4648510217666626,
      "learning_rate": 7.596123493895991e-07,
      "loss": 3.3383,
      "step": 190
    },
    {
      "epoch": 0.3558453656264555,
      "grad_norm": 1.5643545389175415,
      "learning_rate": 6.15582970243117e-07,
      "loss": 3.4377,
      "step": 191
    },
    {
      "epoch": 0.3577084303679553,
      "grad_norm": 1.4430068731307983,
      "learning_rate": 4.865965629214819e-07,
      "loss": 3.6607,
      "step": 192
    },
    {
      "epoch": 0.3595714951094551,
      "grad_norm": 1.5247834920883179,
      "learning_rate": 3.7269241793390085e-07,
      "loss": 3.8584,
      "step": 193
    },
    {
      "epoch": 0.3614345598509548,
      "grad_norm": 1.5962194204330444,
      "learning_rate": 2.7390523158633554e-07,
      "loss": 3.6967,
      "step": 194
    },
    {
      "epoch": 0.36329762459245457,
      "grad_norm": 3.414898633956909,
      "learning_rate": 1.9026509541272275e-07,
      "loss": 3.7934,
      "step": 195
    },
    {
      "epoch": 0.36516068933395435,
      "grad_norm": 1.7514005899429321,
      "learning_rate": 1.2179748700879012e-07,
      "loss": 3.9039,
      "step": 196
    },
    {
      "epoch": 0.3670237540754541,
      "grad_norm": 1.91195809841156,
      "learning_rate": 6.852326227130834e-08,
      "loss": 3.604,
      "step": 197
    },
    {
      "epoch": 0.3688868188169539,
      "grad_norm": 2.1014304161071777,
      "learning_rate": 3.04586490452119e-08,
      "loss": 3.6654,
      "step": 198
    },
    {
      "epoch": 0.3707498835584537,
      "grad_norm": 2.118112564086914,
      "learning_rate": 7.615242180436522e-09,
      "loss": 3.8243,
      "step": 199
    },
    {
      "epoch": 0.37261294829995345,
      "grad_norm": 3.0300862789154053,
      "learning_rate": 0.0,
      "loss": 3.6882,
      "step": 200
    },
    {
      "epoch": 0.37261294829995345,
      "eval_loss": 3.3320932388305664,
      "eval_runtime": 32.1296,
      "eval_samples_per_second": 28.167,
      "eval_steps_per_second": 14.099,
      "step": 200
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.4428946137088e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}