|
{
  "best_metric": 0.20296970009803772,
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
  "epoch": 2.0106331561140647,
  "eval_steps": 25,
  "global_step": 130,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.015466408893185114,
      "grad_norm": 0.34002751111984253,
      "learning_rate": 2.9999999999999997e-05,
      "loss": 0.4411,
      "step": 1
    },
    {
      "epoch": 0.015466408893185114,
      "eval_loss": 1.1426082849502563,
      "eval_runtime": 7.1707,
      "eval_samples_per_second": 6.973,
      "eval_steps_per_second": 6.973,
      "step": 1
    },
    {
      "epoch": 0.030932817786370227,
      "grad_norm": 0.4139275550842285,
      "learning_rate": 5.9999999999999995e-05,
      "loss": 0.4555,
      "step": 2
    },
    {
      "epoch": 0.04639922667955534,
      "grad_norm": 0.481924831867218,
      "learning_rate": 8.999999999999999e-05,
      "loss": 0.424,
      "step": 3
    },
    {
      "epoch": 0.061865635572740454,
      "grad_norm": 0.4299992322921753,
      "learning_rate": 0.00011999999999999999,
      "loss": 0.3855,
      "step": 4
    },
    {
      "epoch": 0.07733204446592556,
      "grad_norm": 0.42819246649742126,
      "learning_rate": 0.00015,
      "loss": 0.4611,
      "step": 5
    },
    {
      "epoch": 0.09279845335911067,
      "grad_norm": 0.9149371385574341,
      "learning_rate": 0.00017999999999999998,
      "loss": 0.7301,
      "step": 6
    },
    {
      "epoch": 0.1082648622522958,
      "grad_norm": 1.1431907415390015,
      "learning_rate": 0.00020999999999999998,
      "loss": 0.7905,
      "step": 7
    },
    {
      "epoch": 0.12373127114548091,
      "grad_norm": 1.802942156791687,
      "learning_rate": 0.00023999999999999998,
      "loss": 0.6581,
      "step": 8
    },
    {
      "epoch": 0.13919768003866603,
      "grad_norm": 2.6315770149230957,
      "learning_rate": 0.00027,
      "loss": 0.6625,
      "step": 9
    },
    {
      "epoch": 0.15466408893185113,
      "grad_norm": 1.5995557308197021,
      "learning_rate": 0.0003,
      "loss": 0.5518,
      "step": 10
    },
    {
      "epoch": 0.17013049782503625,
      "grad_norm": 1.1412254571914673,
      "learning_rate": 0.00029994859874633357,
      "loss": 0.4248,
      "step": 11
    },
    {
      "epoch": 0.18559690671822135,
      "grad_norm": 1.1108283996582031,
      "learning_rate": 0.000299794430213186,
      "loss": 0.4044,
      "step": 12
    },
    {
      "epoch": 0.20106331561140647,
      "grad_norm": 1.4058749675750732,
      "learning_rate": 0.00029953760005996916,
      "loss": 0.4258,
      "step": 13
    },
    {
      "epoch": 0.2165297245045916,
      "grad_norm": 0.8541085124015808,
      "learning_rate": 0.00029917828430524096,
      "loss": 0.419,
      "step": 14
    },
    {
      "epoch": 0.2319961333977767,
      "grad_norm": 0.7577608823776245,
      "learning_rate": 0.00029871672920607153,
      "loss": 0.4055,
      "step": 15
    },
    {
      "epoch": 0.24746254229096182,
      "grad_norm": 0.8010592460632324,
      "learning_rate": 0.00029815325108927063,
      "loss": 0.4587,
      "step": 16
    },
    {
      "epoch": 0.26292895118414694,
      "grad_norm": 2.178513526916504,
      "learning_rate": 0.00029748823613459316,
      "loss": 0.3844,
      "step": 17
    },
    {
      "epoch": 0.27839536007733207,
      "grad_norm": 1.049085021018982,
      "learning_rate": 0.0002967221401100708,
      "loss": 0.3085,
      "step": 18
    },
    {
      "epoch": 0.29386176897051713,
      "grad_norm": 0.2664034962654114,
      "learning_rate": 0.0002958554880596515,
      "loss": 0.2314,
      "step": 19
    },
    {
      "epoch": 0.30932817786370226,
      "grad_norm": 0.9501022696495056,
      "learning_rate": 0.0002948888739433602,
      "loss": 0.2162,
      "step": 20
    },
    {
      "epoch": 0.3247945867568874,
      "grad_norm": 0.29297515749931335,
      "learning_rate": 0.00029382296023022894,
      "loss": 0.1734,
      "step": 21
    },
    {
      "epoch": 0.3402609956500725,
      "grad_norm": 0.29325342178344727,
      "learning_rate": 0.00029265847744427303,
      "loss": 0.2472,
      "step": 22
    },
    {
      "epoch": 0.35572740454325763,
      "grad_norm": 0.7397837042808533,
      "learning_rate": 0.00029139622366382674,
      "loss": 0.3872,
      "step": 23
    },
    {
      "epoch": 0.3711938134364427,
      "grad_norm": 0.5978482365608215,
      "learning_rate": 0.00029003706397458023,
      "loss": 0.3756,
      "step": 24
    },
    {
      "epoch": 0.3866602223296278,
      "grad_norm": 0.8143917918205261,
      "learning_rate": 0.000288581929876693,
      "loss": 0.3305,
      "step": 25
    },
    {
      "epoch": 0.3866602223296278,
      "eval_loss": 0.2919376492500305,
      "eval_runtime": 7.3131,
      "eval_samples_per_second": 6.837,
      "eval_steps_per_second": 6.837,
      "step": 25
    },
    {
      "epoch": 0.40212663122281295,
      "grad_norm": 0.5971204042434692,
      "learning_rate": 0.0002870318186463901,
      "loss": 0.329,
      "step": 26
    },
    {
      "epoch": 0.41759304011599807,
      "grad_norm": 0.5656357407569885,
      "learning_rate": 0.0002853877926524791,
      "loss": 0.2991,
      "step": 27
    },
    {
      "epoch": 0.4330594490091832,
      "grad_norm": 0.9046909809112549,
      "learning_rate": 0.00028365097862825513,
      "loss": 0.3522,
      "step": 28
    },
    {
      "epoch": 0.4485258579023683,
      "grad_norm": 1.8343451023101807,
      "learning_rate": 0.00028182256689929475,
      "loss": 0.3253,
      "step": 29
    },
    {
      "epoch": 0.4639922667955534,
      "grad_norm": 1.3387500047683716,
      "learning_rate": 0.0002799038105676658,
      "loss": 0.3363,
      "step": 30
    },
    {
      "epoch": 0.4794586756887385,
      "grad_norm": 0.7546955943107605,
      "learning_rate": 0.0002778960246531138,
      "loss": 0.3319,
      "step": 31
    },
    {
      "epoch": 0.49492508458192364,
      "grad_norm": 0.5383890271186829,
      "learning_rate": 0.0002758005851918136,
      "loss": 0.4198,
      "step": 32
    },
    {
      "epoch": 0.5103914934751087,
      "grad_norm": 2.9426941871643066,
      "learning_rate": 0.0002736189282933023,
      "loss": 0.3846,
      "step": 33
    },
    {
      "epoch": 0.5258579023682939,
      "grad_norm": 1.3911627531051636,
      "learning_rate": 0.0002713525491562421,
      "loss": 0.2681,
      "step": 34
    },
    {
      "epoch": 0.541324311261479,
      "grad_norm": 0.33964619040489197,
      "learning_rate": 0.00026900300104368524,
      "loss": 0.2078,
      "step": 35
    },
    {
      "epoch": 0.5567907201546641,
      "grad_norm": 0.6098275780677795,
      "learning_rate": 0.0002665718942185456,
      "loss": 0.2027,
      "step": 36
    },
    {
      "epoch": 0.5722571290478492,
      "grad_norm": 0.5025777816772461,
      "learning_rate": 0.00026406089484000466,
      "loss": 0.206,
      "step": 37
    },
    {
      "epoch": 0.5877235379410343,
      "grad_norm": 0.4298511743545532,
      "learning_rate": 0.00026147172382160914,
      "loss": 0.2803,
      "step": 38
    },
    {
      "epoch": 0.6031899468342194,
      "grad_norm": 0.9432958364486694,
      "learning_rate": 0.00025880615565184313,
      "loss": 0.3849,
      "step": 39
    },
    {
      "epoch": 0.6186563557274045,
      "grad_norm": 0.6062995791435242,
      "learning_rate": 0.00025606601717798207,
      "loss": 0.3508,
      "step": 40
    },
    {
      "epoch": 0.6341227646205897,
      "grad_norm": 0.5804493427276611,
      "learning_rate": 0.0002532531863540631,
      "loss": 0.3212,
      "step": 41
    },
    {
      "epoch": 0.6495891735137748,
      "grad_norm": 0.5816200375556946,
      "learning_rate": 0.0002503695909538287,
      "loss": 0.2876,
      "step": 42
    },
    {
      "epoch": 0.6650555824069598,
      "grad_norm": 0.4420316219329834,
      "learning_rate": 0.0002474172072495275,
      "loss": 0.3054,
      "step": 43
    },
    {
      "epoch": 0.680521991300145,
      "grad_norm": 0.45776253938674927,
      "learning_rate": 0.0002443980586574756,
      "loss": 0.279,
      "step": 44
    },
    {
      "epoch": 0.6959884001933301,
      "grad_norm": 0.7748484015464783,
      "learning_rate": 0.00024131421435130807,
      "loss": 0.2875,
      "step": 45
    },
    {
      "epoch": 0.7114548090865153,
      "grad_norm": 0.5803005695343018,
      "learning_rate": 0.00023816778784387094,
      "loss": 0.3088,
      "step": 46
    },
    {
      "epoch": 0.7269212179797003,
      "grad_norm": 1.1293902397155762,
      "learning_rate": 0.0002349609355387249,
      "loss": 0.3188,
      "step": 47
    },
    {
      "epoch": 0.7423876268728854,
      "grad_norm": 1.2384657859802246,
      "learning_rate": 0.00023169585525225405,
      "loss": 0.4065,
      "step": 48
    },
    {
      "epoch": 0.7578540357660706,
      "grad_norm": 1.8807259798049927,
      "learning_rate": 0.0002283747847073923,
      "loss": 0.2961,
      "step": 49
    },
    {
      "epoch": 0.7733204446592556,
      "grad_norm": 0.7909408211708069,
      "learning_rate": 0.000225,
      "loss": 0.2371,
      "step": 50
    },
    {
      "epoch": 0.7733204446592556,
      "eval_loss": 0.26686012744903564,
      "eval_runtime": 7.3174,
      "eval_samples_per_second": 6.833,
      "eval_steps_per_second": 6.833,
      "step": 50
    },
    {
      "epoch": 0.7887868535524408,
      "grad_norm": 0.3425706624984741,
      "learning_rate": 0.00022157381403894124,
      "loss": 0.1907,
      "step": 51
    },
    {
      "epoch": 0.8042532624456259,
      "grad_norm": 0.5927841663360596,
      "learning_rate": 0.00021809857496093199,
      "loss": 0.1673,
      "step": 52
    },
    {
      "epoch": 0.8197196713388111,
      "grad_norm": 0.3090386986732483,
      "learning_rate": 0.00021457666452124428,
      "loss": 0.2062,
      "step": 53
    },
    {
      "epoch": 0.8351860802319961,
      "grad_norm": 0.7005581855773926,
      "learning_rate": 0.00021101049646137003,
      "loss": 0.3042,
      "step": 54
    },
    {
      "epoch": 0.8506524891251812,
      "grad_norm": 0.44823092222213745,
      "learning_rate": 0.00020740251485476345,
      "loss": 0.2754,
      "step": 55
    },
    {
      "epoch": 0.8661188980183664,
      "grad_norm": 0.43465909361839294,
      "learning_rate": 0.000203755192431795,
      "loss": 0.2858,
      "step": 56
    },
    {
      "epoch": 0.8815853069115515,
      "grad_norm": 0.45701804757118225,
      "learning_rate": 0.0002000710288850656,
      "loss": 0.2677,
      "step": 57
    },
    {
      "epoch": 0.8970517158047366,
      "grad_norm": 0.36131733655929565,
      "learning_rate": 0.0001963525491562421,
      "loss": 0.2443,
      "step": 58
    },
    {
      "epoch": 0.9125181246979217,
      "grad_norm": 0.3090292811393738,
      "learning_rate": 0.0001926023017055884,
      "loss": 0.2337,
      "step": 59
    },
    {
      "epoch": 0.9279845335911068,
      "grad_norm": 0.4051980972290039,
      "learning_rate": 0.0001888228567653781,
      "loss": 0.2617,
      "step": 60
    },
    {
      "epoch": 0.943450942484292,
      "grad_norm": 0.4554498493671417,
      "learning_rate": 0.0001850168045783858,
      "loss": 0.2417,
      "step": 61
    },
    {
      "epoch": 0.958917351377477,
      "grad_norm": 0.6270922422409058,
      "learning_rate": 0.00018118675362266385,
      "loss": 0.2552,
      "step": 62
    },
    {
      "epoch": 0.9743837602706622,
      "grad_norm": 0.750235378742218,
      "learning_rate": 0.00017733532882382213,
      "loss": 0.2594,
      "step": 63
    },
    {
      "epoch": 0.9898501691638473,
      "grad_norm": 1.0947400331497192,
      "learning_rate": 0.00017346516975603462,
      "loss": 0.339,
      "step": 64
    },
    {
      "epoch": 1.0053165780570323,
      "grad_norm": 0.9929707050323486,
      "learning_rate": 0.00016957892883300775,
      "loss": 0.3453,
      "step": 65
    },
    {
      "epoch": 1.0207829869502174,
      "grad_norm": 0.45975467562675476,
      "learning_rate": 0.000165679269490148,
      "loss": 0.2237,
      "step": 66
    },
    {
      "epoch": 1.0362493958434027,
      "grad_norm": 0.28348809480667114,
      "learning_rate": 0.00016176886435917675,
      "loss": 0.1931,
      "step": 67
    },
    {
      "epoch": 1.0517158047365878,
      "grad_norm": 0.22970043122768402,
      "learning_rate": 0.0001578503934364416,
      "loss": 0.1668,
      "step": 68
    },
    {
      "epoch": 1.0671822136297728,
      "grad_norm": 0.2886936664581299,
      "learning_rate": 0.00015392654224618098,
      "loss": 0.1539,
      "step": 69
    },
    {
      "epoch": 1.082648622522958,
      "grad_norm": 0.3277800977230072,
      "learning_rate": 0.00015,
      "loss": 0.2382,
      "step": 70
    },
    {
      "epoch": 1.098115031416143,
      "grad_norm": 0.7377809882164001,
      "learning_rate": 0.00014607345775381904,
      "loss": 0.2307,
      "step": 71
    },
    {
      "epoch": 1.1135814403093283,
      "grad_norm": 0.4106524884700775,
      "learning_rate": 0.0001421496065635584,
      "loss": 0.2433,
      "step": 72
    },
    {
      "epoch": 1.1290478492025133,
      "grad_norm": 0.5610142350196838,
      "learning_rate": 0.00013823113564082325,
      "loss": 0.2568,
      "step": 73
    },
    {
      "epoch": 1.1445142580956984,
      "grad_norm": 0.9046674370765686,
      "learning_rate": 0.000134320730509852,
      "loss": 0.2751,
      "step": 74
    },
    {
      "epoch": 1.1599806669888835,
      "grad_norm": 0.617001473903656,
      "learning_rate": 0.00013042107116699228,
      "loss": 0.2139,
      "step": 75
    },
    {
      "epoch": 1.1599806669888835,
      "eval_loss": 0.2165301889181137,
      "eval_runtime": 7.3111,
      "eval_samples_per_second": 6.839,
      "eval_steps_per_second": 6.839,
      "step": 75
    },
    {
      "epoch": 1.1754470758820685,
      "grad_norm": 0.6039713025093079,
      "learning_rate": 0.00012653483024396533,
      "loss": 0.2185,
      "step": 76
    },
    {
      "epoch": 1.1909134847752538,
      "grad_norm": 0.4915192127227783,
      "learning_rate": 0.00012266467117617787,
      "loss": 0.2277,
      "step": 77
    },
    {
      "epoch": 1.206379893668439,
      "grad_norm": 0.31167200207710266,
      "learning_rate": 0.00011881324637733611,
      "loss": 0.2205,
      "step": 78
    },
    {
      "epoch": 1.221846302561624,
      "grad_norm": 0.4483063817024231,
      "learning_rate": 0.00011498319542161423,
      "loss": 0.24,
      "step": 79
    },
    {
      "epoch": 1.237312711454809,
      "grad_norm": 0.4659959077835083,
      "learning_rate": 0.00011117714323462186,
      "loss": 0.2655,
      "step": 80
    },
    {
      "epoch": 1.252779120347994,
      "grad_norm": 0.2737232446670532,
      "learning_rate": 0.0001073976982944116,
      "loss": 0.2592,
      "step": 81
    },
    {
      "epoch": 1.2682455292411794,
      "grad_norm": 0.49031612277030945,
      "learning_rate": 0.0001036474508437579,
      "loss": 0.2176,
      "step": 82
    },
    {
      "epoch": 1.2837119381343645,
      "grad_norm": 0.2844369113445282,
      "learning_rate": 9.992897111493437e-05,
      "loss": 0.1823,
      "step": 83
    },
    {
      "epoch": 1.2991783470275495,
      "grad_norm": 0.30533647537231445,
      "learning_rate": 9.624480756820496e-05,
      "loss": 0.1705,
      "step": 84
    },
    {
      "epoch": 1.3146447559207346,
      "grad_norm": 0.37838345766067505,
      "learning_rate": 9.259748514523653e-05,
      "loss": 0.1426,
      "step": 85
    },
    {
      "epoch": 1.3301111648139199,
      "grad_norm": 0.4842795133590698,
      "learning_rate": 8.898950353862998e-05,
      "loss": 0.1921,
      "step": 86
    },
    {
      "epoch": 1.345577573707105,
      "grad_norm": 0.7472468018531799,
      "learning_rate": 8.54233354787557e-05,
      "loss": 0.241,
      "step": 87
    },
    {
      "epoch": 1.36104398260029,
      "grad_norm": 0.8189084529876709,
      "learning_rate": 8.190142503906798e-05,
      "loss": 0.2342,
      "step": 88
    },
    {
      "epoch": 1.376510391493475,
      "grad_norm": 0.45445919036865234,
      "learning_rate": 7.842618596105872e-05,
      "loss": 0.2118,
      "step": 89
    },
    {
      "epoch": 1.3919768003866602,
      "grad_norm": 0.4599757492542267,
      "learning_rate": 7.500000000000002e-05,
      "loss": 0.212,
      "step": 90
    },
    {
      "epoch": 1.4074432092798452,
      "grad_norm": 0.6367545127868652,
      "learning_rate": 7.162521529260767e-05,
      "loss": 0.2341,
      "step": 91
    },
    {
      "epoch": 1.4229096181730305,
      "grad_norm": 0.5731837749481201,
      "learning_rate": 6.830414474774594e-05,
      "loss": 0.2237,
      "step": 92
    },
    {
      "epoch": 1.4383760270662156,
      "grad_norm": 0.5188281536102295,
      "learning_rate": 6.50390644612751e-05,
      "loss": 0.2233,
      "step": 93
    },
    {
      "epoch": 1.4538424359594007,
      "grad_norm": 0.3524056375026703,
      "learning_rate": 6.183221215612904e-05,
      "loss": 0.236,
      "step": 94
    },
    {
      "epoch": 1.4693088448525857,
      "grad_norm": 0.28299957513809204,
      "learning_rate": 5.8685785648691894e-05,
      "loss": 0.2433,
      "step": 95
    },
    {
      "epoch": 1.484775253745771,
      "grad_norm": 0.3486190736293793,
      "learning_rate": 5.56019413425244e-05,
      "loss": 0.2873,
      "step": 96
    },
    {
      "epoch": 1.500241662638956,
      "grad_norm": 0.28295159339904785,
      "learning_rate": 5.2582792750472464e-05,
      "loss": 0.2721,
      "step": 97
    },
    {
      "epoch": 1.5157080715321412,
      "grad_norm": 0.3966909646987915,
      "learning_rate": 4.963040904617131e-05,
      "loss": 0.1962,
      "step": 98
    },
    {
      "epoch": 1.5311744804253262,
      "grad_norm": 0.3956305682659149,
      "learning_rate": 4.6746813645936877e-05,
      "loss": 0.1928,
      "step": 99
    },
    {
      "epoch": 1.5466408893185113,
      "grad_norm": 0.28398260474205017,
      "learning_rate": 4.3933982822017876e-05,
      "loss": 0.1685,
      "step": 100
    },
    {
      "epoch": 1.5466408893185113,
      "eval_loss": 0.20296970009803772,
      "eval_runtime": 7.3114,
      "eval_samples_per_second": 6.839,
      "eval_steps_per_second": 6.839,
      "step": 100
    },
    {
      "epoch": 1.5621072982116964,
      "grad_norm": 0.1923961192369461,
      "learning_rate": 4.1193844348156886e-05,
      "loss": 0.1462,
      "step": 101
    },
    {
      "epoch": 1.5775737071048814,
      "grad_norm": 0.18478325009346008,
      "learning_rate": 3.852827617839084e-05,
      "loss": 0.1648,
      "step": 102
    },
    {
      "epoch": 1.5930401159980667,
      "grad_norm": 0.30043265223503113,
      "learning_rate": 3.593910515999536e-05,
      "loss": 0.1844,
      "step": 103
    },
    {
      "epoch": 1.6085065248912518,
      "grad_norm": 0.5838200449943542,
      "learning_rate": 3.342810578145436e-05,
      "loss": 0.2355,
      "step": 104
    },
    {
      "epoch": 1.623972933784437,
      "grad_norm": 0.5884765386581421,
      "learning_rate": 3.099699895631474e-05,
      "loss": 0.2287,
      "step": 105
    },
    {
      "epoch": 1.6394393426776221,
      "grad_norm": 0.4631075859069824,
      "learning_rate": 2.8647450843757897e-05,
      "loss": 0.2004,
      "step": 106
    },
    {
      "epoch": 1.6549057515708072,
      "grad_norm": 0.3879186809062958,
      "learning_rate": 2.6381071706697644e-05,
      "loss": 0.1996,
      "step": 107
    },
    {
      "epoch": 1.6703721604639923,
      "grad_norm": 0.2909069359302521,
      "learning_rate": 2.4199414808186406e-05,
      "loss": 0.1985,
      "step": 108
    },
    {
      "epoch": 1.6858385693571774,
      "grad_norm": 0.34202706813812256,
      "learning_rate": 2.210397534688617e-05,
      "loss": 0.1848,
      "step": 109
    },
    {
      "epoch": 1.7013049782503624,
      "grad_norm": 0.3348611295223236,
      "learning_rate": 2.009618943233419e-05,
      "loss": 0.2004,
      "step": 110
    },
    {
      "epoch": 1.7167713871435475,
      "grad_norm": 0.4890515208244324,
      "learning_rate": 1.8177433100705207e-05,
      "loss": 0.2107,
      "step": 111
    },
    {
      "epoch": 1.7322377960367326,
      "grad_norm": 0.560546875,
      "learning_rate": 1.634902137174483e-05,
      "loss": 0.2933,
      "step": 112
    },
    {
      "epoch": 1.7477042049299178,
      "grad_norm": 0.5725692510604858,
      "learning_rate": 1.4612207347520938e-05,
      "loss": 0.2927,
      "step": 113
    },
    {
      "epoch": 1.763170613823103,
      "grad_norm": 0.33504927158355713,
      "learning_rate": 1.2968181353609852e-05,
      "loss": 0.1681,
      "step": 114
    },
    {
      "epoch": 1.7786370227162882,
      "grad_norm": 0.34945377707481384,
      "learning_rate": 1.1418070123306989e-05,
      "loss": 0.1594,
      "step": 115
    },
    {
      "epoch": 1.7941034316094733,
      "grad_norm": 0.31711897253990173,
      "learning_rate": 9.962936025419754e-06,
      "loss": 0.1475,
      "step": 116
    },
    {
      "epoch": 1.8095698405026583,
      "grad_norm": 0.25584596395492554,
      "learning_rate": 8.603776336173235e-06,
      "loss": 0.1476,
      "step": 117
    },
    {
      "epoch": 1.8250362493958434,
      "grad_norm": 0.34091395139694214,
      "learning_rate": 7.34152255572697e-06,
      "loss": 0.1856,
      "step": 118
    },
    {
      "epoch": 1.8405026582890285,
      "grad_norm": 0.3409639894962311,
      "learning_rate": 6.1770397697710414e-06,
      "loss": 0.1982,
      "step": 119
    },
    {
      "epoch": 1.8559690671822135,
      "grad_norm": 0.2535560131072998,
      "learning_rate": 5.11112605663977e-06,
      "loss": 0.1881,
      "step": 120
    },
    {
      "epoch": 1.8714354760753986,
      "grad_norm": 0.26184314489364624,
      "learning_rate": 4.144511940348516e-06,
      "loss": 0.1976,
      "step": 121
    },
    {
      "epoch": 1.886901884968584,
      "grad_norm": 0.28429871797561646,
      "learning_rate": 3.2778598899291465e-06,
      "loss": 0.1938,
      "step": 122
    },
    {
      "epoch": 1.902368293861769,
      "grad_norm": 0.23394553363323212,
      "learning_rate": 2.511763865406824e-06,
      "loss": 0.1655,
      "step": 123
    },
    {
      "epoch": 1.917834702754954,
      "grad_norm": 0.36569544672966003,
      "learning_rate": 1.8467489107293509e-06,
      "loss": 0.2076,
      "step": 124
    },
    {
      "epoch": 1.9333011116481393,
      "grad_norm": 0.38919389247894287,
      "learning_rate": 1.2832707939284427e-06,
      "loss": 0.1977,
      "step": 125
    },
    {
      "epoch": 1.9333011116481393,
      "eval_loss": 0.18706867098808289,
      "eval_runtime": 7.3048,
      "eval_samples_per_second": 6.845,
      "eval_steps_per_second": 6.845,
      "step": 125
    },
    {
      "epoch": 1.9487675205413244,
      "grad_norm": 0.2989993989467621,
      "learning_rate": 8.217156947590064e-07,
      "loss": 0.2162,
      "step": 126
    },
    {
      "epoch": 1.9642339294345095,
      "grad_norm": 0.41685956716537476,
      "learning_rate": 4.623999400308054e-07,
      "loss": 0.2151,
      "step": 127
    },
    {
      "epoch": 1.9797003383276945,
      "grad_norm": 0.2608714699745178,
      "learning_rate": 2.05569786813925e-07,
      "loss": 0.2491,
      "step": 128
    },
    {
      "epoch": 1.9951667472208796,
      "grad_norm": 0.4548462927341461,
      "learning_rate": 5.1401253666411016e-08,
      "loss": 0.333,
      "step": 129
    },
    {
      "epoch": 2.0106331561140647,
      "grad_norm": 0.17462550103664398,
      "learning_rate": 0.0,
      "loss": 0.1465,
      "step": 130
    }
  ],
  "logging_steps": 1,
  "max_steps": 130,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.052912237505413e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}
|
|