|
{ |
|
"best_metric": 0.8461538461538461, |
|
"best_model_checkpoint": "MAE-CT-M1N0-M12_v8_split3_v3/checkpoint-3710", |
|
"epoch": 147.00579710144928, |
|
"eval_steps": 500, |
|
"global_step": 10350, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.000966183574879227, |
|
"grad_norm": 1.7746223211288452, |
|
"learning_rate": 9.661835748792271e-08, |
|
"loss": 0.7034, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.001932367149758454, |
|
"grad_norm": 3.510434865951538, |
|
"learning_rate": 1.9323671497584542e-07, |
|
"loss": 0.7006, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.002898550724637681, |
|
"grad_norm": 5.10714054107666, |
|
"learning_rate": 2.8985507246376816e-07, |
|
"loss": 0.7, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.003864734299516908, |
|
"grad_norm": 1.8410993814468384, |
|
"learning_rate": 3.8647342995169085e-07, |
|
"loss": 0.6961, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.004830917874396135, |
|
"grad_norm": 1.225154161453247, |
|
"learning_rate": 4.830917874396135e-07, |
|
"loss": 0.6918, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.005797101449275362, |
|
"grad_norm": 2.331960678100586, |
|
"learning_rate": 5.797101449275363e-07, |
|
"loss": 0.683, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.00676328502415459, |
|
"grad_norm": 5.630166053771973, |
|
"learning_rate": 6.763285024154589e-07, |
|
"loss": 0.6676, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.00676328502415459, |
|
"eval_accuracy": 0.6538461538461539, |
|
"eval_loss": 0.6765444874763489, |
|
"eval_runtime": 18.2677, |
|
"eval_samples_per_second": 4.27, |
|
"eval_steps_per_second": 1.095, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.0009661835748793, |
|
"grad_norm": 1.8150583505630493, |
|
"learning_rate": 7.729468599033817e-07, |
|
"loss": 0.6831, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.0019323671497584, |
|
"grad_norm": 3.461575984954834, |
|
"learning_rate": 8.695652173913044e-07, |
|
"loss": 0.7022, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.0028985507246377, |
|
"grad_norm": 3.506470203399658, |
|
"learning_rate": 9.66183574879227e-07, |
|
"loss": 0.6608, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.0038647342995168, |
|
"grad_norm": 1.7554961442947388, |
|
"learning_rate": 1.0628019323671499e-06, |
|
"loss": 0.6582, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.0048309178743962, |
|
"grad_norm": 2.083617687225342, |
|
"learning_rate": 1.1594202898550726e-06, |
|
"loss": 0.6522, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.0057971014492753, |
|
"grad_norm": 1.8603112697601318, |
|
"learning_rate": 1.2560386473429952e-06, |
|
"loss": 0.6149, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.0067632850241546, |
|
"grad_norm": 8.43808364868164, |
|
"learning_rate": 1.3526570048309178e-06, |
|
"loss": 0.6735, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.0067632850241546, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_loss": 0.6379338502883911, |
|
"eval_runtime": 17.518, |
|
"eval_samples_per_second": 4.453, |
|
"eval_steps_per_second": 1.142, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.0009661835748793, |
|
"grad_norm": 3.509326457977295, |
|
"learning_rate": 1.4492753623188408e-06, |
|
"loss": 0.6462, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.0019323671497586, |
|
"grad_norm": 2.7582528591156006, |
|
"learning_rate": 1.5458937198067634e-06, |
|
"loss": 0.6394, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.0028985507246375, |
|
"grad_norm": 4.160943508148193, |
|
"learning_rate": 1.6425120772946862e-06, |
|
"loss": 0.5903, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.003864734299517, |
|
"grad_norm": 4.066112995147705, |
|
"learning_rate": 1.7391304347826088e-06, |
|
"loss": 0.6221, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.004830917874396, |
|
"grad_norm": 7.548180103302002, |
|
"learning_rate": 1.8357487922705318e-06, |
|
"loss": 0.7383, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.0057971014492755, |
|
"grad_norm": 2.1525609493255615, |
|
"learning_rate": 1.932367149758454e-06, |
|
"loss": 0.6885, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.006763285024155, |
|
"grad_norm": 7.099206447601318, |
|
"learning_rate": 2.028985507246377e-06, |
|
"loss": 0.6391, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.006763285024155, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_loss": 0.6261193752288818, |
|
"eval_runtime": 17.3999, |
|
"eval_samples_per_second": 4.483, |
|
"eval_steps_per_second": 1.149, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.0009661835748793, |
|
"grad_norm": 2.9164934158325195, |
|
"learning_rate": 2.1256038647342997e-06, |
|
"loss": 0.5678, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.0019323671497586, |
|
"grad_norm": 4.371841907501221, |
|
"learning_rate": 2.222222222222222e-06, |
|
"loss": 0.6158, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.0028985507246375, |
|
"grad_norm": 5.164270401000977, |
|
"learning_rate": 2.3188405797101453e-06, |
|
"loss": 0.6795, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 3.003864734299517, |
|
"grad_norm": 5.403680324554443, |
|
"learning_rate": 2.4154589371980677e-06, |
|
"loss": 0.5925, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 3.004830917874396, |
|
"grad_norm": 4.002570629119873, |
|
"learning_rate": 2.5120772946859904e-06, |
|
"loss": 0.621, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 3.0057971014492755, |
|
"grad_norm": 16.2905216217041, |
|
"learning_rate": 2.6086956521739132e-06, |
|
"loss": 0.5738, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 3.006763285024155, |
|
"grad_norm": 31.137666702270508, |
|
"learning_rate": 2.7053140096618356e-06, |
|
"loss": 0.8085, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 3.006763285024155, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_loss": 0.6050156950950623, |
|
"eval_runtime": 17.572, |
|
"eval_samples_per_second": 4.439, |
|
"eval_steps_per_second": 1.138, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 4.000966183574879, |
|
"grad_norm": 4.449572563171387, |
|
"learning_rate": 2.801932367149759e-06, |
|
"loss": 0.6442, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 4.001932367149759, |
|
"grad_norm": 6.249354839324951, |
|
"learning_rate": 2.8985507246376816e-06, |
|
"loss": 0.574, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.0028985507246375, |
|
"grad_norm": 8.182052612304688, |
|
"learning_rate": 2.995169082125604e-06, |
|
"loss": 0.5859, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 4.003864734299517, |
|
"grad_norm": 9.79288387298584, |
|
"learning_rate": 3.0917874396135268e-06, |
|
"loss": 0.5761, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 4.004830917874396, |
|
"grad_norm": 9.698325157165527, |
|
"learning_rate": 3.188405797101449e-06, |
|
"loss": 0.5726, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 4.005797101449275, |
|
"grad_norm": 15.086708068847656, |
|
"learning_rate": 3.2850241545893724e-06, |
|
"loss": 0.6452, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 4.006763285024155, |
|
"grad_norm": 41.014373779296875, |
|
"learning_rate": 3.381642512077295e-06, |
|
"loss": 0.4774, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 4.006763285024155, |
|
"eval_accuracy": 0.6538461538461539, |
|
"eval_loss": 0.5725159049034119, |
|
"eval_runtime": 16.8333, |
|
"eval_samples_per_second": 4.634, |
|
"eval_steps_per_second": 1.188, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 5.000966183574879, |
|
"grad_norm": 9.569558143615723, |
|
"learning_rate": 3.4782608695652175e-06, |
|
"loss": 0.6024, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 5.001932367149759, |
|
"grad_norm": 12.862885475158691, |
|
"learning_rate": 3.5748792270531403e-06, |
|
"loss": 0.5685, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 5.0028985507246375, |
|
"grad_norm": 12.0186128616333, |
|
"learning_rate": 3.6714975845410635e-06, |
|
"loss": 0.519, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 5.003864734299517, |
|
"grad_norm": 15.317484855651855, |
|
"learning_rate": 3.768115942028986e-06, |
|
"loss": 0.5404, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 5.004830917874396, |
|
"grad_norm": 13.592208862304688, |
|
"learning_rate": 3.864734299516908e-06, |
|
"loss": 0.5157, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 5.005797101449275, |
|
"grad_norm": 7.7680253982543945, |
|
"learning_rate": 3.961352657004831e-06, |
|
"loss": 0.477, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 5.006763285024155, |
|
"grad_norm": 5.047906398773193, |
|
"learning_rate": 4.057971014492754e-06, |
|
"loss": 0.5668, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 5.006763285024155, |
|
"eval_accuracy": 0.717948717948718, |
|
"eval_loss": 0.5513753294944763, |
|
"eval_runtime": 15.872, |
|
"eval_samples_per_second": 4.914, |
|
"eval_steps_per_second": 1.26, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 6.000966183574879, |
|
"grad_norm": 7.734065055847168, |
|
"learning_rate": 4.154589371980677e-06, |
|
"loss": 0.398, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 6.001932367149759, |
|
"grad_norm": 31.831592559814453, |
|
"learning_rate": 4.251207729468599e-06, |
|
"loss": 0.5832, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 6.0028985507246375, |
|
"grad_norm": 33.391658782958984, |
|
"learning_rate": 4.347826086956522e-06, |
|
"loss": 0.4523, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 6.003864734299517, |
|
"grad_norm": 49.021305084228516, |
|
"learning_rate": 4.444444444444444e-06, |
|
"loss": 0.3297, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 6.004830917874396, |
|
"grad_norm": 16.681032180786133, |
|
"learning_rate": 4.541062801932368e-06, |
|
"loss": 0.5626, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 6.005797101449275, |
|
"grad_norm": 43.30965805053711, |
|
"learning_rate": 4.637681159420291e-06, |
|
"loss": 0.4457, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 6.006763285024155, |
|
"grad_norm": 130.68243408203125, |
|
"learning_rate": 4.7342995169082125e-06, |
|
"loss": 0.6607, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 6.006763285024155, |
|
"eval_accuracy": 0.7564102564102564, |
|
"eval_loss": 0.4834001064300537, |
|
"eval_runtime": 16.065, |
|
"eval_samples_per_second": 4.855, |
|
"eval_steps_per_second": 1.245, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 7.000966183574879, |
|
"grad_norm": 20.249547958374023, |
|
"learning_rate": 4.830917874396135e-06, |
|
"loss": 0.3441, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 7.001932367149759, |
|
"grad_norm": 4.507364273071289, |
|
"learning_rate": 4.927536231884059e-06, |
|
"loss": 0.4055, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 7.0028985507246375, |
|
"grad_norm": 32.87883377075195, |
|
"learning_rate": 5.024154589371981e-06, |
|
"loss": 0.347, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 7.003864734299517, |
|
"grad_norm": 51.900856018066406, |
|
"learning_rate": 5.1207729468599045e-06, |
|
"loss": 0.4185, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 7.004830917874396, |
|
"grad_norm": 59.27176284790039, |
|
"learning_rate": 5.2173913043478265e-06, |
|
"loss": 0.5276, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 7.005797101449275, |
|
"grad_norm": 49.032371520996094, |
|
"learning_rate": 5.314009661835749e-06, |
|
"loss": 0.4577, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 7.006763285024155, |
|
"grad_norm": 4.786675930023193, |
|
"learning_rate": 5.410628019323671e-06, |
|
"loss": 1.0769, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 7.006763285024155, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_loss": 0.7449601888656616, |
|
"eval_runtime": 17.1147, |
|
"eval_samples_per_second": 4.557, |
|
"eval_steps_per_second": 1.169, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 8.00096618357488, |
|
"grad_norm": 8.022248268127441, |
|
"learning_rate": 5.507246376811595e-06, |
|
"loss": 0.6193, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 8.001932367149758, |
|
"grad_norm": 11.327919960021973, |
|
"learning_rate": 5.603864734299518e-06, |
|
"loss": 0.5499, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 8.002898550724638, |
|
"grad_norm": 21.584156036376953, |
|
"learning_rate": 5.70048309178744e-06, |
|
"loss": 0.4251, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 8.003864734299517, |
|
"grad_norm": 42.50540542602539, |
|
"learning_rate": 5.797101449275363e-06, |
|
"loss": 0.5429, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 8.004830917874395, |
|
"grad_norm": 77.20704650878906, |
|
"learning_rate": 5.893719806763285e-06, |
|
"loss": 0.3544, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 8.005797101449275, |
|
"grad_norm": 93.73331451416016, |
|
"learning_rate": 5.990338164251208e-06, |
|
"loss": 0.5156, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 8.006763285024155, |
|
"grad_norm": 2.17142915725708, |
|
"learning_rate": 6.086956521739132e-06, |
|
"loss": 0.5425, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 8.006763285024155, |
|
"eval_accuracy": 0.7948717948717948, |
|
"eval_loss": 0.46234259009361267, |
|
"eval_runtime": 17.202, |
|
"eval_samples_per_second": 4.534, |
|
"eval_steps_per_second": 1.163, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 9.00096618357488, |
|
"grad_norm": 57.35111999511719, |
|
"learning_rate": 6.1835748792270535e-06, |
|
"loss": 0.304, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 9.001932367149758, |
|
"grad_norm": 12.979510307312012, |
|
"learning_rate": 6.280193236714976e-06, |
|
"loss": 0.3022, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 9.002898550724638, |
|
"grad_norm": 26.668115615844727, |
|
"learning_rate": 6.376811594202898e-06, |
|
"loss": 0.7653, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 9.003864734299517, |
|
"grad_norm": 0.5207400918006897, |
|
"learning_rate": 6.473429951690822e-06, |
|
"loss": 0.3052, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 9.004830917874395, |
|
"grad_norm": 29.032527923583984, |
|
"learning_rate": 6.570048309178745e-06, |
|
"loss": 1.0119, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 9.005797101449275, |
|
"grad_norm": 15.286548614501953, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.6187, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 9.006763285024155, |
|
"grad_norm": 6.56998872756958, |
|
"learning_rate": 6.76328502415459e-06, |
|
"loss": 0.2959, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 9.006763285024155, |
|
"eval_accuracy": 0.7564102564102564, |
|
"eval_loss": 0.511256217956543, |
|
"eval_runtime": 17.0114, |
|
"eval_samples_per_second": 4.585, |
|
"eval_steps_per_second": 1.176, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 10.00096618357488, |
|
"grad_norm": 4.055560111999512, |
|
"learning_rate": 6.859903381642513e-06, |
|
"loss": 0.5726, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 10.001932367149758, |
|
"grad_norm": 100.66243743896484, |
|
"learning_rate": 6.956521739130435e-06, |
|
"loss": 0.2548, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 10.002898550724638, |
|
"grad_norm": 23.80630111694336, |
|
"learning_rate": 7.053140096618359e-06, |
|
"loss": 0.2119, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 10.003864734299517, |
|
"grad_norm": 103.37820434570312, |
|
"learning_rate": 7.149758454106281e-06, |
|
"loss": 0.4181, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 10.004830917874395, |
|
"grad_norm": 44.21693420410156, |
|
"learning_rate": 7.246376811594203e-06, |
|
"loss": 1.2421, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 10.005797101449275, |
|
"grad_norm": 13.183788299560547, |
|
"learning_rate": 7.342995169082127e-06, |
|
"loss": 0.4219, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 10.006763285024155, |
|
"grad_norm": 95.68376159667969, |
|
"learning_rate": 7.439613526570049e-06, |
|
"loss": 0.7674, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 10.006763285024155, |
|
"eval_accuracy": 0.7307692307692307, |
|
"eval_loss": 0.5330198407173157, |
|
"eval_runtime": 16.5489, |
|
"eval_samples_per_second": 4.713, |
|
"eval_steps_per_second": 1.209, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 11.00096618357488, |
|
"grad_norm": 60.94060516357422, |
|
"learning_rate": 7.536231884057972e-06, |
|
"loss": 0.5811, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 11.001932367149758, |
|
"grad_norm": 33.39159393310547, |
|
"learning_rate": 7.632850241545895e-06, |
|
"loss": 0.6433, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 11.002898550724638, |
|
"grad_norm": 25.03746223449707, |
|
"learning_rate": 7.729468599033817e-06, |
|
"loss": 0.4988, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 11.003864734299517, |
|
"grad_norm": 17.853822708129883, |
|
"learning_rate": 7.82608695652174e-06, |
|
"loss": 0.4904, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 11.004830917874395, |
|
"grad_norm": 20.189794540405273, |
|
"learning_rate": 7.922705314009662e-06, |
|
"loss": 0.3451, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 11.005797101449275, |
|
"grad_norm": 35.152103424072266, |
|
"learning_rate": 8.019323671497586e-06, |
|
"loss": 0.6849, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 11.006763285024155, |
|
"grad_norm": 128.79225158691406, |
|
"learning_rate": 8.115942028985508e-06, |
|
"loss": 0.3015, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 11.006763285024155, |
|
"eval_accuracy": 0.7435897435897436, |
|
"eval_loss": 0.6790037155151367, |
|
"eval_runtime": 17.2359, |
|
"eval_samples_per_second": 4.525, |
|
"eval_steps_per_second": 1.16, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 12.00096618357488, |
|
"grad_norm": 35.15237808227539, |
|
"learning_rate": 8.212560386473431e-06, |
|
"loss": 0.4053, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 12.001932367149758, |
|
"grad_norm": 5.28515625, |
|
"learning_rate": 8.309178743961353e-06, |
|
"loss": 0.5247, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 12.002898550724638, |
|
"grad_norm": 10.3850736618042, |
|
"learning_rate": 8.405797101449275e-06, |
|
"loss": 0.216, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 12.003864734299517, |
|
"grad_norm": 19.821121215820312, |
|
"learning_rate": 8.502415458937199e-06, |
|
"loss": 0.2827, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 12.004830917874395, |
|
"grad_norm": 1.2210925817489624, |
|
"learning_rate": 8.599033816425122e-06, |
|
"loss": 0.2345, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 12.005797101449275, |
|
"grad_norm": 1.1479548215866089, |
|
"learning_rate": 8.695652173913044e-06, |
|
"loss": 0.5561, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 12.006763285024155, |
|
"grad_norm": 193.2356719970703, |
|
"learning_rate": 8.792270531400966e-06, |
|
"loss": 0.6252, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 12.006763285024155, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_loss": 1.6536943912506104, |
|
"eval_runtime": 18.3082, |
|
"eval_samples_per_second": 4.26, |
|
"eval_steps_per_second": 1.092, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 13.00096618357488, |
|
"grad_norm": 13.043883323669434, |
|
"learning_rate": 8.888888888888888e-06, |
|
"loss": 1.3737, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 13.001932367149758, |
|
"grad_norm": 10.54146957397461, |
|
"learning_rate": 8.985507246376812e-06, |
|
"loss": 0.5883, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 13.002898550724638, |
|
"grad_norm": 53.698272705078125, |
|
"learning_rate": 9.082125603864736e-06, |
|
"loss": 0.2905, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 13.003864734299517, |
|
"grad_norm": 58.599098205566406, |
|
"learning_rate": 9.178743961352658e-06, |
|
"loss": 0.7171, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 13.004830917874395, |
|
"grad_norm": 46.291561126708984, |
|
"learning_rate": 9.275362318840581e-06, |
|
"loss": 0.2068, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 13.005797101449275, |
|
"grad_norm": 0.8758206367492676, |
|
"learning_rate": 9.371980676328503e-06, |
|
"loss": 0.3303, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 13.006763285024155, |
|
"grad_norm": 0.09083396941423416, |
|
"learning_rate": 9.468599033816425e-06, |
|
"loss": 0.4554, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 13.006763285024155, |
|
"eval_accuracy": 0.7564102564102564, |
|
"eval_loss": 0.8944117426872253, |
|
"eval_runtime": 17.9825, |
|
"eval_samples_per_second": 4.338, |
|
"eval_steps_per_second": 1.112, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 14.00096618357488, |
|
"grad_norm": 106.69209289550781, |
|
"learning_rate": 9.565217391304349e-06, |
|
"loss": 0.3241, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 14.001932367149758, |
|
"grad_norm": 86.01146697998047, |
|
"learning_rate": 9.66183574879227e-06, |
|
"loss": 0.4302, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 14.002898550724638, |
|
"grad_norm": 93.19570922851562, |
|
"learning_rate": 9.758454106280194e-06, |
|
"loss": 0.692, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 14.003864734299517, |
|
"grad_norm": 93.05182647705078, |
|
"learning_rate": 9.855072463768118e-06, |
|
"loss": 0.7247, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 14.004830917874395, |
|
"grad_norm": 2.0111024379730225, |
|
"learning_rate": 9.95169082125604e-06, |
|
"loss": 0.3524, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 14.005797101449275, |
|
"grad_norm": 48.2626838684082, |
|
"learning_rate": 9.994632313472894e-06, |
|
"loss": 0.3212, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 14.006763285024155, |
|
"grad_norm": 0.2520197033882141, |
|
"learning_rate": 9.98389694041868e-06, |
|
"loss": 0.364, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 14.006763285024155, |
|
"eval_accuracy": 0.717948717948718, |
|
"eval_loss": 0.8103033900260925, |
|
"eval_runtime": 18.9745, |
|
"eval_samples_per_second": 4.111, |
|
"eval_steps_per_second": 1.054, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 15.00096618357488, |
|
"grad_norm": 0.2682894170284271, |
|
"learning_rate": 9.973161567364467e-06, |
|
"loss": 0.124, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 15.001932367149758, |
|
"grad_norm": 31.360898971557617, |
|
"learning_rate": 9.962426194310253e-06, |
|
"loss": 0.3927, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 15.002898550724638, |
|
"grad_norm": 36.74116134643555, |
|
"learning_rate": 9.95169082125604e-06, |
|
"loss": 0.4221, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 15.003864734299517, |
|
"grad_norm": 196.0001220703125, |
|
"learning_rate": 9.940955448201826e-06, |
|
"loss": 0.1994, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 15.004830917874395, |
|
"grad_norm": 18.42249298095703, |
|
"learning_rate": 9.930220075147611e-06, |
|
"loss": 0.4539, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 15.005797101449275, |
|
"grad_norm": 19.64423179626465, |
|
"learning_rate": 9.919484702093398e-06, |
|
"loss": 0.2981, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 15.006763285024155, |
|
"grad_norm": 13.708057403564453, |
|
"learning_rate": 9.908749329039184e-06, |
|
"loss": 0.444, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 15.006763285024155, |
|
"eval_accuracy": 0.782051282051282, |
|
"eval_loss": 0.710326611995697, |
|
"eval_runtime": 21.7172, |
|
"eval_samples_per_second": 3.592, |
|
"eval_steps_per_second": 0.921, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 16.000966183574878, |
|
"grad_norm": 25.7137451171875, |
|
"learning_rate": 9.89801395598497e-06, |
|
"loss": 0.3589, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 16.00193236714976, |
|
"grad_norm": 180.5447540283203, |
|
"learning_rate": 9.887278582930757e-06, |
|
"loss": 0.3119, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 16.002898550724638, |
|
"grad_norm": 1.0929948091506958, |
|
"learning_rate": 9.876543209876543e-06, |
|
"loss": 0.3492, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 16.003864734299516, |
|
"grad_norm": 33.48530960083008, |
|
"learning_rate": 9.865807836822331e-06, |
|
"loss": 0.6954, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 16.004830917874397, |
|
"grad_norm": 1.5131254196166992, |
|
"learning_rate": 9.855072463768118e-06, |
|
"loss": 0.5318, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 16.005797101449275, |
|
"grad_norm": 1.1009883880615234, |
|
"learning_rate": 9.844337090713904e-06, |
|
"loss": 0.5071, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 16.006763285024153, |
|
"grad_norm": 0.46484410762786865, |
|
"learning_rate": 9.833601717659689e-06, |
|
"loss": 0.104, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 16.006763285024153, |
|
"eval_accuracy": 0.717948717948718, |
|
"eval_loss": 0.8999806642532349, |
|
"eval_runtime": 18.5229, |
|
"eval_samples_per_second": 4.211, |
|
"eval_steps_per_second": 1.08, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 17.000966183574878, |
|
"grad_norm": 0.40560030937194824, |
|
"learning_rate": 9.822866344605476e-06, |
|
"loss": 0.4563, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 17.00193236714976, |
|
"grad_norm": 124.55720520019531, |
|
"learning_rate": 9.812130971551262e-06, |
|
"loss": 0.5613, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 17.002898550724638, |
|
"grad_norm": 4.143601417541504, |
|
"learning_rate": 9.801395598497048e-06, |
|
"loss": 0.3869, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 17.003864734299516, |
|
"grad_norm": 0.04259469732642174, |
|
"learning_rate": 9.790660225442835e-06, |
|
"loss": 0.0657, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 17.004830917874397, |
|
"grad_norm": 81.4675064086914, |
|
"learning_rate": 9.779924852388621e-06, |
|
"loss": 0.239, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 17.005797101449275, |
|
"grad_norm": 0.18485671281814575, |
|
"learning_rate": 9.769189479334408e-06, |
|
"loss": 0.0619, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 17.006763285024153, |
|
"grad_norm": 0.026976848021149635, |
|
"learning_rate": 9.758454106280194e-06, |
|
"loss": 0.5647, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 17.006763285024153, |
|
"eval_accuracy": 0.7051282051282052, |
|
"eval_loss": 1.4782413244247437, |
|
"eval_runtime": 18.6906, |
|
"eval_samples_per_second": 4.173, |
|
"eval_steps_per_second": 1.07, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 18.000966183574878, |
|
"grad_norm": 54.548683166503906, |
|
"learning_rate": 9.74771873322598e-06, |
|
"loss": 0.183, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 18.00193236714976, |
|
"grad_norm": 12.151978492736816, |
|
"learning_rate": 9.736983360171767e-06, |
|
"loss": 0.5206, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 18.002898550724638, |
|
"grad_norm": 275.7248840332031, |
|
"learning_rate": 9.726247987117554e-06, |
|
"loss": 0.3181, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 18.003864734299516, |
|
"grad_norm": 0.09706564992666245, |
|
"learning_rate": 9.71551261406334e-06, |
|
"loss": 0.1154, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 18.004830917874397, |
|
"grad_norm": 0.7155212163925171, |
|
"learning_rate": 9.704777241009125e-06, |
|
"loss": 0.2555, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 18.005797101449275, |
|
"grad_norm": 97.17144012451172, |
|
"learning_rate": 9.694041867954911e-06, |
|
"loss": 0.5985, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 18.006763285024153, |
|
"grad_norm": 0.2159235030412674, |
|
"learning_rate": 9.683306494900698e-06, |
|
"loss": 0.783, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 18.006763285024153, |
|
"eval_accuracy": 0.8205128205128205, |
|
"eval_loss": 0.8539489507675171, |
|
"eval_runtime": 19.5687, |
|
"eval_samples_per_second": 3.986, |
|
"eval_steps_per_second": 1.022, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 19.000966183574878, |
|
"grad_norm": 117.28670501708984, |
|
"learning_rate": 9.672571121846484e-06, |
|
"loss": 0.3806, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 19.00193236714976, |
|
"grad_norm": 0.10745132714509964, |
|
"learning_rate": 9.66183574879227e-06, |
|
"loss": 0.2618, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 19.002898550724638, |
|
"grad_norm": 0.10657572746276855, |
|
"learning_rate": 9.651100375738057e-06, |
|
"loss": 0.1682, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 19.003864734299516, |
|
"grad_norm": 17.445707321166992, |
|
"learning_rate": 9.640365002683844e-06, |
|
"loss": 0.0466, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 19.004830917874397, |
|
"grad_norm": 0.01827850751578808, |
|
"learning_rate": 9.62962962962963e-06, |
|
"loss": 0.3148, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 19.005797101449275, |
|
"grad_norm": 67.58177947998047, |
|
"learning_rate": 9.618894256575418e-06, |
|
"loss": 0.5658, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 19.006763285024153, |
|
"grad_norm": 0.4448859393596649, |
|
"learning_rate": 9.608158883521203e-06, |
|
"loss": 0.5938, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 19.006763285024153, |
|
"eval_accuracy": 0.7564102564102564, |
|
"eval_loss": 0.8426063656806946, |
|
"eval_runtime": 23.6146, |
|
"eval_samples_per_second": 3.303, |
|
"eval_steps_per_second": 0.847, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 20.000966183574878, |
|
"grad_norm": 119.15202331542969, |
|
"learning_rate": 9.59742351046699e-06, |
|
"loss": 0.2652, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 20.00193236714976, |
|
"grad_norm": 52.361080169677734, |
|
"learning_rate": 9.586688137412776e-06, |
|
"loss": 0.5006, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 20.002898550724638, |
|
"grad_norm": 33.284122467041016, |
|
"learning_rate": 9.575952764358562e-06, |
|
"loss": 0.3182, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 20.003864734299516, |
|
"grad_norm": 0.39933377504348755, |
|
"learning_rate": 9.565217391304349e-06, |
|
"loss": 0.0969, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 20.004830917874397, |
|
"grad_norm": 374.1847839355469, |
|
"learning_rate": 9.554482018250135e-06, |
|
"loss": 0.0703, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 20.005797101449275, |
|
"grad_norm": 0.1522367000579834, |
|
"learning_rate": 9.543746645195922e-06, |
|
"loss": 0.2595, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 20.006763285024153, |
|
"grad_norm": 0.15178386867046356, |
|
"learning_rate": 9.533011272141708e-06, |
|
"loss": 0.5341, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 20.006763285024153, |
|
"eval_accuracy": 0.7948717948717948, |
|
"eval_loss": 0.9861828684806824, |
|
"eval_runtime": 20.329, |
|
"eval_samples_per_second": 3.837, |
|
"eval_steps_per_second": 0.984, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 21.000966183574878, |
|
"grad_norm": 244.99745178222656, |
|
"learning_rate": 9.522275899087494e-06, |
|
"loss": 0.2293, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 21.00193236714976, |
|
"grad_norm": 31.514474868774414, |
|
"learning_rate": 9.511540526033281e-06, |
|
"loss": 0.0871, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 21.002898550724638, |
|
"grad_norm": 0.014621300622820854, |
|
"learning_rate": 9.500805152979067e-06, |
|
"loss": 0.2701, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 21.003864734299516, |
|
"grad_norm": 0.14359234273433685, |
|
"learning_rate": 9.490069779924854e-06, |
|
"loss": 0.2155, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 21.004830917874397, |
|
"grad_norm": 17.387298583984375, |
|
"learning_rate": 9.479334406870639e-06, |
|
"loss": 0.6972, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 21.005797101449275, |
|
"grad_norm": 263.881591796875, |
|
"learning_rate": 9.468599033816425e-06, |
|
"loss": 0.495, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 21.006763285024153, |
|
"grad_norm": 0.6225783228874207, |
|
"learning_rate": 9.457863660762211e-06, |
|
"loss": 0.3391, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 21.006763285024153, |
|
"eval_accuracy": 0.7692307692307693, |
|
"eval_loss": 1.1159329414367676, |
|
"eval_runtime": 20.3876, |
|
"eval_samples_per_second": 3.826, |
|
"eval_steps_per_second": 0.981, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 22.000966183574878, |
|
"grad_norm": 2.346330404281616, |
|
"learning_rate": 9.447128287707998e-06, |
|
"loss": 0.0129, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 22.00193236714976, |
|
"grad_norm": 0.19982995092868805, |
|
"learning_rate": 9.436392914653784e-06, |
|
"loss": 0.2005, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 22.002898550724638, |
|
"grad_norm": 0.0399252213537693, |
|
"learning_rate": 9.42565754159957e-06, |
|
"loss": 0.0019, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 22.003864734299516, |
|
"grad_norm": 0.027430692687630653, |
|
"learning_rate": 9.414922168545357e-06, |
|
"loss": 0.0012, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 22.004830917874397, |
|
"grad_norm": 0.05206064507365227, |
|
"learning_rate": 9.404186795491144e-06, |
|
"loss": 0.3566, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 22.005797101449275, |
|
"grad_norm": 0.07161853462457657, |
|
"learning_rate": 9.39345142243693e-06, |
|
"loss": 0.3149, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 22.006763285024153, |
|
"grad_norm": 0.1686069220304489, |
|
"learning_rate": 9.382716049382717e-06, |
|
"loss": 0.2071, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 22.006763285024153, |
|
"eval_accuracy": 0.7692307692307693, |
|
"eval_loss": 1.5833228826522827, |
|
"eval_runtime": 20.3409, |
|
"eval_samples_per_second": 3.835, |
|
"eval_steps_per_second": 0.983, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 23.000966183574878, |
|
"grad_norm": 0.11971097439527512, |
|
"learning_rate": 9.371980676328503e-06, |
|
"loss": 0.1224, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 23.00193236714976, |
|
"grad_norm": 0.04427675902843475, |
|
"learning_rate": 9.36124530327429e-06, |
|
"loss": 0.2063, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 23.002898550724638, |
|
"grad_norm": 0.017134735360741615, |
|
"learning_rate": 9.350509930220076e-06, |
|
"loss": 0.0009, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 23.003864734299516, |
|
"grad_norm": 1.9943066835403442, |
|
"learning_rate": 9.339774557165862e-06, |
|
"loss": 0.2292, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 23.004830917874397, |
|
"grad_norm": 53.962642669677734, |
|
"learning_rate": 9.329039184111649e-06, |
|
"loss": 0.3195, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 23.005797101449275, |
|
"grad_norm": 0.11377642303705215, |
|
"learning_rate": 9.318303811057435e-06, |
|
"loss": 0.1909, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 23.006763285024153, |
|
"grad_norm": 0.021974513307213783, |
|
"learning_rate": 9.307568438003222e-06, |
|
"loss": 0.1159, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 23.006763285024153, |
|
"eval_accuracy": 0.8205128205128205, |
|
"eval_loss": 1.0204980373382568, |
|
"eval_runtime": 20.0597, |
|
"eval_samples_per_second": 3.888, |
|
"eval_steps_per_second": 0.997, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 24.000966183574878, |
|
"grad_norm": 0.022601498290896416, |
|
"learning_rate": 9.296833064949008e-06, |
|
"loss": 0.2401, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 24.00193236714976, |
|
"grad_norm": 0.023662317544221878, |
|
"learning_rate": 9.286097691894795e-06, |
|
"loss": 0.003, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 24.002898550724638, |
|
"grad_norm": 0.17592191696166992, |
|
"learning_rate": 9.275362318840581e-06, |
|
"loss": 0.1163, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 24.003864734299516, |
|
"grad_norm": 0.014995891600847244, |
|
"learning_rate": 9.264626945786368e-06, |
|
"loss": 0.0144, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 24.004830917874397, |
|
"grad_norm": 0.16420097649097443, |
|
"learning_rate": 9.253891572732154e-06, |
|
"loss": 0.1508, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 24.005797101449275, |
|
"grad_norm": 11.045714378356934, |
|
"learning_rate": 9.243156199677939e-06, |
|
"loss": 0.6033, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 24.006763285024153, |
|
"grad_norm": 0.010024874471127987, |
|
"learning_rate": 9.232420826623725e-06, |
|
"loss": 0.1579, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 24.006763285024153, |
|
"eval_accuracy": 0.7692307692307693, |
|
"eval_loss": 1.4633172750473022, |
|
"eval_runtime": 21.196, |
|
"eval_samples_per_second": 3.68, |
|
"eval_steps_per_second": 0.944, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 25.000966183574878, |
|
"grad_norm": 3.15612530708313, |
|
"learning_rate": 9.221685453569512e-06, |
|
"loss": 0.1799, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 25.00193236714976, |
|
"grad_norm": 0.048897918313741684, |
|
"learning_rate": 9.210950080515298e-06, |
|
"loss": 0.2754, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 25.002898550724638, |
|
"grad_norm": 0.013033032417297363, |
|
"learning_rate": 9.200214707461085e-06, |
|
"loss": 0.078, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 25.003864734299516, |
|
"grad_norm": 228.9864044189453, |
|
"learning_rate": 9.189479334406871e-06, |
|
"loss": 0.0223, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 25.004830917874397, |
|
"grad_norm": 0.38404083251953125, |
|
"learning_rate": 9.178743961352658e-06, |
|
"loss": 0.09, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 25.005797101449275, |
|
"grad_norm": 9.970251083374023, |
|
"learning_rate": 9.168008588298444e-06, |
|
"loss": 0.0889, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 25.006763285024153, |
|
"grad_norm": 0.005024947226047516, |
|
"learning_rate": 9.15727321524423e-06, |
|
"loss": 0.1042, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 25.006763285024153, |
|
"eval_accuracy": 0.7564102564102564, |
|
"eval_loss": 1.5863817930221558, |
|
"eval_runtime": 16.6058, |
|
"eval_samples_per_second": 4.697, |
|
"eval_steps_per_second": 1.204, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 26.000966183574878, |
|
"grad_norm": 34.71379089355469, |
|
"learning_rate": 9.146537842190017e-06, |
|
"loss": 0.4249, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 26.00193236714976, |
|
"grad_norm": 0.02387184463441372, |
|
"learning_rate": 9.135802469135803e-06, |
|
"loss": 0.1022, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 26.002898550724638, |
|
"grad_norm": 456.8200988769531, |
|
"learning_rate": 9.12506709608159e-06, |
|
"loss": 0.3789, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 26.003864734299516, |
|
"grad_norm": 0.012509716674685478, |
|
"learning_rate": 9.114331723027376e-06, |
|
"loss": 0.1754, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 26.004830917874397, |
|
"grad_norm": 0.04585114121437073, |
|
"learning_rate": 9.103596349973163e-06, |
|
"loss": 0.0927, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 26.005797101449275, |
|
"grad_norm": 0.029922403395175934, |
|
"learning_rate": 9.092860976918949e-06, |
|
"loss": 0.3417, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 26.006763285024153, |
|
"grad_norm": 0.011729502119123936, |
|
"learning_rate": 9.082125603864736e-06, |
|
"loss": 0.1466, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 26.006763285024153, |
|
"eval_accuracy": 0.7692307692307693, |
|
"eval_loss": 1.2990210056304932, |
|
"eval_runtime": 16.4386, |
|
"eval_samples_per_second": 4.745, |
|
"eval_steps_per_second": 1.217, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 27.000966183574878, |
|
"grad_norm": 0.03241152688860893, |
|
"learning_rate": 9.071390230810522e-06, |
|
"loss": 0.0027, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 27.00193236714976, |
|
"grad_norm": 19.074296951293945, |
|
"learning_rate": 9.060654857756308e-06, |
|
"loss": 0.1867, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 27.002898550724638, |
|
"grad_norm": 0.04132108390331268, |
|
"learning_rate": 9.049919484702095e-06, |
|
"loss": 0.0007, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 27.003864734299516, |
|
"grad_norm": 0.014680879190564156, |
|
"learning_rate": 9.039184111647881e-06, |
|
"loss": 0.0004, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 27.004830917874397, |
|
"grad_norm": 0.01582922227680683, |
|
"learning_rate": 9.028448738593668e-06, |
|
"loss": 0.001, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 27.005797101449275, |
|
"grad_norm": 0.02607502043247223, |
|
"learning_rate": 9.017713365539453e-06, |
|
"loss": 0.0036, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 27.006763285024153, |
|
"grad_norm": 0.021080270409584045, |
|
"learning_rate": 9.006977992485239e-06, |
|
"loss": 0.0006, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 27.006763285024153, |
|
"eval_accuracy": 0.7564102564102564, |
|
"eval_loss": 1.5596784353256226, |
|
"eval_runtime": 16.1439, |
|
"eval_samples_per_second": 4.832, |
|
"eval_steps_per_second": 1.239, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 28.000966183574878, |
|
"grad_norm": 0.00953815970569849, |
|
"learning_rate": 8.996242619431025e-06, |
|
"loss": 0.1386, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 28.00193236714976, |
|
"grad_norm": 148.93067932128906, |
|
"learning_rate": 8.985507246376812e-06, |
|
"loss": 0.2969, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 28.002898550724638, |
|
"grad_norm": 0.05550064891576767, |
|
"learning_rate": 8.974771873322598e-06, |
|
"loss": 0.0064, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 28.003864734299516, |
|
"grad_norm": 0.03587929904460907, |
|
"learning_rate": 8.964036500268385e-06, |
|
"loss": 0.0021, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 28.004830917874397, |
|
"grad_norm": 0.009905187413096428, |
|
"learning_rate": 8.953301127214171e-06, |
|
"loss": 0.0016, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 28.005797101449275, |
|
"grad_norm": 0.016557922586798668, |
|
"learning_rate": 8.942565754159958e-06, |
|
"loss": 0.332, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 28.006763285024153, |
|
"grad_norm": 0.0018835013033822179, |
|
"learning_rate": 8.931830381105744e-06, |
|
"loss": 0.001, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 28.006763285024153, |
|
"eval_accuracy": 0.7564102564102564, |
|
"eval_loss": 1.6433550119400024, |
|
"eval_runtime": 16.9604, |
|
"eval_samples_per_second": 4.599, |
|
"eval_steps_per_second": 1.179, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 29.000966183574878, |
|
"grad_norm": 0.011498975567519665, |
|
"learning_rate": 8.92109500805153e-06, |
|
"loss": 0.0006, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 29.00193236714976, |
|
"grad_norm": 0.022594112902879715, |
|
"learning_rate": 8.910359634997317e-06, |
|
"loss": 0.0019, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 29.002898550724638, |
|
"grad_norm": 108.30265045166016, |
|
"learning_rate": 8.899624261943104e-06, |
|
"loss": 0.2073, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 29.003864734299516, |
|
"grad_norm": 0.018151551485061646, |
|
"learning_rate": 8.888888888888888e-06, |
|
"loss": 0.0019, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 29.004830917874397, |
|
"grad_norm": 171.54039001464844, |
|
"learning_rate": 8.878153515834675e-06, |
|
"loss": 0.5704, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 29.005797101449275, |
|
"grad_norm": 0.12190333008766174, |
|
"learning_rate": 8.867418142780463e-06, |
|
"loss": 0.2071, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 29.006763285024153, |
|
"grad_norm": 0.1234963983297348, |
|
"learning_rate": 8.85668276972625e-06, |
|
"loss": 0.0096, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 29.006763285024153, |
|
"eval_accuracy": 0.782051282051282, |
|
"eval_loss": 1.4161337614059448, |
|
"eval_runtime": 75.4835, |
|
"eval_samples_per_second": 1.033, |
|
"eval_steps_per_second": 0.265, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 30.000966183574878, |
|
"grad_norm": 0.02181861735880375, |
|
"learning_rate": 8.845947396672036e-06, |
|
"loss": 0.072, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 30.00193236714976, |
|
"grad_norm": 0.012783943675458431, |
|
"learning_rate": 8.835212023617822e-06, |
|
"loss": 0.0005, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 30.002898550724638, |
|
"grad_norm": 0.009668432176113129, |
|
"learning_rate": 8.824476650563609e-06, |
|
"loss": 0.1864, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 30.003864734299516, |
|
"grad_norm": 171.04098510742188, |
|
"learning_rate": 8.813741277509395e-06, |
|
"loss": 0.0289, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 30.004830917874397, |
|
"grad_norm": 2.430178642272949, |
|
"learning_rate": 8.803005904455182e-06, |
|
"loss": 0.2184, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 30.005797101449275, |
|
"grad_norm": 0.37614426016807556, |
|
"learning_rate": 8.792270531400966e-06, |
|
"loss": 0.1647, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 30.006763285024153, |
|
"grad_norm": 0.03991688787937164, |
|
"learning_rate": 8.781535158346753e-06, |
|
"loss": 0.0015, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 30.006763285024153, |
|
"eval_accuracy": 0.7948717948717948, |
|
"eval_loss": 1.3385039567947388, |
|
"eval_runtime": 22.7136, |
|
"eval_samples_per_second": 3.434, |
|
"eval_steps_per_second": 0.881, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 31.000966183574878, |
|
"grad_norm": 0.011698647402226925, |
|
"learning_rate": 8.77079978529254e-06, |
|
"loss": 0.1503, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 31.00193236714976, |
|
"grad_norm": 0.005670130252838135, |
|
"learning_rate": 8.760064412238326e-06, |
|
"loss": 0.0006, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 31.002898550724638, |
|
"grad_norm": 0.05591421201825142, |
|
"learning_rate": 8.749329039184112e-06, |
|
"loss": 0.0745, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 31.003864734299516, |
|
"grad_norm": 3.357697010040283, |
|
"learning_rate": 8.738593666129899e-06, |
|
"loss": 0.0008, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 31.004830917874397, |
|
"grad_norm": 51.18307876586914, |
|
"learning_rate": 8.727858293075685e-06, |
|
"loss": 0.167, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 31.005797101449275, |
|
"grad_norm": 73.81510925292969, |
|
"learning_rate": 8.717122920021472e-06, |
|
"loss": 0.099, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 31.006763285024153, |
|
"grad_norm": 0.019633440300822258, |
|
"learning_rate": 8.706387546967258e-06, |
|
"loss": 0.3627, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 31.006763285024153, |
|
"eval_accuracy": 0.7435897435897436, |
|
"eval_loss": 1.7863702774047852, |
|
"eval_runtime": 22.6796, |
|
"eval_samples_per_second": 3.439, |
|
"eval_steps_per_second": 0.882, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 32.00096618357488, |
|
"grad_norm": 0.0671672448515892, |
|
"learning_rate": 8.695652173913044e-06, |
|
"loss": 0.0005, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 32.001932367149756, |
|
"grad_norm": 0.35884493589401245, |
|
"learning_rate": 8.684916800858831e-06, |
|
"loss": 0.0008, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 32.00289855072464, |
|
"grad_norm": 0.008876653388142586, |
|
"learning_rate": 8.674181427804617e-06, |
|
"loss": 0.0002, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 32.00386473429952, |
|
"grad_norm": 0.0027054892852902412, |
|
"learning_rate": 8.663446054750402e-06, |
|
"loss": 0.0003, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 32.00483091787439, |
|
"grad_norm": 0.00598740391433239, |
|
"learning_rate": 8.652710681696189e-06, |
|
"loss": 0.0006, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 32.005797101449275, |
|
"grad_norm": 0.008918968960642815, |
|
"learning_rate": 8.641975308641975e-06, |
|
"loss": 0.0182, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 32.00676328502416, |
|
"grad_norm": 0.005757227074354887, |
|
"learning_rate": 8.631239935587761e-06, |
|
"loss": 0.1541, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 32.00676328502416, |
|
"eval_accuracy": 0.782051282051282, |
|
"eval_loss": 1.5617612600326538, |
|
"eval_runtime": 24.1692, |
|
"eval_samples_per_second": 3.227, |
|
"eval_steps_per_second": 0.827, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 33.00096618357488, |
|
"grad_norm": 0.006783771328628063, |
|
"learning_rate": 8.62050456253355e-06, |
|
"loss": 0.0002, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 33.001932367149756, |
|
"grad_norm": 0.037278104573488235, |
|
"learning_rate": 8.609769189479336e-06, |
|
"loss": 0.0176, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 33.00289855072464, |
|
"grad_norm": 427.9874267578125, |
|
"learning_rate": 8.599033816425122e-06, |
|
"loss": 0.0923, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 33.00386473429952, |
|
"grad_norm": 0.005532737821340561, |
|
"learning_rate": 8.588298443370909e-06, |
|
"loss": 0.0549, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 33.00483091787439, |
|
"grad_norm": 0.0045069679617881775, |
|
"learning_rate": 8.577563070316695e-06, |
|
"loss": 0.0134, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 33.005797101449275, |
|
"grad_norm": 231.7147979736328, |
|
"learning_rate": 8.56682769726248e-06, |
|
"loss": 0.1822, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 33.00676328502416, |
|
"grad_norm": 0.005993337836116552, |
|
"learning_rate": 8.556092324208267e-06, |
|
"loss": 0.1285, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 33.00676328502416, |
|
"eval_accuracy": 0.7948717948717948, |
|
"eval_loss": 1.30621337890625, |
|
"eval_runtime": 20.254, |
|
"eval_samples_per_second": 3.851, |
|
"eval_steps_per_second": 0.987, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 34.00096618357488, |
|
"grad_norm": 0.010524412617087364, |
|
"learning_rate": 8.545356951154053e-06, |
|
"loss": 0.0002, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 34.001932367149756, |
|
"grad_norm": 0.005317069590091705, |
|
"learning_rate": 8.53462157809984e-06, |
|
"loss": 0.1733, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 34.00289855072464, |
|
"grad_norm": 0.007308751344680786, |
|
"learning_rate": 8.523886205045626e-06, |
|
"loss": 0.1447, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 34.00386473429952, |
|
"grad_norm": 0.6433327794075012, |
|
"learning_rate": 8.513150831991412e-06, |
|
"loss": 0.0151, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 34.00483091787439, |
|
"grad_norm": 0.013438474386930466, |
|
"learning_rate": 8.502415458937199e-06, |
|
"loss": 0.0002, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 34.005797101449275, |
|
"grad_norm": 0.006189200561493635, |
|
"learning_rate": 8.491680085882985e-06, |
|
"loss": 0.0764, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 34.00676328502416, |
|
"grad_norm": 0.011971415020525455, |
|
"learning_rate": 8.480944712828772e-06, |
|
"loss": 0.2193, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 34.00676328502416, |
|
"eval_accuracy": 0.782051282051282, |
|
"eval_loss": 1.555395483970642, |
|
"eval_runtime": 20.8944, |
|
"eval_samples_per_second": 3.733, |
|
"eval_steps_per_second": 0.957, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 35.00096618357488, |
|
"grad_norm": 0.007673191372305155, |
|
"learning_rate": 8.470209339774558e-06, |
|
"loss": 0.0002, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 35.001932367149756, |
|
"grad_norm": 1.346144437789917, |
|
"learning_rate": 8.459473966720345e-06, |
|
"loss": 0.1336, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 35.00289855072464, |
|
"grad_norm": 0.05735534802079201, |
|
"learning_rate": 8.448738593666131e-06, |
|
"loss": 0.0006, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 35.00386473429952, |
|
"grad_norm": 0.0024482065346091986, |
|
"learning_rate": 8.438003220611916e-06, |
|
"loss": 0.0002, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 35.00483091787439, |
|
"grad_norm": 0.24069902300834656, |
|
"learning_rate": 8.427267847557702e-06, |
|
"loss": 0.0247, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 35.005797101449275, |
|
"grad_norm": 0.0022392012178897858, |
|
"learning_rate": 8.416532474503489e-06, |
|
"loss": 0.0005, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 35.00676328502416, |
|
"grad_norm": 0.0014107138849794865, |
|
"learning_rate": 8.405797101449275e-06, |
|
"loss": 0.0002, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 35.00676328502416, |
|
"eval_accuracy": 0.7948717948717948, |
|
"eval_loss": 1.5444982051849365, |
|
"eval_runtime": 20.3236, |
|
"eval_samples_per_second": 3.838, |
|
"eval_steps_per_second": 0.984, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 36.00096618357488, |
|
"grad_norm": 0.001967865275219083, |
|
"learning_rate": 8.395061728395062e-06, |
|
"loss": 0.0033, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 36.001932367149756, |
|
"grad_norm": 403.927490234375, |
|
"learning_rate": 8.384326355340848e-06, |
|
"loss": 0.0848, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 36.00289855072464, |
|
"grad_norm": 0.012290756218135357, |
|
"learning_rate": 8.373590982286636e-06, |
|
"loss": 0.002, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 36.00386473429952, |
|
"grad_norm": 0.007809770293533802, |
|
"learning_rate": 8.362855609232423e-06, |
|
"loss": 0.0001, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 36.00483091787439, |
|
"grad_norm": 167.53094482421875, |
|
"learning_rate": 8.352120236178209e-06, |
|
"loss": 0.1313, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 36.005797101449275, |
|
"grad_norm": 0.005764774978160858, |
|
"learning_rate": 8.341384863123994e-06, |
|
"loss": 0.0001, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 36.00676328502416, |
|
"grad_norm": 0.006284950766712427, |
|
"learning_rate": 8.33064949006978e-06, |
|
"loss": 0.0003, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 36.00676328502416, |
|
"eval_accuracy": 0.782051282051282, |
|
"eval_loss": 1.718872308731079, |
|
"eval_runtime": 20.0134, |
|
"eval_samples_per_second": 3.897, |
|
"eval_steps_per_second": 0.999, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 37.00096618357488, |
|
"grad_norm": 0.002441326156258583, |
|
"learning_rate": 8.319914117015567e-06, |
|
"loss": 0.0001, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 37.001932367149756, |
|
"grad_norm": 0.005556989461183548, |
|
"learning_rate": 8.309178743961353e-06, |
|
"loss": 0.0004, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 37.00289855072464, |
|
"grad_norm": 0.0032976714428514242, |
|
"learning_rate": 8.29844337090714e-06, |
|
"loss": 0.0001, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 37.00386473429952, |
|
"grad_norm": 0.006368944887071848, |
|
"learning_rate": 8.287707997852926e-06, |
|
"loss": 0.0001, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 37.00483091787439, |
|
"grad_norm": 0.005885376129299402, |
|
"learning_rate": 8.276972624798713e-06, |
|
"loss": 0.0001, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 37.005797101449275, |
|
"grad_norm": 0.01179414801299572, |
|
"learning_rate": 8.266237251744499e-06, |
|
"loss": 0.2815, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 37.00676328502416, |
|
"grad_norm": 0.0038128988817334175, |
|
"learning_rate": 8.255501878690286e-06, |
|
"loss": 0.092, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 37.00676328502416, |
|
"eval_accuracy": 0.7692307692307693, |
|
"eval_loss": 1.4979729652404785, |
|
"eval_runtime": 14.9225, |
|
"eval_samples_per_second": 5.227, |
|
"eval_steps_per_second": 1.34, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 38.00096618357488, |
|
"grad_norm": 1.8284555673599243, |
|
"learning_rate": 8.244766505636072e-06, |
|
"loss": 0.0003, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 38.001932367149756, |
|
"grad_norm": 0.010567315854132175, |
|
"learning_rate": 8.234031132581858e-06, |
|
"loss": 0.0006, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 38.00289855072464, |
|
"grad_norm": 0.0020523052662611008, |
|
"learning_rate": 8.223295759527645e-06, |
|
"loss": 0.0001, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 38.00386473429952, |
|
"grad_norm": 0.0025786529295146465, |
|
"learning_rate": 8.212560386473431e-06, |
|
"loss": 0.188, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 38.00483091787439, |
|
"grad_norm": 0.005663599353283644, |
|
"learning_rate": 8.201825013419216e-06, |
|
"loss": 0.2146, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 38.005797101449275, |
|
"grad_norm": 0.028957316651940346, |
|
"learning_rate": 8.191089640365003e-06, |
|
"loss": 0.0002, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 38.00676328502416, |
|
"grad_norm": 0.007074129767715931, |
|
"learning_rate": 8.180354267310789e-06, |
|
"loss": 0.2403, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 38.00676328502416, |
|
"eval_accuracy": 0.7435897435897436, |
|
"eval_loss": 2.099257230758667, |
|
"eval_runtime": 15.1057, |
|
"eval_samples_per_second": 5.164, |
|
"eval_steps_per_second": 1.324, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 39.00096618357488, |
|
"grad_norm": 0.0016518068732693791, |
|
"learning_rate": 8.169618894256575e-06, |
|
"loss": 0.0003, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 39.001932367149756, |
|
"grad_norm": 0.008634793572127819, |
|
"learning_rate": 8.158883521202362e-06, |
|
"loss": 0.0034, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 39.00289855072464, |
|
"grad_norm": 0.004315139725804329, |
|
"learning_rate": 8.148148148148148e-06, |
|
"loss": 0.1378, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 39.00386473429952, |
|
"grad_norm": 0.015739772468805313, |
|
"learning_rate": 8.137412775093935e-06, |
|
"loss": 0.1428, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 39.00483091787439, |
|
"grad_norm": 0.21879592537879944, |
|
"learning_rate": 8.126677402039721e-06, |
|
"loss": 0.0106, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 39.005797101449275, |
|
"grad_norm": 1.373992919921875, |
|
"learning_rate": 8.115942028985508e-06, |
|
"loss": 0.0003, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 39.00676328502416, |
|
"grad_norm": 0.008464109152555466, |
|
"learning_rate": 8.105206655931294e-06, |
|
"loss": 0.0001, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 39.00676328502416, |
|
"eval_accuracy": 0.7435897435897436, |
|
"eval_loss": 1.8645843267440796, |
|
"eval_runtime": 15.7999, |
|
"eval_samples_per_second": 4.937, |
|
"eval_steps_per_second": 1.266, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 40.00096618357488, |
|
"grad_norm": 0.01016903854906559, |
|
"learning_rate": 8.09447128287708e-06, |
|
"loss": 0.0001, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 40.001932367149756, |
|
"grad_norm": 0.006220806855708361, |
|
"learning_rate": 8.083735909822867e-06, |
|
"loss": 0.0011, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 40.00289855072464, |
|
"grad_norm": 0.36463844776153564, |
|
"learning_rate": 8.073000536768653e-06, |
|
"loss": 0.002, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 40.00386473429952, |
|
"grad_norm": 0.0067412094213068485, |
|
"learning_rate": 8.06226516371444e-06, |
|
"loss": 0.0001, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 40.00483091787439, |
|
"grad_norm": 0.002454320201650262, |
|
"learning_rate": 8.051529790660226e-06, |
|
"loss": 0.0001, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 40.005797101449275, |
|
"grad_norm": 0.019683407619595528, |
|
"learning_rate": 8.040794417606013e-06, |
|
"loss": 0.0005, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 40.00676328502416, |
|
"grad_norm": 0.012780338525772095, |
|
"learning_rate": 8.0300590445518e-06, |
|
"loss": 0.0001, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 40.00676328502416, |
|
"eval_accuracy": 0.7435897435897436, |
|
"eval_loss": 1.6642485857009888, |
|
"eval_runtime": 17.6893, |
|
"eval_samples_per_second": 4.409, |
|
"eval_steps_per_second": 1.131, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 41.00096618357488, |
|
"grad_norm": 0.0050564175471663475, |
|
"learning_rate": 8.019323671497586e-06, |
|
"loss": 0.3199, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 41.001932367149756, |
|
"grad_norm": 0.09579924494028091, |
|
"learning_rate": 8.008588298443372e-06, |
|
"loss": 0.0002, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 41.00289855072464, |
|
"grad_norm": 0.008467406034469604, |
|
"learning_rate": 7.997852925389159e-06, |
|
"loss": 0.0002, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 41.00386473429952, |
|
"grad_norm": 0.004457333590835333, |
|
"learning_rate": 7.987117552334945e-06, |
|
"loss": 0.0006, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 41.00483091787439, |
|
"grad_norm": 0.0018141858745366335, |
|
"learning_rate": 7.97638217928073e-06, |
|
"loss": 0.0001, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 41.005797101449275, |
|
"grad_norm": 0.021920841187238693, |
|
"learning_rate": 7.965646806226516e-06, |
|
"loss": 0.0147, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 41.00676328502416, |
|
"grad_norm": 0.006291843950748444, |
|
"learning_rate": 7.954911433172303e-06, |
|
"loss": 0.2108, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 41.00676328502416, |
|
"eval_accuracy": 0.7948717948717948, |
|
"eval_loss": 1.4565685987472534, |
|
"eval_runtime": 23.0447, |
|
"eval_samples_per_second": 3.385, |
|
"eval_steps_per_second": 0.868, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 42.00096618357488, |
|
"grad_norm": 0.0030977341812103987, |
|
"learning_rate": 7.94417606011809e-06, |
|
"loss": 0.1498, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 42.001932367149756, |
|
"grad_norm": 0.002562603447586298, |
|
"learning_rate": 7.933440687063876e-06, |
|
"loss": 0.0006, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 42.00289855072464, |
|
"grad_norm": 48.39828872680664, |
|
"learning_rate": 7.922705314009662e-06, |
|
"loss": 0.2303, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 42.00386473429952, |
|
"grad_norm": 0.058131780475378036, |
|
"learning_rate": 7.911969940955449e-06, |
|
"loss": 0.0002, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 42.00483091787439, |
|
"grad_norm": 0.656736433506012, |
|
"learning_rate": 7.901234567901235e-06, |
|
"loss": 0.083, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 42.005797101449275, |
|
"grad_norm": 0.0014849025756120682, |
|
"learning_rate": 7.890499194847021e-06, |
|
"loss": 0.0963, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 42.00676328502416, |
|
"grad_norm": 0.0057001574896276, |
|
"learning_rate": 7.879763821792808e-06, |
|
"loss": 0.1085, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 42.00676328502416, |
|
"eval_accuracy": 0.782051282051282, |
|
"eval_loss": 1.291454792022705, |
|
"eval_runtime": 20.8555, |
|
"eval_samples_per_second": 3.74, |
|
"eval_steps_per_second": 0.959, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 43.00096618357488, |
|
"grad_norm": 0.0038009320851415396, |
|
"learning_rate": 7.869028448738594e-06, |
|
"loss": 0.0035, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 43.001932367149756, |
|
"grad_norm": 0.049530353397130966, |
|
"learning_rate": 7.85829307568438e-06, |
|
"loss": 0.0001, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 43.00289855072464, |
|
"grad_norm": 0.007268795743584633, |
|
"learning_rate": 7.847557702630167e-06, |
|
"loss": 0.0615, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 43.00386473429952, |
|
"grad_norm": 0.022801999002695084, |
|
"learning_rate": 7.836822329575954e-06, |
|
"loss": 0.1555, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 43.00483091787439, |
|
"grad_norm": 0.004046064801514149, |
|
"learning_rate": 7.82608695652174e-06, |
|
"loss": 0.0001, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 43.005797101449275, |
|
"grad_norm": 0.006020054221153259, |
|
"learning_rate": 7.815351583467527e-06, |
|
"loss": 0.0006, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 43.00676328502416, |
|
"grad_norm": 0.0009516979334875941, |
|
"learning_rate": 7.804616210413313e-06, |
|
"loss": 0.0019, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 43.00676328502416, |
|
"eval_accuracy": 0.8333333333333334, |
|
"eval_loss": 1.2690619230270386, |
|
"eval_runtime": 21.1783, |
|
"eval_samples_per_second": 3.683, |
|
"eval_steps_per_second": 0.944, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 44.00096618357488, |
|
"grad_norm": 0.004416549112647772, |
|
"learning_rate": 7.7938808373591e-06, |
|
"loss": 0.0008, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 44.001932367149756, |
|
"grad_norm": 0.002933249343186617, |
|
"learning_rate": 7.783145464304886e-06, |
|
"loss": 0.0001, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 44.00289855072464, |
|
"grad_norm": 0.005305302329361439, |
|
"learning_rate": 7.772410091250672e-06, |
|
"loss": 0.0001, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 44.00386473429952, |
|
"grad_norm": 0.15504007041454315, |
|
"learning_rate": 7.761674718196459e-06, |
|
"loss": 0.0014, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 44.00483091787439, |
|
"grad_norm": 0.00801057554781437, |
|
"learning_rate": 7.750939345142244e-06, |
|
"loss": 0.1411, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 44.005797101449275, |
|
"grad_norm": 0.013626204803586006, |
|
"learning_rate": 7.74020397208803e-06, |
|
"loss": 0.0293, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 44.00676328502416, |
|
"grad_norm": 0.0010916618630290031, |
|
"learning_rate": 7.729468599033817e-06, |
|
"loss": 0.0072, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 44.00676328502416, |
|
"eval_accuracy": 0.717948717948718, |
|
"eval_loss": 2.270263195037842, |
|
"eval_runtime": 22.5558, |
|
"eval_samples_per_second": 3.458, |
|
"eval_steps_per_second": 0.887, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 45.00096618357488, |
|
"grad_norm": 357.7193908691406, |
|
"learning_rate": 7.718733225979603e-06, |
|
"loss": 0.4039, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 45.001932367149756, |
|
"grad_norm": 94.0329360961914, |
|
"learning_rate": 7.70799785292539e-06, |
|
"loss": 0.2366, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 45.00289855072464, |
|
"grad_norm": 0.08033226430416107, |
|
"learning_rate": 7.697262479871176e-06, |
|
"loss": 0.0739, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 45.00386473429952, |
|
"grad_norm": 0.017788292840123177, |
|
"learning_rate": 7.686527106816962e-06, |
|
"loss": 0.1424, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 45.00483091787439, |
|
"grad_norm": 0.009477581828832626, |
|
"learning_rate": 7.675791733762749e-06, |
|
"loss": 0.0002, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 45.005797101449275, |
|
"grad_norm": 0.007442084141075611, |
|
"learning_rate": 7.665056360708535e-06, |
|
"loss": 0.0002, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 45.00676328502416, |
|
"grad_norm": 0.031041273847222328, |
|
"learning_rate": 7.654320987654322e-06, |
|
"loss": 0.0002, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 45.00676328502416, |
|
"eval_accuracy": 0.7564102564102564, |
|
"eval_loss": 1.515126347541809, |
|
"eval_runtime": 23.2938, |
|
"eval_samples_per_second": 3.349, |
|
"eval_steps_per_second": 0.859, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 46.00096618357488, |
|
"grad_norm": 0.0019831734243780375, |
|
"learning_rate": 7.643585614600108e-06, |
|
"loss": 0.0004, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 46.001932367149756, |
|
"grad_norm": 0.006759632378816605, |
|
"learning_rate": 7.632850241545895e-06, |
|
"loss": 0.0002, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 46.00289855072464, |
|
"grad_norm": 84.79361724853516, |
|
"learning_rate": 7.622114868491681e-06, |
|
"loss": 0.3816, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 46.00386473429952, |
|
"grad_norm": 0.003224059008061886, |
|
"learning_rate": 7.6113794954374675e-06, |
|
"loss": 0.3455, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 46.00483091787439, |
|
"grad_norm": 0.007698831148445606, |
|
"learning_rate": 7.600644122383254e-06, |
|
"loss": 0.6513, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 46.005797101449275, |
|
"grad_norm": 0.10313475131988525, |
|
"learning_rate": 7.58990874932904e-06, |
|
"loss": 0.2645, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 46.00676328502416, |
|
"grad_norm": 0.015344126150012016, |
|
"learning_rate": 7.579173376274827e-06, |
|
"loss": 0.4057, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 46.00676328502416, |
|
"eval_accuracy": 0.7564102564102564, |
|
"eval_loss": 1.417032241821289, |
|
"eval_runtime": 20.3874, |
|
"eval_samples_per_second": 3.826, |
|
"eval_steps_per_second": 0.981, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 47.00096618357488, |
|
"grad_norm": 0.01232170406728983, |
|
"learning_rate": 7.568438003220613e-06, |
|
"loss": 0.0008, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 47.001932367149756, |
|
"grad_norm": 0.0009604791994206607, |
|
"learning_rate": 7.557702630166399e-06, |
|
"loss": 0.1195, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 47.00289855072464, |
|
"grad_norm": 0.06583663076162338, |
|
"learning_rate": 7.546967257112185e-06, |
|
"loss": 0.4292, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 47.00386473429952, |
|
"grad_norm": 1.0096828937530518, |
|
"learning_rate": 7.536231884057972e-06, |
|
"loss": 0.0005, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 47.00483091787439, |
|
"grad_norm": 18.510780334472656, |
|
"learning_rate": 7.525496511003758e-06, |
|
"loss": 0.0017, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 47.005797101449275, |
|
"grad_norm": 0.027283035218715668, |
|
"learning_rate": 7.514761137949545e-06, |
|
"loss": 0.0019, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 47.00676328502416, |
|
"grad_norm": 0.24246840178966522, |
|
"learning_rate": 7.504025764895331e-06, |
|
"loss": 0.0862, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 47.00676328502416, |
|
"eval_accuracy": 0.7307692307692307, |
|
"eval_loss": 2.1071906089782715, |
|
"eval_runtime": 17.1389, |
|
"eval_samples_per_second": 4.551, |
|
"eval_steps_per_second": 1.167, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 48.00096618357488, |
|
"grad_norm": 0.007916704751551151, |
|
"learning_rate": 7.493290391841117e-06, |
|
"loss": 0.0023, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 48.001932367149756, |
|
"grad_norm": 58.70967483520508, |
|
"learning_rate": 7.482555018786903e-06, |
|
"loss": 0.0026, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 48.00289855072464, |
|
"grad_norm": 0.005796114448457956, |
|
"learning_rate": 7.47181964573269e-06, |
|
"loss": 0.0003, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 48.00386473429952, |
|
"grad_norm": 0.0461343489587307, |
|
"learning_rate": 7.461084272678476e-06, |
|
"loss": 0.0002, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 48.00483091787439, |
|
"grad_norm": 0.0035491653252393007, |
|
"learning_rate": 7.4503488996242625e-06, |
|
"loss": 0.0092, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 48.005797101449275, |
|
"grad_norm": 274.37054443359375, |
|
"learning_rate": 7.439613526570049e-06, |
|
"loss": 0.0569, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 48.00676328502416, |
|
"grad_norm": 0.0009124857024289668, |
|
"learning_rate": 7.428878153515835e-06, |
|
"loss": 0.0478, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 48.00676328502416, |
|
"eval_accuracy": 0.7948717948717948, |
|
"eval_loss": 1.6825991868972778, |
|
"eval_runtime": 21.1396, |
|
"eval_samples_per_second": 3.69, |
|
"eval_steps_per_second": 0.946, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 49.00096618357488, |
|
"grad_norm": 0.020062750205397606, |
|
"learning_rate": 7.418142780461621e-06, |
|
"loss": 0.775, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 49.001932367149756, |
|
"grad_norm": 0.017441576346755028, |
|
"learning_rate": 7.4074074074074075e-06, |
|
"loss": 0.1064, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 49.00289855072464, |
|
"grad_norm": 0.022105790674686432, |
|
"learning_rate": 7.396672034353194e-06, |
|
"loss": 0.0002, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 49.00386473429952, |
|
"grad_norm": 0.006332984659820795, |
|
"learning_rate": 7.38593666129898e-06, |
|
"loss": 0.003, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 49.00483091787439, |
|
"grad_norm": 0.023586558178067207, |
|
"learning_rate": 7.375201288244767e-06, |
|
"loss": 0.0002, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 49.005797101449275, |
|
"grad_norm": 0.002561570843681693, |
|
"learning_rate": 7.364465915190554e-06, |
|
"loss": 0.287, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 49.00676328502416, |
|
"grad_norm": 0.02558230608701706, |
|
"learning_rate": 7.353730542136341e-06, |
|
"loss": 0.001, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 49.00676328502416, |
|
"eval_accuracy": 0.7564102564102564, |
|
"eval_loss": 1.4348629713058472, |
|
"eval_runtime": 17.0482, |
|
"eval_samples_per_second": 4.575, |
|
"eval_steps_per_second": 1.173, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 50.00096618357488, |
|
"grad_norm": 0.006134858354926109, |
|
"learning_rate": 7.342995169082127e-06, |
|
"loss": 0.0001, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 50.001932367149756, |
|
"grad_norm": 0.0029954123310744762, |
|
"learning_rate": 7.332259796027913e-06, |
|
"loss": 0.0001, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 50.00289855072464, |
|
"grad_norm": 0.0036094398237764835, |
|
"learning_rate": 7.321524422973699e-06, |
|
"loss": 0.157, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 50.00386473429952, |
|
"grad_norm": 0.0021507777273654938, |
|
"learning_rate": 7.3107890499194855e-06, |
|
"loss": 0.0001, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 50.00483091787439, |
|
"grad_norm": 0.003133822698146105, |
|
"learning_rate": 7.300053676865272e-06, |
|
"loss": 0.0001, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 50.005797101449275, |
|
"grad_norm": 0.0019445763900876045, |
|
"learning_rate": 7.2893183038110584e-06, |
|
"loss": 0.0001, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 50.00676328502416, |
|
"grad_norm": 0.013452505692839622, |
|
"learning_rate": 7.278582930756845e-06, |
|
"loss": 0.0001, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 50.00676328502416, |
|
"eval_accuracy": 0.7948717948717948, |
|
"eval_loss": 1.442294955253601, |
|
"eval_runtime": 17.0525, |
|
"eval_samples_per_second": 4.574, |
|
"eval_steps_per_second": 1.173, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 51.00096618357488, |
|
"grad_norm": 0.004946423228830099, |
|
"learning_rate": 7.2678475577026305e-06, |
|
"loss": 0.0001, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 51.001932367149756, |
|
"grad_norm": 0.003967686556279659, |
|
"learning_rate": 7.257112184648417e-06, |
|
"loss": 0.1159, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 51.00289855072464, |
|
"grad_norm": 0.0038182190619409084, |
|
"learning_rate": 7.246376811594203e-06, |
|
"loss": 0.0001, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 51.00386473429952, |
|
"grad_norm": 0.026833100244402885, |
|
"learning_rate": 7.23564143853999e-06, |
|
"loss": 0.0002, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 51.00483091787439, |
|
"grad_norm": 0.06868147104978561, |
|
"learning_rate": 7.224906065485776e-06, |
|
"loss": 0.0056, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 51.005797101449275, |
|
"grad_norm": 0.0036590364761650562, |
|
"learning_rate": 7.214170692431563e-06, |
|
"loss": 0.0001, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 51.00676328502416, |
|
"grad_norm": 0.007523122243583202, |
|
"learning_rate": 7.203435319377348e-06, |
|
"loss": 0.0056, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 51.00676328502416, |
|
"eval_accuracy": 0.7051282051282052, |
|
"eval_loss": 2.2833075523376465, |
|
"eval_runtime": 17.3876, |
|
"eval_samples_per_second": 4.486, |
|
"eval_steps_per_second": 1.15, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 52.00096618357488, |
|
"grad_norm": 0.012034900486469269, |
|
"learning_rate": 7.192699946323135e-06, |
|
"loss": 0.4707, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 52.001932367149756, |
|
"grad_norm": 0.011260480619966984, |
|
"learning_rate": 7.181964573268921e-06, |
|
"loss": 0.421, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 52.00289855072464, |
|
"grad_norm": 0.043340399861335754, |
|
"learning_rate": 7.171229200214708e-06, |
|
"loss": 0.1122, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 52.00386473429952, |
|
"grad_norm": 3.0821592807769775, |
|
"learning_rate": 7.160493827160494e-06, |
|
"loss": 0.0005, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 52.00483091787439, |
|
"grad_norm": 0.01043855119496584, |
|
"learning_rate": 7.149758454106281e-06, |
|
"loss": 0.067, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 52.005797101449275, |
|
"grad_norm": 0.0030516660772264004, |
|
"learning_rate": 7.139023081052067e-06, |
|
"loss": 0.0305, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 52.00676328502416, |
|
"grad_norm": 0.0027135934215039015, |
|
"learning_rate": 7.128287707997853e-06, |
|
"loss": 0.0004, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 52.00676328502416, |
|
"eval_accuracy": 0.8461538461538461, |
|
"eval_loss": 1.0547696352005005, |
|
"eval_runtime": 16.8224, |
|
"eval_samples_per_second": 4.637, |
|
"eval_steps_per_second": 1.189, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 53.00096618357488, |
|
"grad_norm": 0.0029095993377268314, |
|
"learning_rate": 7.117552334943641e-06, |
|
"loss": 0.0001, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 53.001932367149756, |
|
"grad_norm": 0.08320170640945435, |
|
"learning_rate": 7.106816961889426e-06, |
|
"loss": 0.0003, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 53.00289855072464, |
|
"grad_norm": 0.004328227136284113, |
|
"learning_rate": 7.096081588835213e-06, |
|
"loss": 0.0002, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 53.00386473429952, |
|
"grad_norm": 0.005063340999186039, |
|
"learning_rate": 7.085346215780999e-06, |
|
"loss": 0.0171, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 53.00483091787439, |
|
"grad_norm": 0.005174571648240089, |
|
"learning_rate": 7.074610842726786e-06, |
|
"loss": 0.0001, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 53.005797101449275, |
|
"grad_norm": 0.0030827228911221027, |
|
"learning_rate": 7.063875469672572e-06, |
|
"loss": 0.0001, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 53.00676328502416, |
|
"grad_norm": 0.01317480206489563, |
|
"learning_rate": 7.053140096618359e-06, |
|
"loss": 0.1768, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 53.00676328502416, |
|
"eval_accuracy": 0.8076923076923077, |
|
"eval_loss": 1.2974127531051636, |
|
"eval_runtime": 20.4623, |
|
"eval_samples_per_second": 3.812, |
|
"eval_steps_per_second": 0.977, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 54.00096618357488, |
|
"grad_norm": 0.00562130706384778, |
|
"learning_rate": 7.042404723564144e-06, |
|
"loss": 0.0001, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 54.001932367149756, |
|
"grad_norm": 0.009645653888583183, |
|
"learning_rate": 7.031669350509931e-06, |
|
"loss": 0.0001, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 54.00289855072464, |
|
"grad_norm": 0.006110002752393484, |
|
"learning_rate": 7.020933977455717e-06, |
|
"loss": 0.0001, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 54.00386473429952, |
|
"grad_norm": 0.002363593550398946, |
|
"learning_rate": 7.010198604401504e-06, |
|
"loss": 0.0174, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 54.00483091787439, |
|
"grad_norm": 0.0013595304917544127, |
|
"learning_rate": 6.99946323134729e-06, |
|
"loss": 0.017, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 54.005797101449275, |
|
"grad_norm": 0.003808892797678709, |
|
"learning_rate": 6.9887278582930765e-06, |
|
"loss": 0.0001, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 54.00676328502416, |
|
"grad_norm": 0.006192604545503855, |
|
"learning_rate": 6.977992485238862e-06, |
|
"loss": 0.0001, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 54.00676328502416, |
|
"eval_accuracy": 0.8333333333333334, |
|
"eval_loss": 1.2796709537506104, |
|
"eval_runtime": 14.9163, |
|
"eval_samples_per_second": 5.229, |
|
"eval_steps_per_second": 1.341, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 55.00096618357488, |
|
"grad_norm": 0.0006895597325637937, |
|
"learning_rate": 6.9672571121846486e-06, |
|
"loss": 0.0001, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 55.001932367149756, |
|
"grad_norm": 0.0019923022482544184, |
|
"learning_rate": 6.956521739130435e-06, |
|
"loss": 0.0001, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 55.00289855072464, |
|
"grad_norm": 0.006965520326048136, |
|
"learning_rate": 6.9457863660762215e-06, |
|
"loss": 0.0002, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 55.00386473429952, |
|
"grad_norm": 0.0015802200650796294, |
|
"learning_rate": 6.935050993022008e-06, |
|
"loss": 0.1119, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 55.00483091787439, |
|
"grad_norm": 0.006184786558151245, |
|
"learning_rate": 6.924315619967794e-06, |
|
"loss": 0.0001, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 55.005797101449275, |
|
"grad_norm": 0.01038430817425251, |
|
"learning_rate": 6.913580246913581e-06, |
|
"loss": 0.1995, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 55.00676328502416, |
|
"grad_norm": 0.003573243273422122, |
|
"learning_rate": 6.9028448738593664e-06, |
|
"loss": 0.1027, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 55.00676328502416, |
|
"eval_accuracy": 0.8076923076923077, |
|
"eval_loss": 1.515874981880188, |
|
"eval_runtime": 17.0594, |
|
"eval_samples_per_second": 4.572, |
|
"eval_steps_per_second": 1.172, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 56.00096618357488, |
|
"grad_norm": 0.008314134553074837, |
|
"learning_rate": 6.892109500805153e-06, |
|
"loss": 0.0001, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 56.001932367149756, |
|
"grad_norm": 0.03252334147691727, |
|
"learning_rate": 6.881374127750939e-06, |
|
"loss": 0.0002, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 56.00289855072464, |
|
"grad_norm": 0.0016289795748889446, |
|
"learning_rate": 6.870638754696727e-06, |
|
"loss": 0.0158, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 56.00386473429952, |
|
"grad_norm": 0.0037534793373197317, |
|
"learning_rate": 6.859903381642513e-06, |
|
"loss": 0.0023, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 56.00483091787439, |
|
"grad_norm": 0.025467460975050926, |
|
"learning_rate": 6.8491680085882995e-06, |
|
"loss": 0.178, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 56.005797101449275, |
|
"grad_norm": 44.050533294677734, |
|
"learning_rate": 6.838432635534086e-06, |
|
"loss": 0.3517, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 56.00676328502416, |
|
"grad_norm": 0.0007292564259842038, |
|
"learning_rate": 6.8276972624798724e-06, |
|
"loss": 0.1638, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 56.00676328502416, |
|
"eval_accuracy": 0.7564102564102564, |
|
"eval_loss": 1.9402605295181274, |
|
"eval_runtime": 16.8977, |
|
"eval_samples_per_second": 4.616, |
|
"eval_steps_per_second": 1.184, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 57.00096618357488, |
|
"grad_norm": 0.0029645762406289577, |
|
"learning_rate": 6.816961889425658e-06, |
|
"loss": 0.0016, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 57.001932367149756, |
|
"grad_norm": 0.002086229156702757, |
|
"learning_rate": 6.8062265163714445e-06, |
|
"loss": 0.2358, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 57.00289855072464, |
|
"grad_norm": 0.004452128428965807, |
|
"learning_rate": 6.795491143317231e-06, |
|
"loss": 0.0001, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 57.00386473429952, |
|
"grad_norm": 0.004634437616914511, |
|
"learning_rate": 6.784755770263017e-06, |
|
"loss": 0.0004, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 57.00483091787439, |
|
"grad_norm": 0.01934254914522171, |
|
"learning_rate": 6.774020397208804e-06, |
|
"loss": 0.0001, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 57.005797101449275, |
|
"grad_norm": 0.023365547880530357, |
|
"learning_rate": 6.76328502415459e-06, |
|
"loss": 0.3372, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 57.00676328502416, |
|
"grad_norm": 0.002822543028742075, |
|
"learning_rate": 6.752549651100376e-06, |
|
"loss": 0.0001, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 57.00676328502416, |
|
"eval_accuracy": 0.8076923076923077, |
|
"eval_loss": 1.5075114965438843, |
|
"eval_runtime": 20.119, |
|
"eval_samples_per_second": 3.877, |
|
"eval_steps_per_second": 0.994, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 58.00096618357488, |
|
"grad_norm": 0.07366270571947098, |
|
"learning_rate": 6.741814278046162e-06, |
|
"loss": 0.0001, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 58.001932367149756, |
|
"grad_norm": 0.0015621267957612872, |
|
"learning_rate": 6.731078904991949e-06, |
|
"loss": 0.0001, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 58.00289855072464, |
|
"grad_norm": 0.0036368737928569317, |
|
"learning_rate": 6.720343531937735e-06, |
|
"loss": 0.0002, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 58.00386473429952, |
|
"grad_norm": 0.037495680153369904, |
|
"learning_rate": 6.709608158883522e-06, |
|
"loss": 0.0001, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 58.00483091787439, |
|
"grad_norm": 0.0035316827706992626, |
|
"learning_rate": 6.698872785829308e-06, |
|
"loss": 0.0001, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 58.005797101449275, |
|
"grad_norm": 0.0035044823307543993, |
|
"learning_rate": 6.688137412775095e-06, |
|
"loss": 0.2844, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 58.00676328502416, |
|
"grad_norm": 0.00098858040291816, |
|
"learning_rate": 6.67740203972088e-06, |
|
"loss": 0.0003, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 58.00676328502416, |
|
"eval_accuracy": 0.7435897435897436, |
|
"eval_loss": 2.1290876865386963, |
|
"eval_runtime": 20.9101, |
|
"eval_samples_per_second": 3.73, |
|
"eval_steps_per_second": 0.956, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 59.00096618357488, |
|
"grad_norm": 0.02900398150086403, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.0014, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 59.001932367149756, |
|
"grad_norm": 0.004477238282561302, |
|
"learning_rate": 6.655931293612453e-06, |
|
"loss": 0.0001, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 59.00289855072464, |
|
"grad_norm": 0.6924675107002258, |
|
"learning_rate": 6.6451959205582395e-06, |
|
"loss": 0.0003, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 59.00386473429952, |
|
"grad_norm": 0.008186038583517075, |
|
"learning_rate": 6.634460547504026e-06, |
|
"loss": 0.0008, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 59.00483091787439, |
|
"grad_norm": 0.0031193087343126535, |
|
"learning_rate": 6.623725174449813e-06, |
|
"loss": 0.1875, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 59.005797101449275, |
|
"grad_norm": 0.0018959605367854238, |
|
"learning_rate": 6.6129898013956e-06, |
|
"loss": 0.0001, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 59.00676328502416, |
|
"grad_norm": 0.0005922632990404963, |
|
"learning_rate": 6.602254428341386e-06, |
|
"loss": 0.004, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 59.00676328502416, |
|
"eval_accuracy": 0.7948717948717948, |
|
"eval_loss": 1.6104003190994263, |
|
"eval_runtime": 20.6604, |
|
"eval_samples_per_second": 3.775, |
|
"eval_steps_per_second": 0.968, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 60.00096618357488, |
|
"grad_norm": 0.0038133368361741304, |
|
"learning_rate": 6.591519055287172e-06, |
|
"loss": 0.0001, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 60.001932367149756, |
|
"grad_norm": 0.0029480988159775734, |
|
"learning_rate": 6.580783682232958e-06, |
|
"loss": 0.0001, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 60.00289855072464, |
|
"grad_norm": 0.005030333995819092, |
|
"learning_rate": 6.570048309178745e-06, |
|
"loss": 0.0001, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 60.00386473429952, |
|
"grad_norm": 0.0027628433890640736, |
|
"learning_rate": 6.559312936124531e-06, |
|
"loss": 0.0001, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 60.00483091787439, |
|
"grad_norm": 0.002624318702146411, |
|
"learning_rate": 6.548577563070318e-06, |
|
"loss": 0.0001, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 60.005797101449275, |
|
"grad_norm": 0.003871429478749633, |
|
"learning_rate": 6.537842190016104e-06, |
|
"loss": 0.0069, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 60.00676328502416, |
|
"grad_norm": 0.00036783877294510603, |
|
"learning_rate": 6.52710681696189e-06, |
|
"loss": 0.0214, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 60.00676328502416, |
|
"eval_accuracy": 0.7948717948717948, |
|
"eval_loss": 1.7016513347625732, |
|
"eval_runtime": 21.0149, |
|
"eval_samples_per_second": 3.712, |
|
"eval_steps_per_second": 0.952, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 61.00096618357488, |
|
"grad_norm": 0.0021181628108024597, |
|
"learning_rate": 6.516371443907676e-06, |
|
"loss": 0.0001, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 61.001932367149756, |
|
"grad_norm": 0.001137953600846231, |
|
"learning_rate": 6.5056360708534626e-06, |
|
"loss": 0.0001, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 61.00289855072464, |
|
"grad_norm": 0.0026911040768027306, |
|
"learning_rate": 6.494900697799249e-06, |
|
"loss": 0.0, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 61.00386473429952, |
|
"grad_norm": 0.0015700599178671837, |
|
"learning_rate": 6.4841653247450355e-06, |
|
"loss": 0.0001, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 61.00483091787439, |
|
"grad_norm": 0.0005428745062090456, |
|
"learning_rate": 6.473429951690822e-06, |
|
"loss": 0.0, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 61.005797101449275, |
|
"grad_norm": 0.0032843085937201977, |
|
"learning_rate": 6.462694578636608e-06, |
|
"loss": 0.0001, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 61.00676328502416, |
|
"grad_norm": 0.00028793522506020963, |
|
"learning_rate": 6.451959205582394e-06, |
|
"loss": 0.0, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 61.00676328502416, |
|
"eval_accuracy": 0.8205128205128205, |
|
"eval_loss": 1.64847993850708, |
|
"eval_runtime": 24.004, |
|
"eval_samples_per_second": 3.249, |
|
"eval_steps_per_second": 0.833, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 62.00096618357488, |
|
"grad_norm": 0.0031514782458543777, |
|
"learning_rate": 6.44122383252818e-06, |
|
"loss": 0.0, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 62.001932367149756, |
|
"grad_norm": 0.002257132437080145, |
|
"learning_rate": 6.430488459473967e-06, |
|
"loss": 0.0, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 62.00289855072464, |
|
"grad_norm": 0.0012779063545167446, |
|
"learning_rate": 6.419753086419753e-06, |
|
"loss": 0.0001, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 62.00386473429952, |
|
"grad_norm": 0.0008087829919531941, |
|
"learning_rate": 6.40901771336554e-06, |
|
"loss": 0.0, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 62.00483091787439, |
|
"grad_norm": 0.004048364236950874, |
|
"learning_rate": 6.398282340311326e-06, |
|
"loss": 0.0, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 62.005797101449275, |
|
"grad_norm": 0.0023077051155269146, |
|
"learning_rate": 6.387546967257112e-06, |
|
"loss": 0.0001, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 62.00676328502416, |
|
"grad_norm": 0.0035665419418364763, |
|
"learning_rate": 6.376811594202898e-06, |
|
"loss": 0.0, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 62.00676328502416, |
|
"eval_accuracy": 0.8076923076923077, |
|
"eval_loss": 1.6667520999908447, |
|
"eval_runtime": 24.1821, |
|
"eval_samples_per_second": 3.226, |
|
"eval_steps_per_second": 0.827, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 63.00096618357488, |
|
"grad_norm": 0.0010491169523447752, |
|
"learning_rate": 6.3660762211486856e-06, |
|
"loss": 0.0, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 63.001932367149756, |
|
"grad_norm": 0.00069583480944857, |
|
"learning_rate": 6.355340848094472e-06, |
|
"loss": 0.0225, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 63.00289855072464, |
|
"grad_norm": 0.016136689111590385, |
|
"learning_rate": 6.3446054750402585e-06, |
|
"loss": 0.0185, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 63.00386473429952, |
|
"grad_norm": 0.002296636812388897, |
|
"learning_rate": 6.333870101986045e-06, |
|
"loss": 0.0001, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 63.00483091787439, |
|
"grad_norm": 0.0012935524573549628, |
|
"learning_rate": 6.323134728931831e-06, |
|
"loss": 0.0, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 63.005797101449275, |
|
"grad_norm": 0.00099344237241894, |
|
"learning_rate": 6.312399355877618e-06, |
|
"loss": 0.0, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 63.00676328502416, |
|
"grad_norm": 0.0032291836105287075, |
|
"learning_rate": 6.301663982823404e-06, |
|
"loss": 0.1604, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 63.00676328502416, |
|
"eval_accuracy": 0.7948717948717948, |
|
"eval_loss": 1.7437233924865723, |
|
"eval_runtime": 20.6625, |
|
"eval_samples_per_second": 3.775, |
|
"eval_steps_per_second": 0.968, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 64.00096618357487, |
|
"grad_norm": 0.006006534211337566, |
|
"learning_rate": 6.29092860976919e-06, |
|
"loss": 0.0001, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 64.00193236714976, |
|
"grad_norm": 0.0023545522708445787, |
|
"learning_rate": 6.280193236714976e-06, |
|
"loss": 0.0002, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 64.00289855072464, |
|
"grad_norm": 0.2263268679380417, |
|
"learning_rate": 6.269457863660763e-06, |
|
"loss": 0.1125, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 64.00386473429951, |
|
"grad_norm": 0.0006757387891411781, |
|
"learning_rate": 6.258722490606549e-06, |
|
"loss": 0.0403, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 64.0048309178744, |
|
"grad_norm": 0.001137339510023594, |
|
"learning_rate": 6.247987117552336e-06, |
|
"loss": 0.1675, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 64.00579710144928, |
|
"grad_norm": 0.0034416404087096453, |
|
"learning_rate": 6.237251744498122e-06, |
|
"loss": 0.0001, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 64.00676328502415, |
|
"grad_norm": 0.00035281089367344975, |
|
"learning_rate": 6.226516371443908e-06, |
|
"loss": 0.0002, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 64.00676328502415, |
|
"eval_accuracy": 0.782051282051282, |
|
"eval_loss": 1.676971197128296, |
|
"eval_runtime": 21.1031, |
|
"eval_samples_per_second": 3.696, |
|
"eval_steps_per_second": 0.948, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 65.00096618357487, |
|
"grad_norm": 0.13484346866607666, |
|
"learning_rate": 6.215780998389694e-06, |
|
"loss": 0.0001, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 65.00193236714976, |
|
"grad_norm": 0.024939000606536865, |
|
"learning_rate": 6.205045625335481e-06, |
|
"loss": 0.0001, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 65.00289855072464, |
|
"grad_norm": 0.0018361130496487021, |
|
"learning_rate": 6.194310252281267e-06, |
|
"loss": 0.0001, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 65.00386473429951, |
|
"grad_norm": 0.0014188734348863363, |
|
"learning_rate": 6.1835748792270535e-06, |
|
"loss": 0.0001, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 65.0048309178744, |
|
"grad_norm": 0.0007777014980092645, |
|
"learning_rate": 6.17283950617284e-06, |
|
"loss": 0.0, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 65.00579710144928, |
|
"grad_norm": 0.0012215470196679235, |
|
"learning_rate": 6.162104133118626e-06, |
|
"loss": 0.0175, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 65.00676328502415, |
|
"grad_norm": 0.002018085913732648, |
|
"learning_rate": 6.151368760064412e-06, |
|
"loss": 0.0, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 65.00676328502415, |
|
"eval_accuracy": 0.782051282051282, |
|
"eval_loss": 1.7766458988189697, |
|
"eval_runtime": 20.7581, |
|
"eval_samples_per_second": 3.758, |
|
"eval_steps_per_second": 0.963, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 66.00096618357487, |
|
"grad_norm": 0.005290956702083349, |
|
"learning_rate": 6.1406333870101985e-06, |
|
"loss": 0.0001, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 66.00193236714976, |
|
"grad_norm": 0.0006458330899477005, |
|
"learning_rate": 6.129898013955985e-06, |
|
"loss": 0.0329, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 66.00289855072464, |
|
"grad_norm": 0.0008798078051768243, |
|
"learning_rate": 6.119162640901772e-06, |
|
"loss": 0.0, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 66.00386473429951, |
|
"grad_norm": 0.000433132256148383, |
|
"learning_rate": 6.108427267847559e-06, |
|
"loss": 0.0, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 66.0048309178744, |
|
"grad_norm": 0.0006990334368310869, |
|
"learning_rate": 6.097691894793345e-06, |
|
"loss": 0.0001, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 66.00579710144928, |
|
"grad_norm": 0.001073139370419085, |
|
"learning_rate": 6.086956521739132e-06, |
|
"loss": 0.2237, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 66.00676328502415, |
|
"grad_norm": 0.001728722476400435, |
|
"learning_rate": 6.076221148684918e-06, |
|
"loss": 0.0214, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 66.00676328502415, |
|
"eval_accuracy": 0.782051282051282, |
|
"eval_loss": 1.635103464126587, |
|
"eval_runtime": 20.6814, |
|
"eval_samples_per_second": 3.771, |
|
"eval_steps_per_second": 0.967, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 67.00096618357487, |
|
"grad_norm": 0.001974219921976328, |
|
"learning_rate": 6.065485775630704e-06, |
|
"loss": 0.0, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 67.00193236714976, |
|
"grad_norm": 0.007904019206762314, |
|
"learning_rate": 6.05475040257649e-06, |
|
"loss": 0.0, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 67.00289855072464, |
|
"grad_norm": 0.007640121970325708, |
|
"learning_rate": 6.0440150295222766e-06, |
|
"loss": 0.0002, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 67.00386473429951, |
|
"grad_norm": 0.0008578920387662947, |
|
"learning_rate": 6.033279656468063e-06, |
|
"loss": 0.1054, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 67.0048309178744, |
|
"grad_norm": 0.0038633705116808414, |
|
"learning_rate": 6.0225442834138495e-06, |
|
"loss": 0.0, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 67.00579710144928, |
|
"grad_norm": 0.000722988392226398, |
|
"learning_rate": 6.011808910359636e-06, |
|
"loss": 0.1052, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 67.00676328502415, |
|
"grad_norm": 0.0019577303901314735, |
|
"learning_rate": 6.0010735373054215e-06, |
|
"loss": 0.0, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 67.00676328502415, |
|
"eval_accuracy": 0.782051282051282, |
|
"eval_loss": 1.687774896621704, |
|
"eval_runtime": 21.4599, |
|
"eval_samples_per_second": 3.635, |
|
"eval_steps_per_second": 0.932, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 68.00096618357487, |
|
"grad_norm": 0.0018796432996168733, |
|
"learning_rate": 5.990338164251208e-06, |
|
"loss": 0.0, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 68.00193236714976, |
|
"grad_norm": 0.04547438025474548, |
|
"learning_rate": 5.979602791196994e-06, |
|
"loss": 0.0001, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 68.00289855072464, |
|
"grad_norm": 0.0011448647128418088, |
|
"learning_rate": 5.968867418142781e-06, |
|
"loss": 0.0054, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 68.00386473429951, |
|
"grad_norm": 0.0004199968825560063, |
|
"learning_rate": 5.958132045088567e-06, |
|
"loss": 0.0001, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 68.0048309178744, |
|
"grad_norm": 0.0010871714912354946, |
|
"learning_rate": 5.947396672034354e-06, |
|
"loss": 0.0001, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 68.00579710144928, |
|
"grad_norm": 0.000813886639662087, |
|
"learning_rate": 5.936661298980139e-06, |
|
"loss": 0.0, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 68.00676328502415, |
|
"grad_norm": 0.0005087672616355121, |
|
"learning_rate": 5.925925925925926e-06, |
|
"loss": 0.0, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 68.00676328502415, |
|
"eval_accuracy": 0.7564102564102564, |
|
"eval_loss": 1.8763519525527954, |
|
"eval_runtime": 20.8251, |
|
"eval_samples_per_second": 3.745, |
|
"eval_steps_per_second": 0.96, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 69.00096618357487, |
|
"grad_norm": 0.0011922065168619156, |
|
"learning_rate": 5.915190552871712e-06, |
|
"loss": 0.0, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 69.00193236714976, |
|
"grad_norm": 0.0004207005840726197, |
|
"learning_rate": 5.904455179817499e-06, |
|
"loss": 0.1623, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 69.00289855072464, |
|
"grad_norm": 0.00039689309778623283, |
|
"learning_rate": 5.893719806763285e-06, |
|
"loss": 0.0, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 69.00386473429951, |
|
"grad_norm": 0.0007422741036862135, |
|
"learning_rate": 5.882984433709072e-06, |
|
"loss": 0.073, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 69.0048309178744, |
|
"grad_norm": 0.0005210656672716141, |
|
"learning_rate": 5.872249060654859e-06, |
|
"loss": 0.0, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 69.00579710144928, |
|
"grad_norm": 0.0009733252227306366, |
|
"learning_rate": 5.861513687600645e-06, |
|
"loss": 0.0003, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 69.00676328502415, |
|
"grad_norm": 0.0003511267132125795, |
|
"learning_rate": 5.850778314546432e-06, |
|
"loss": 0.2082, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 69.00676328502415, |
|
"eval_accuracy": 0.7564102564102564, |
|
"eval_loss": 1.779943585395813, |
|
"eval_runtime": 21.1124, |
|
"eval_samples_per_second": 3.695, |
|
"eval_steps_per_second": 0.947, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 70.00096618357487, |
|
"grad_norm": 0.0011452921899035573, |
|
"learning_rate": 5.840042941492217e-06, |
|
"loss": 0.0, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 70.00193236714976, |
|
"grad_norm": 0.004901723936200142, |
|
"learning_rate": 5.829307568438004e-06, |
|
"loss": 0.0024, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 70.00289855072464, |
|
"grad_norm": 0.006064664572477341, |
|
"learning_rate": 5.81857219538379e-06, |
|
"loss": 0.0, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 70.00386473429951, |
|
"grad_norm": 0.0014497352531179786, |
|
"learning_rate": 5.807836822329577e-06, |
|
"loss": 0.0065, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 70.0048309178744, |
|
"grad_norm": 0.0013388212537392974, |
|
"learning_rate": 5.797101449275363e-06, |
|
"loss": 0.0001, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 70.00579710144928, |
|
"grad_norm": 0.0011337369214743376, |
|
"learning_rate": 5.78636607622115e-06, |
|
"loss": 0.0, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 70.00676328502415, |
|
"grad_norm": 0.000552912475541234, |
|
"learning_rate": 5.775630703166935e-06, |
|
"loss": 0.0, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 70.00676328502415, |
|
"eval_accuracy": 0.7948717948717948, |
|
"eval_loss": 1.7388193607330322, |
|
"eval_runtime": 24.3328, |
|
"eval_samples_per_second": 3.206, |
|
"eval_steps_per_second": 0.822, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 71.00096618357487, |
|
"grad_norm": 0.0012520031305029988, |
|
"learning_rate": 5.764895330112722e-06, |
|
"loss": 0.0, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 71.00193236714976, |
|
"grad_norm": 0.0008118404657579958, |
|
"learning_rate": 5.754159957058508e-06, |
|
"loss": 0.0, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 71.00289855072464, |
|
"grad_norm": 0.0011605226900428534, |
|
"learning_rate": 5.743424584004295e-06, |
|
"loss": 0.0, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 71.00386473429951, |
|
"grad_norm": 0.0014740672195330262, |
|
"learning_rate": 5.732689210950081e-06, |
|
"loss": 0.0, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 71.0048309178744, |
|
"grad_norm": 0.0008719124598428607, |
|
"learning_rate": 5.7219538378958675e-06, |
|
"loss": 0.0001, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 71.00579710144928, |
|
"grad_norm": 0.0011694729328155518, |
|
"learning_rate": 5.711218464841653e-06, |
|
"loss": 0.0, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 71.00676328502415, |
|
"grad_norm": 0.0014133198419585824, |
|
"learning_rate": 5.70048309178744e-06, |
|
"loss": 0.0, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 71.00676328502415, |
|
"eval_accuracy": 0.7948717948717948, |
|
"eval_loss": 1.6719121932983398, |
|
"eval_runtime": 15.9285, |
|
"eval_samples_per_second": 4.897, |
|
"eval_steps_per_second": 1.256, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 72.00096618357487, |
|
"grad_norm": 0.0005829191650263965, |
|
"learning_rate": 5.689747718733226e-06, |
|
"loss": 0.0, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 72.00193236714976, |
|
"grad_norm": 0.0113700395449996, |
|
"learning_rate": 5.6790123456790125e-06, |
|
"loss": 0.0, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 72.00289855072464, |
|
"grad_norm": 0.001089294906705618, |
|
"learning_rate": 5.668276972624799e-06, |
|
"loss": 0.0455, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 72.00386473429951, |
|
"grad_norm": 0.00036309906863607466, |
|
"learning_rate": 5.657541599570585e-06, |
|
"loss": 0.1185, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 72.0048309178744, |
|
"grad_norm": 0.0007753897807560861, |
|
"learning_rate": 5.646806226516372e-06, |
|
"loss": 0.0, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 72.00579710144928, |
|
"grad_norm": 0.0005843242397531867, |
|
"learning_rate": 5.6360708534621574e-06, |
|
"loss": 0.0, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 72.00676328502415, |
|
"grad_norm": 0.007903228513896465, |
|
"learning_rate": 5.625335480407944e-06, |
|
"loss": 0.0001, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 72.00676328502415, |
|
"eval_accuracy": 0.7948717948717948, |
|
"eval_loss": 1.6066139936447144, |
|
"eval_runtime": 15.2419, |
|
"eval_samples_per_second": 5.117, |
|
"eval_steps_per_second": 1.312, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 73.00096618357487, |
|
"grad_norm": 0.09604029357433319, |
|
"learning_rate": 5.614600107353731e-06, |
|
"loss": 0.0037, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 73.00193236714976, |
|
"grad_norm": 0.00048322087968699634, |
|
"learning_rate": 5.603864734299518e-06, |
|
"loss": 0.0, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 73.00289855072464, |
|
"grad_norm": 0.0017982334829866886, |
|
"learning_rate": 5.593129361245304e-06, |
|
"loss": 0.0, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 73.00386473429951, |
|
"grad_norm": 0.02436600998044014, |
|
"learning_rate": 5.5823939881910905e-06, |
|
"loss": 0.0951, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 73.0048309178744, |
|
"grad_norm": 0.0008387943380512297, |
|
"learning_rate": 5.571658615136877e-06, |
|
"loss": 0.0, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 73.00579710144928, |
|
"grad_norm": 0.0010867329547181726, |
|
"learning_rate": 5.5609232420826634e-06, |
|
"loss": 0.0049, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 73.00676328502415, |
|
"grad_norm": 0.002065508160740137, |
|
"learning_rate": 5.550187869028449e-06, |
|
"loss": 0.0001, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 73.00676328502415, |
|
"eval_accuracy": 0.7564102564102564, |
|
"eval_loss": 2.1180622577667236, |
|
"eval_runtime": 14.9441, |
|
"eval_samples_per_second": 5.219, |
|
"eval_steps_per_second": 1.338, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 74.00096618357487, |
|
"grad_norm": 0.001679830951616168, |
|
"learning_rate": 5.5394524959742355e-06, |
|
"loss": 0.0, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 74.00193236714976, |
|
"grad_norm": 0.0013725162716582417, |
|
"learning_rate": 5.528717122920022e-06, |
|
"loss": 0.2049, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 74.00289855072464, |
|
"grad_norm": 0.0021361317485570908, |
|
"learning_rate": 5.517981749865808e-06, |
|
"loss": 0.0, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 74.00386473429951, |
|
"grad_norm": 0.006481006741523743, |
|
"learning_rate": 5.507246376811595e-06, |
|
"loss": 0.0, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 74.0048309178744, |
|
"grad_norm": 0.037138450890779495, |
|
"learning_rate": 5.496511003757381e-06, |
|
"loss": 0.0, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 74.00579710144928, |
|
"grad_norm": 0.001368780038319528, |
|
"learning_rate": 5.485775630703167e-06, |
|
"loss": 0.1058, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 74.00676328502415, |
|
"grad_norm": 0.0019940112251788378, |
|
"learning_rate": 5.475040257648953e-06, |
|
"loss": 0.0, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 74.00676328502415, |
|
"eval_accuracy": 0.7564102564102564, |
|
"eval_loss": 2.1772854328155518, |
|
"eval_runtime": 17.1681, |
|
"eval_samples_per_second": 4.543, |
|
"eval_steps_per_second": 1.165, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 75.00096618357487, |
|
"grad_norm": 0.0009713310282677412, |
|
"learning_rate": 5.46430488459474e-06, |
|
"loss": 0.0, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 75.00193236714976, |
|
"grad_norm": 0.0003569677355699241, |
|
"learning_rate": 5.453569511540526e-06, |
|
"loss": 0.2552, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 75.00289855072464, |
|
"grad_norm": 0.0033191258553415537, |
|
"learning_rate": 5.442834138486313e-06, |
|
"loss": 0.0, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 75.00386473429951, |
|
"grad_norm": 0.019612202420830727, |
|
"learning_rate": 5.432098765432099e-06, |
|
"loss": 0.0001, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 75.0048309178744, |
|
"grad_norm": 0.0007422531489282846, |
|
"learning_rate": 5.421363392377886e-06, |
|
"loss": 0.0001, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 75.00579710144928, |
|
"grad_norm": 0.0014051502803340554, |
|
"learning_rate": 5.410628019323671e-06, |
|
"loss": 0.0053, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 75.00676328502415, |
|
"grad_norm": 0.0010339098516851664, |
|
"learning_rate": 5.399892646269458e-06, |
|
"loss": 0.0, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 75.00676328502415, |
|
"eval_accuracy": 0.717948717948718, |
|
"eval_loss": 2.5632450580596924, |
|
"eval_runtime": 17.5653, |
|
"eval_samples_per_second": 4.441, |
|
"eval_steps_per_second": 1.139, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 76.00096618357487, |
|
"grad_norm": 0.0006680580554530025, |
|
"learning_rate": 5.389157273215244e-06, |
|
"loss": 0.1044, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 76.00193236714976, |
|
"grad_norm": 0.0012755277566611767, |
|
"learning_rate": 5.3784219001610306e-06, |
|
"loss": 0.2949, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 76.00289855072464, |
|
"grad_norm": 0.0015628045657649636, |
|
"learning_rate": 5.367686527106818e-06, |
|
"loss": 0.0003, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 76.00386473429951, |
|
"grad_norm": 0.0014394833706319332, |
|
"learning_rate": 5.356951154052604e-06, |
|
"loss": 0.0271, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 76.0048309178744, |
|
"grad_norm": 0.006877475883811712, |
|
"learning_rate": 5.346215780998391e-06, |
|
"loss": 0.0, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 76.00579710144928, |
|
"grad_norm": 0.0015750023303553462, |
|
"learning_rate": 5.335480407944177e-06, |
|
"loss": 0.0, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 76.00676328502415, |
|
"grad_norm": 0.0003570659027900547, |
|
"learning_rate": 5.324745034889963e-06, |
|
"loss": 0.0, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 76.00676328502415, |
|
"eval_accuracy": 0.8076923076923077, |
|
"eval_loss": 1.1549041271209717, |
|
"eval_runtime": 15.4003, |
|
"eval_samples_per_second": 5.065, |
|
"eval_steps_per_second": 1.299, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 77.00096618357487, |
|
"grad_norm": 0.0005772082367911935, |
|
"learning_rate": 5.314009661835749e-06, |
|
"loss": 0.0, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 77.00193236714976, |
|
"grad_norm": 0.0010372302494943142, |
|
"learning_rate": 5.303274288781536e-06, |
|
"loss": 0.127, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 77.00289855072464, |
|
"grad_norm": 0.0018043516902253032, |
|
"learning_rate": 5.292538915727322e-06, |
|
"loss": 0.0, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 77.00386473429951, |
|
"grad_norm": 0.0005784116219729185, |
|
"learning_rate": 5.281803542673109e-06, |
|
"loss": 0.2015, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 77.0048309178744, |
|
"grad_norm": 0.006113346666097641, |
|
"learning_rate": 5.271068169618895e-06, |
|
"loss": 0.0, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 77.00579710144928, |
|
"grad_norm": 0.002624011831358075, |
|
"learning_rate": 5.2603327965646815e-06, |
|
"loss": 0.0003, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 77.00676328502415, |
|
"grad_norm": 0.0012411015341058373, |
|
"learning_rate": 5.249597423510467e-06, |
|
"loss": 0.0006, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 77.00676328502415, |
|
"eval_accuracy": 0.7435897435897436, |
|
"eval_loss": 2.229558229446411, |
|
"eval_runtime": 17.9739, |
|
"eval_samples_per_second": 4.34, |
|
"eval_steps_per_second": 1.113, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 78.00096618357487, |
|
"grad_norm": 0.0013451834674924612, |
|
"learning_rate": 5.2388620504562536e-06, |
|
"loss": 0.0023, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 78.00193236714976, |
|
"grad_norm": 0.0013466801028698683, |
|
"learning_rate": 5.22812667740204e-06, |
|
"loss": 0.0005, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 78.00289855072464, |
|
"grad_norm": 0.2549854815006256, |
|
"learning_rate": 5.2173913043478265e-06, |
|
"loss": 0.1312, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 78.00386473429951, |
|
"grad_norm": 0.011888462118804455, |
|
"learning_rate": 5.206655931293613e-06, |
|
"loss": 0.2297, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 78.0048309178744, |
|
"grad_norm": 0.006290052086114883, |
|
"learning_rate": 5.195920558239399e-06, |
|
"loss": 0.0001, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 78.00579710144928, |
|
"grad_norm": 0.0024707193952053785, |
|
"learning_rate": 5.185185185185185e-06, |
|
"loss": 0.0196, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 78.00676328502415, |
|
"grad_norm": 0.0009130489779636264, |
|
"learning_rate": 5.1744498121309714e-06, |
|
"loss": 0.0, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 78.00676328502415, |
|
"eval_accuracy": 0.7307692307692307, |
|
"eval_loss": 2.5381572246551514, |
|
"eval_runtime": 15.5391, |
|
"eval_samples_per_second": 5.02, |
|
"eval_steps_per_second": 1.287, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 79.00096618357487, |
|
"grad_norm": 0.0007505626999773085, |
|
"learning_rate": 5.163714439076758e-06, |
|
"loss": 0.0064, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 79.00193236714976, |
|
"grad_norm": 0.0007742494344711304, |
|
"learning_rate": 5.152979066022544e-06, |
|
"loss": 0.0, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 79.00289855072464, |
|
"grad_norm": 0.0012070463271811604, |
|
"learning_rate": 5.142243692968331e-06, |
|
"loss": 0.1978, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 79.00386473429951, |
|
"grad_norm": 0.0034262288827449083, |
|
"learning_rate": 5.131508319914117e-06, |
|
"loss": 0.0, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 79.0048309178744, |
|
"grad_norm": 0.0023414520546793938, |
|
"learning_rate": 5.1207729468599045e-06, |
|
"loss": 0.0003, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 79.00579710144928, |
|
"grad_norm": 0.0022120054345577955, |
|
"learning_rate": 5.110037573805691e-06, |
|
"loss": 0.1615, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 79.00676328502415, |
|
"grad_norm": 0.0005375827313400805, |
|
"learning_rate": 5.099302200751477e-06, |
|
"loss": 0.0001, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 79.00676328502415, |
|
"eval_accuracy": 0.782051282051282, |
|
"eval_loss": 1.5726310014724731, |
|
"eval_runtime": 15.7655, |
|
"eval_samples_per_second": 4.948, |
|
"eval_steps_per_second": 1.269, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 80.00096618357487, |
|
"grad_norm": 0.0010982960229739547, |
|
"learning_rate": 5.088566827697263e-06, |
|
"loss": 0.0001, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 80.00193236714976, |
|
"grad_norm": 0.0007016141898930073, |
|
"learning_rate": 5.0778314546430495e-06, |
|
"loss": 0.0, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 80.00289855072464, |
|
"grad_norm": 0.001342971809208393, |
|
"learning_rate": 5.067096081588836e-06, |
|
"loss": 0.0, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 80.00386473429951, |
|
"grad_norm": 0.019569087773561478, |
|
"learning_rate": 5.056360708534622e-06, |
|
"loss": 0.0, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 80.0048309178744, |
|
"grad_norm": 0.00047855902812443674, |
|
"learning_rate": 5.045625335480409e-06, |
|
"loss": 0.0, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 80.00579710144928, |
|
"grad_norm": 0.0011246672365814447, |
|
"learning_rate": 5.034889962426195e-06, |
|
"loss": 0.0, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 80.00676328502415, |
|
"grad_norm": 0.0016472653951495886, |
|
"learning_rate": 5.024154589371981e-06, |
|
"loss": 0.0002, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 80.00676328502415, |
|
"eval_accuracy": 0.8076923076923077, |
|
"eval_loss": 1.57036292552948, |
|
"eval_runtime": 14.8715, |
|
"eval_samples_per_second": 5.245, |
|
"eval_steps_per_second": 1.345, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 81.00096618357487, |
|
"grad_norm": 0.01656234823167324, |
|
"learning_rate": 5.013419216317767e-06, |
|
"loss": 0.0001, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 81.00193236714976, |
|
"grad_norm": 0.002044126158580184, |
|
"learning_rate": 5.002683843263554e-06, |
|
"loss": 0.0, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 81.00289855072464, |
|
"grad_norm": 0.0057681528851389885, |
|
"learning_rate": 4.99194847020934e-06, |
|
"loss": 0.0, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 81.00386473429951, |
|
"grad_norm": 0.006356089375913143, |
|
"learning_rate": 4.981213097155127e-06, |
|
"loss": 0.001, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 81.0048309178744, |
|
"grad_norm": 0.0005163434543646872, |
|
"learning_rate": 4.970477724100913e-06, |
|
"loss": 0.198, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 81.00579710144928, |
|
"grad_norm": 0.0019573138561099768, |
|
"learning_rate": 4.959742351046699e-06, |
|
"loss": 0.0, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 81.00676328502415, |
|
"grad_norm": 0.0007160149980336428, |
|
"learning_rate": 4.949006977992485e-06, |
|
"loss": 0.0199, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 81.00676328502415, |
|
"eval_accuracy": 0.8076923076923077, |
|
"eval_loss": 1.5503082275390625, |
|
"eval_runtime": 14.9894, |
|
"eval_samples_per_second": 5.204, |
|
"eval_steps_per_second": 1.334, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 82.00096618357487, |
|
"grad_norm": 0.000732768268790096, |
|
"learning_rate": 4.938271604938272e-06, |
|
"loss": 0.0001, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 82.00193236714976, |
|
"grad_norm": 0.0015916397096589208, |
|
"learning_rate": 4.927536231884059e-06, |
|
"loss": 0.0, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 82.00289855072464, |
|
"grad_norm": 0.0016159799415618181, |
|
"learning_rate": 4.9168008588298446e-06, |
|
"loss": 0.0006, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 82.00386473429951, |
|
"grad_norm": 0.003708163509145379, |
|
"learning_rate": 4.906065485775631e-06, |
|
"loss": 0.0, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 82.0048309178744, |
|
"grad_norm": 0.0009543473133817315, |
|
"learning_rate": 4.8953301127214175e-06, |
|
"loss": 0.2218, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 82.00579710144928, |
|
"grad_norm": 0.0033412850461900234, |
|
"learning_rate": 4.884594739667204e-06, |
|
"loss": 0.0, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 82.00676328502415, |
|
"grad_norm": 0.003886610036715865, |
|
"learning_rate": 4.87385936661299e-06, |
|
"loss": 0.0001, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 82.00676328502415, |
|
"eval_accuracy": 0.8076923076923077, |
|
"eval_loss": 1.365352749824524, |
|
"eval_runtime": 15.0916, |
|
"eval_samples_per_second": 5.168, |
|
"eval_steps_per_second": 1.325, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 83.00096618357487, |
|
"grad_norm": 0.01847156696021557, |
|
"learning_rate": 4.863123993558777e-06, |
|
"loss": 0.0855, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 83.00193236714976, |
|
"grad_norm": 0.0023459733929485083, |
|
"learning_rate": 4.852388620504562e-06, |
|
"loss": 0.0009, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 83.00289855072464, |
|
"grad_norm": 0.0007858324679546058, |
|
"learning_rate": 4.841653247450349e-06, |
|
"loss": 0.0, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 83.00386473429951, |
|
"grad_norm": 0.0015262184897437692, |
|
"learning_rate": 4.830917874396135e-06, |
|
"loss": 0.0, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 83.0048309178744, |
|
"grad_norm": 0.0011216209968551993, |
|
"learning_rate": 4.820182501341922e-06, |
|
"loss": 0.0, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 83.00579710144928, |
|
"grad_norm": 0.010345948860049248, |
|
"learning_rate": 4.809447128287709e-06, |
|
"loss": 0.0, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 83.00676328502415, |
|
"grad_norm": 0.00381542113609612, |
|
"learning_rate": 4.798711755233495e-06, |
|
"loss": 0.0, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 83.00676328502415, |
|
"eval_accuracy": 0.8076923076923077, |
|
"eval_loss": 1.4726909399032593, |
|
"eval_runtime": 14.7359, |
|
"eval_samples_per_second": 5.293, |
|
"eval_steps_per_second": 1.357, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 84.00096618357487, |
|
"grad_norm": 0.001281603006646037, |
|
"learning_rate": 4.787976382179281e-06, |
|
"loss": 0.0004, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 84.00193236714976, |
|
"grad_norm": 0.0006520389579236507, |
|
"learning_rate": 4.7772410091250676e-06, |
|
"loss": 0.0, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 84.00289855072464, |
|
"grad_norm": 0.0008780735661275685, |
|
"learning_rate": 4.766505636070854e-06, |
|
"loss": 0.0, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 84.00386473429951, |
|
"grad_norm": 0.00047110416926443577, |
|
"learning_rate": 4.7557702630166405e-06, |
|
"loss": 0.1301, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 84.0048309178744, |
|
"grad_norm": 0.0020175003446638584, |
|
"learning_rate": 4.745034889962427e-06, |
|
"loss": 0.0, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 84.00579710144928, |
|
"grad_norm": 0.001587681588716805, |
|
"learning_rate": 4.7342995169082125e-06, |
|
"loss": 0.0585, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 84.00676328502415, |
|
"grad_norm": 0.006288263946771622, |
|
"learning_rate": 4.723564143853999e-06, |
|
"loss": 0.0001, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 84.00676328502415, |
|
"eval_accuracy": 0.717948717948718, |
|
"eval_loss": 2.2248222827911377, |
|
"eval_runtime": 15.8238, |
|
"eval_samples_per_second": 4.929, |
|
"eval_steps_per_second": 1.264, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 85.00096618357487, |
|
"grad_norm": 0.0007544896216131747, |
|
"learning_rate": 4.712828770799785e-06, |
|
"loss": 0.1264, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 85.00193236714976, |
|
"grad_norm": 0.015535094775259495, |
|
"learning_rate": 4.702093397745572e-06, |
|
"loss": 0.0, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 85.00289855072464, |
|
"grad_norm": 0.0009989738464355469, |
|
"learning_rate": 4.691358024691358e-06, |
|
"loss": 0.0, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 85.00386473429951, |
|
"grad_norm": 0.0020626296754926443, |
|
"learning_rate": 4.680622651637145e-06, |
|
"loss": 0.0, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 85.0048309178744, |
|
"grad_norm": 0.0007889845292083919, |
|
"learning_rate": 4.669887278582931e-06, |
|
"loss": 0.0, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 85.00579710144928, |
|
"grad_norm": 0.0008909267489798367, |
|
"learning_rate": 4.659151905528718e-06, |
|
"loss": 0.0001, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 85.00676328502415, |
|
"grad_norm": 0.0013790903612971306, |
|
"learning_rate": 4.648416532474504e-06, |
|
"loss": 0.0, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 85.00676328502415, |
|
"eval_accuracy": 0.782051282051282, |
|
"eval_loss": 1.7187656164169312, |
|
"eval_runtime": 15.1982, |
|
"eval_samples_per_second": 5.132, |
|
"eval_steps_per_second": 1.316, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 86.00096618357487, |
|
"grad_norm": 0.0003605932288337499, |
|
"learning_rate": 4.637681159420291e-06, |
|
"loss": 0.2248, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 86.00193236714976, |
|
"grad_norm": 0.002730882028117776, |
|
"learning_rate": 4.626945786366077e-06, |
|
"loss": 0.001, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 86.00289855072464, |
|
"grad_norm": 0.0010079998755827546, |
|
"learning_rate": 4.616210413311863e-06, |
|
"loss": 0.1221, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 86.00386473429951, |
|
"grad_norm": 0.0005993593367747962, |
|
"learning_rate": 4.605475040257649e-06, |
|
"loss": 0.0, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 86.0048309178744, |
|
"grad_norm": 0.0010675563244149089, |
|
"learning_rate": 4.5947396672034355e-06, |
|
"loss": 0.0, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 86.00579710144928, |
|
"grad_norm": 0.0026733041740953922, |
|
"learning_rate": 4.584004294149222e-06, |
|
"loss": 0.0, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 86.00676328502415, |
|
"grad_norm": 0.0020553949289023876, |
|
"learning_rate": 4.5732689210950084e-06, |
|
"loss": 0.0101, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 86.00676328502415, |
|
"eval_accuracy": 0.7435897435897436, |
|
"eval_loss": 1.927074909210205, |
|
"eval_runtime": 14.9313, |
|
"eval_samples_per_second": 5.224, |
|
"eval_steps_per_second": 1.339, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 87.00096618357487, |
|
"grad_norm": 0.0008128826157189906, |
|
"learning_rate": 4.562533548040795e-06, |
|
"loss": 0.0024, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 87.00193236714976, |
|
"grad_norm": 0.0006628745468333364, |
|
"learning_rate": 4.551798174986581e-06, |
|
"loss": 0.0, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 87.00289855072464, |
|
"grad_norm": 0.04763895273208618, |
|
"learning_rate": 4.541062801932368e-06, |
|
"loss": 0.0, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 87.00386473429951, |
|
"grad_norm": 0.0007302842568606138, |
|
"learning_rate": 4.530327428878154e-06, |
|
"loss": 0.0001, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 87.0048309178744, |
|
"grad_norm": 0.0005195592530071735, |
|
"learning_rate": 4.519592055823941e-06, |
|
"loss": 0.0, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 87.00579710144928, |
|
"grad_norm": 0.001573888468556106, |
|
"learning_rate": 4.508856682769726e-06, |
|
"loss": 0.0, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 87.00676328502415, |
|
"grad_norm": 0.0013048958498984575, |
|
"learning_rate": 4.498121309715513e-06, |
|
"loss": 0.0001, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 87.00676328502415, |
|
"eval_accuracy": 0.8333333333333334, |
|
"eval_loss": 1.5355632305145264, |
|
"eval_runtime": 14.8356, |
|
"eval_samples_per_second": 5.258, |
|
"eval_steps_per_second": 1.348, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 88.00096618357487, |
|
"grad_norm": 0.0004708681080956012, |
|
"learning_rate": 4.487385936661299e-06, |
|
"loss": 0.0, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 88.00193236714976, |
|
"grad_norm": 0.00878297071903944, |
|
"learning_rate": 4.476650563607086e-06, |
|
"loss": 0.0, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 88.00289855072464, |
|
"grad_norm": 0.0006720071542076766, |
|
"learning_rate": 4.465915190552872e-06, |
|
"loss": 0.0, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 88.00386473429951, |
|
"grad_norm": 0.0007516877958551049, |
|
"learning_rate": 4.4551798174986585e-06, |
|
"loss": 0.051, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 88.0048309178744, |
|
"grad_norm": 0.001620340975932777, |
|
"learning_rate": 4.444444444444444e-06, |
|
"loss": 0.0, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 88.00579710144928, |
|
"grad_norm": 0.0005804314860142767, |
|
"learning_rate": 4.4337090713902314e-06, |
|
"loss": 0.0005, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 88.00676328502415, |
|
"grad_norm": 0.0008902663248591125, |
|
"learning_rate": 4.422973698336018e-06, |
|
"loss": 0.0, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 88.00676328502415, |
|
"eval_accuracy": 0.8076923076923077, |
|
"eval_loss": 1.6682919263839722, |
|
"eval_runtime": 14.9271, |
|
"eval_samples_per_second": 5.225, |
|
"eval_steps_per_second": 1.34, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 89.00096618357487, |
|
"grad_norm": 0.0005711275152862072, |
|
"learning_rate": 4.412238325281804e-06, |
|
"loss": 0.0, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 89.00193236714976, |
|
"grad_norm": 0.0005052529741078615, |
|
"learning_rate": 4.401502952227591e-06, |
|
"loss": 0.0, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 89.00289855072464, |
|
"grad_norm": 0.0013751588994637132, |
|
"learning_rate": 4.390767579173376e-06, |
|
"loss": 0.0, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 89.00386473429951, |
|
"grad_norm": 0.0006107139051891863, |
|
"learning_rate": 4.380032206119163e-06, |
|
"loss": 0.0, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 89.0048309178744, |
|
"grad_norm": 0.000693678914103657, |
|
"learning_rate": 4.369296833064949e-06, |
|
"loss": 0.0, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 89.00579710144928, |
|
"grad_norm": 0.0005502082640305161, |
|
"learning_rate": 4.358561460010736e-06, |
|
"loss": 0.2555, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 89.00676328502415, |
|
"grad_norm": 0.0003575296141207218, |
|
"learning_rate": 4.347826086956522e-06, |
|
"loss": 0.0, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 89.00676328502415, |
|
"eval_accuracy": 0.7307692307692307, |
|
"eval_loss": 2.5565624237060547, |
|
"eval_runtime": 16.7423, |
|
"eval_samples_per_second": 4.659, |
|
"eval_steps_per_second": 1.195, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 90.00096618357487, |
|
"grad_norm": 0.0003587829996831715, |
|
"learning_rate": 4.337090713902309e-06, |
|
"loss": 0.0006, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 90.00193236714976, |
|
"grad_norm": 0.001737708691507578, |
|
"learning_rate": 4.326355340848094e-06, |
|
"loss": 0.0, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 90.00289855072464, |
|
"grad_norm": 43.55590057373047, |
|
"learning_rate": 4.315619967793881e-06, |
|
"loss": 0.0018, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 90.00386473429951, |
|
"grad_norm": 4.491352081298828, |
|
"learning_rate": 4.304884594739668e-06, |
|
"loss": 0.0003, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 90.0048309178744, |
|
"grad_norm": 0.0008951724739745259, |
|
"learning_rate": 4.2941492216854545e-06, |
|
"loss": 0.0, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 90.00579710144928, |
|
"grad_norm": 0.008534472435712814, |
|
"learning_rate": 4.28341384863124e-06, |
|
"loss": 0.0, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 90.00676328502415, |
|
"grad_norm": 0.00027089714421890676, |
|
"learning_rate": 4.2726784755770265e-06, |
|
"loss": 0.0, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 90.00676328502415, |
|
"eval_accuracy": 0.8076923076923077, |
|
"eval_loss": 1.7776321172714233, |
|
"eval_runtime": 17.3577, |
|
"eval_samples_per_second": 4.494, |
|
"eval_steps_per_second": 1.152, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 91.00096618357487, |
|
"grad_norm": 0.0012238288763910532, |
|
"learning_rate": 4.261943102522813e-06, |
|
"loss": 0.0, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 91.00193236714976, |
|
"grad_norm": 0.0015267673879861832, |
|
"learning_rate": 4.251207729468599e-06, |
|
"loss": 0.0, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 91.00289855072464, |
|
"grad_norm": 0.0006313137710094452, |
|
"learning_rate": 4.240472356414386e-06, |
|
"loss": 0.0, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 91.00386473429951, |
|
"grad_norm": 0.00024374033091589808, |
|
"learning_rate": 4.229736983360172e-06, |
|
"loss": 0.0, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 91.0048309178744, |
|
"grad_norm": 0.000769898877479136, |
|
"learning_rate": 4.219001610305958e-06, |
|
"loss": 0.0, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 91.00579710144928, |
|
"grad_norm": 0.0005884930142201483, |
|
"learning_rate": 4.208266237251744e-06, |
|
"loss": 0.0, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 91.00676328502415, |
|
"grad_norm": 0.0004250435740686953, |
|
"learning_rate": 4.197530864197531e-06, |
|
"loss": 0.0028, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 91.00676328502415, |
|
"eval_accuracy": 0.7948717948717948, |
|
"eval_loss": 1.749398112297058, |
|
"eval_runtime": 15.1757, |
|
"eval_samples_per_second": 5.14, |
|
"eval_steps_per_second": 1.318, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 92.00096618357487, |
|
"grad_norm": 0.0007963450625538826, |
|
"learning_rate": 4.186795491143318e-06, |
|
"loss": 0.0, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 92.00193236714976, |
|
"grad_norm": 0.000419673859141767, |
|
"learning_rate": 4.1760601180891046e-06, |
|
"loss": 0.0, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 92.00289855072464, |
|
"grad_norm": 0.0020634315442293882, |
|
"learning_rate": 4.16532474503489e-06, |
|
"loss": 0.0, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 92.00386473429951, |
|
"grad_norm": 0.002074671443551779, |
|
"learning_rate": 4.154589371980677e-06, |
|
"loss": 0.0, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 92.0048309178744, |
|
"grad_norm": 0.0013138147769495845, |
|
"learning_rate": 4.143853998926463e-06, |
|
"loss": 0.0, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 92.00579710144928, |
|
"grad_norm": 0.0006493937107734382, |
|
"learning_rate": 4.1331186258722495e-06, |
|
"loss": 0.0, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 92.00676328502415, |
|
"grad_norm": 0.00048424158012494445, |
|
"learning_rate": 4.122383252818036e-06, |
|
"loss": 0.0, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 92.00676328502415, |
|
"eval_accuracy": 0.8076923076923077, |
|
"eval_loss": 1.614148736000061, |
|
"eval_runtime": 16.0064, |
|
"eval_samples_per_second": 4.873, |
|
"eval_steps_per_second": 1.249, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 93.00096618357487, |
|
"grad_norm": 0.0007483679219149053, |
|
"learning_rate": 4.1116478797638224e-06, |
|
"loss": 0.0, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 93.00193236714976, |
|
"grad_norm": 0.0005844564293511212, |
|
"learning_rate": 4.100912506709608e-06, |
|
"loss": 0.0, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 93.00289855072464, |
|
"grad_norm": 0.0004827914817724377, |
|
"learning_rate": 4.0901771336553945e-06, |
|
"loss": 0.0, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 93.00386473429951, |
|
"grad_norm": 0.0021162095945328474, |
|
"learning_rate": 4.079441760601181e-06, |
|
"loss": 0.0619, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 93.0048309178744, |
|
"grad_norm": 0.007160414941608906, |
|
"learning_rate": 4.068706387546967e-06, |
|
"loss": 0.0, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 93.00579710144928, |
|
"grad_norm": 0.0008341994835063815, |
|
"learning_rate": 4.057971014492754e-06, |
|
"loss": 0.1666, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 93.00676328502415, |
|
"grad_norm": 0.0007460052147507668, |
|
"learning_rate": 4.04723564143854e-06, |
|
"loss": 0.0, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 93.00676328502415, |
|
"eval_accuracy": 0.717948717948718, |
|
"eval_loss": 2.82431697845459, |
|
"eval_runtime": 15.0775, |
|
"eval_samples_per_second": 5.173, |
|
"eval_steps_per_second": 1.326, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 94.00096618357487, |
|
"grad_norm": 0.000210269630770199, |
|
"learning_rate": 4.036500268384327e-06, |
|
"loss": 0.0015, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 94.00193236714976, |
|
"grad_norm": 0.00048051681369543076, |
|
"learning_rate": 4.025764895330113e-06, |
|
"loss": 0.1711, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 94.00289855072464, |
|
"grad_norm": 148.72152709960938, |
|
"learning_rate": 4.0150295222759e-06, |
|
"loss": 0.0117, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 94.00386473429951, |
|
"grad_norm": 0.0029819430783391, |
|
"learning_rate": 4.004294149221686e-06, |
|
"loss": 0.0, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 94.0048309178744, |
|
"grad_norm": 0.0169463362544775, |
|
"learning_rate": 3.9935587761674725e-06, |
|
"loss": 0.0001, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 94.00579710144928, |
|
"grad_norm": 0.0007479405030608177, |
|
"learning_rate": 3.982823403113258e-06, |
|
"loss": 0.2424, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 94.00676328502415, |
|
"grad_norm": 0.0004274906241334975, |
|
"learning_rate": 3.972088030059045e-06, |
|
"loss": 0.0, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 94.00676328502415, |
|
"eval_accuracy": 0.7051282051282052, |
|
"eval_loss": 1.9025695323944092, |
|
"eval_runtime": 14.9527, |
|
"eval_samples_per_second": 5.216, |
|
"eval_steps_per_second": 1.338, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 95.00096618357487, |
|
"grad_norm": 0.002755245892331004, |
|
"learning_rate": 3.961352657004831e-06, |
|
"loss": 0.0, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 95.00193236714976, |
|
"grad_norm": 0.0005277282907627523, |
|
"learning_rate": 3.9506172839506175e-06, |
|
"loss": 0.0, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 95.00289855072464, |
|
"grad_norm": 0.0004030791751574725, |
|
"learning_rate": 3.939881910896404e-06, |
|
"loss": 0.0, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 95.00386473429951, |
|
"grad_norm": 0.0007481653010472655, |
|
"learning_rate": 3.92914653784219e-06, |
|
"loss": 0.0, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 95.0048309178744, |
|
"grad_norm": 0.003996316809207201, |
|
"learning_rate": 3.918411164787977e-06, |
|
"loss": 0.0, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 95.00579710144928, |
|
"grad_norm": 0.0004657425743062049, |
|
"learning_rate": 3.907675791733763e-06, |
|
"loss": 0.0357, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 95.00676328502415, |
|
"grad_norm": 0.0005334661109372973, |
|
"learning_rate": 3.89694041867955e-06, |
|
"loss": 0.0002, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 95.00676328502415, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_loss": 2.236795663833618, |
|
"eval_runtime": 14.9418, |
|
"eval_samples_per_second": 5.22, |
|
"eval_steps_per_second": 1.339, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 96.00096618357487, |
|
"grad_norm": 0.000555448408704251, |
|
"learning_rate": 3.886205045625336e-06, |
|
"loss": 0.0, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 96.00193236714976, |
|
"grad_norm": 0.22761359810829163, |
|
"learning_rate": 3.875469672571122e-06, |
|
"loss": 0.0009, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 96.00289855072464, |
|
"grad_norm": 0.007025455124676228, |
|
"learning_rate": 3.864734299516908e-06, |
|
"loss": 0.0, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 96.00386473429951, |
|
"grad_norm": 0.00039805268170312047, |
|
"learning_rate": 3.853998926462695e-06, |
|
"loss": 0.0, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 96.0048309178744, |
|
"grad_norm": 0.00040781483403407037, |
|
"learning_rate": 3.843263553408481e-06, |
|
"loss": 0.0, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 96.00579710144928, |
|
"grad_norm": 0.0007238932885229588, |
|
"learning_rate": 3.832528180354268e-06, |
|
"loss": 0.0, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 96.00676328502415, |
|
"grad_norm": 0.0008674617856740952, |
|
"learning_rate": 3.821792807300054e-06, |
|
"loss": 0.0998, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 96.00676328502415, |
|
"eval_accuracy": 0.717948717948718, |
|
"eval_loss": 2.4984593391418457, |
|
"eval_runtime": 16.4073, |
|
"eval_samples_per_second": 4.754, |
|
"eval_steps_per_second": 1.219, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 97.00096618357487, |
|
"grad_norm": 0.0004405768122524023, |
|
"learning_rate": 3.8110574342458405e-06, |
|
"loss": 0.0, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 97.00193236714976, |
|
"grad_norm": 0.001103090588003397, |
|
"learning_rate": 3.800322061191627e-06, |
|
"loss": 0.0, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 97.00289855072464, |
|
"grad_norm": 0.0003414273087400943, |
|
"learning_rate": 3.7895866881374134e-06, |
|
"loss": 0.0, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 97.00386473429951, |
|
"grad_norm": 0.00032865177490748465, |
|
"learning_rate": 3.7788513150831994e-06, |
|
"loss": 0.0, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 97.0048309178744, |
|
"grad_norm": 0.0020416765473783016, |
|
"learning_rate": 3.768115942028986e-06, |
|
"loss": 0.0, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 97.00579710144928, |
|
"grad_norm": 0.0008840957889333367, |
|
"learning_rate": 3.7573805689747723e-06, |
|
"loss": 0.0, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 97.00676328502415, |
|
"grad_norm": 0.000713666551746428, |
|
"learning_rate": 3.7466451959205584e-06, |
|
"loss": 0.0, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 97.00676328502415, |
|
"eval_accuracy": 0.7435897435897436, |
|
"eval_loss": 2.0685932636260986, |
|
"eval_runtime": 17.7738, |
|
"eval_samples_per_second": 4.388, |
|
"eval_steps_per_second": 1.125, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 98.00096618357487, |
|
"grad_norm": 0.00042999107972718775, |
|
"learning_rate": 3.735909822866345e-06, |
|
"loss": 0.0, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 98.00193236714976, |
|
"grad_norm": 0.0008358382037840784, |
|
"learning_rate": 3.7251744498121313e-06, |
|
"loss": 0.0, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 98.00289855072464, |
|
"grad_norm": 0.0005103634321130812, |
|
"learning_rate": 3.7144390767579173e-06, |
|
"loss": 0.0, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 98.00386473429951, |
|
"grad_norm": 0.0003126839583273977, |
|
"learning_rate": 3.7037037037037037e-06, |
|
"loss": 0.0, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 98.0048309178744, |
|
"grad_norm": 0.00033128069480881095, |
|
"learning_rate": 3.69296833064949e-06, |
|
"loss": 0.0, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 98.00579710144928, |
|
"grad_norm": 0.0004542351234704256, |
|
"learning_rate": 3.682232957595277e-06, |
|
"loss": 0.0002, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 98.00676328502415, |
|
"grad_norm": 0.00022706468007527292, |
|
"learning_rate": 3.6714975845410635e-06, |
|
"loss": 0.0074, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 98.00676328502415, |
|
"eval_accuracy": 0.7051282051282052, |
|
"eval_loss": 2.7098388671875, |
|
"eval_runtime": 22.2504, |
|
"eval_samples_per_second": 3.506, |
|
"eval_steps_per_second": 0.899, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 99.00096618357487, |
|
"grad_norm": 0.0004110004520043731, |
|
"learning_rate": 3.6607622114868495e-06, |
|
"loss": 0.0, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 99.00193236714976, |
|
"grad_norm": 0.0007272631046362221, |
|
"learning_rate": 3.650026838432636e-06, |
|
"loss": 0.0, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 99.00289855072464, |
|
"grad_norm": 0.0016168636502698064, |
|
"learning_rate": 3.6392914653784224e-06, |
|
"loss": 0.0, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 99.00386473429951, |
|
"grad_norm": 0.0007112635066732764, |
|
"learning_rate": 3.6285560923242085e-06, |
|
"loss": 0.0, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 99.0048309178744, |
|
"grad_norm": 0.00035714227124117315, |
|
"learning_rate": 3.617820719269995e-06, |
|
"loss": 0.0, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 99.00579710144928, |
|
"grad_norm": 0.0005834894836880267, |
|
"learning_rate": 3.6070853462157814e-06, |
|
"loss": 0.0, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 99.00676328502415, |
|
"grad_norm": 0.0008813188760541379, |
|
"learning_rate": 3.5963499731615674e-06, |
|
"loss": 0.0, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 99.00676328502415, |
|
"eval_accuracy": 0.7307692307692307, |
|
"eval_loss": 2.2764525413513184, |
|
"eval_runtime": 20.108, |
|
"eval_samples_per_second": 3.879, |
|
"eval_steps_per_second": 0.995, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 100.00096618357487, |
|
"grad_norm": 0.00035179706173948944, |
|
"learning_rate": 3.585614600107354e-06, |
|
"loss": 0.0, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 100.00193236714976, |
|
"grad_norm": 0.0001833884307416156, |
|
"learning_rate": 3.5748792270531403e-06, |
|
"loss": 0.0, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 100.00289855072464, |
|
"grad_norm": 0.0007354604313150048, |
|
"learning_rate": 3.5641438539989263e-06, |
|
"loss": 0.0, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 100.00386473429951, |
|
"grad_norm": 0.0019487851532176137, |
|
"learning_rate": 3.553408480944713e-06, |
|
"loss": 0.0, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 100.0048309178744, |
|
"grad_norm": 0.0004975392948836088, |
|
"learning_rate": 3.5426731078904997e-06, |
|
"loss": 0.0, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 100.00579710144928, |
|
"grad_norm": 0.00027223333017900586, |
|
"learning_rate": 3.531937734836286e-06, |
|
"loss": 0.0, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 100.00676328502415, |
|
"grad_norm": 0.0006046319613233209, |
|
"learning_rate": 3.521202361782072e-06, |
|
"loss": 0.0, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 100.00676328502415, |
|
"eval_accuracy": 0.7307692307692307, |
|
"eval_loss": 2.2792937755584717, |
|
"eval_runtime": 21.3442, |
|
"eval_samples_per_second": 3.654, |
|
"eval_steps_per_second": 0.937, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 101.00096618357487, |
|
"grad_norm": 0.0004367881338112056, |
|
"learning_rate": 3.5104669887278586e-06, |
|
"loss": 0.0, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 101.00193236714976, |
|
"grad_norm": 0.00014857234782539308, |
|
"learning_rate": 3.499731615673645e-06, |
|
"loss": 0.0, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 101.00289855072464, |
|
"grad_norm": 0.000281750806607306, |
|
"learning_rate": 3.488996242619431e-06, |
|
"loss": 0.0, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 101.00386473429951, |
|
"grad_norm": 0.0006434857496060431, |
|
"learning_rate": 3.4782608695652175e-06, |
|
"loss": 0.0, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 101.0048309178744, |
|
"grad_norm": 0.0004700597492046654, |
|
"learning_rate": 3.467525496511004e-06, |
|
"loss": 0.0, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 101.00579710144928, |
|
"grad_norm": 0.0005693004932254553, |
|
"learning_rate": 3.4567901234567904e-06, |
|
"loss": 0.0, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 101.00676328502415, |
|
"grad_norm": 0.003832819638773799, |
|
"learning_rate": 3.4460547504025764e-06, |
|
"loss": 0.0, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 101.00676328502415, |
|
"eval_accuracy": 0.7307692307692307, |
|
"eval_loss": 2.2027432918548584, |
|
"eval_runtime": 19.1337, |
|
"eval_samples_per_second": 4.077, |
|
"eval_steps_per_second": 1.045, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 102.00096618357487, |
|
"grad_norm": 0.0006276327185332775, |
|
"learning_rate": 3.4353193773483633e-06, |
|
"loss": 0.0, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 102.00193236714976, |
|
"grad_norm": 0.0005548395565710962, |
|
"learning_rate": 3.4245840042941498e-06, |
|
"loss": 0.0, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 102.00289855072464, |
|
"grad_norm": 0.0007787638460285962, |
|
"learning_rate": 3.4138486312399362e-06, |
|
"loss": 0.0, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 102.00386473429951, |
|
"grad_norm": 0.0005334399756975472, |
|
"learning_rate": 3.4031132581857222e-06, |
|
"loss": 0.0, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 102.0048309178744, |
|
"grad_norm": 0.0004569404118228704, |
|
"learning_rate": 3.3923778851315087e-06, |
|
"loss": 0.0, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 102.00579710144928, |
|
"grad_norm": 0.00034970356500707567, |
|
"learning_rate": 3.381642512077295e-06, |
|
"loss": 0.0, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 102.00676328502415, |
|
"grad_norm": 0.0002953490475192666, |
|
"learning_rate": 3.370907139023081e-06, |
|
"loss": 0.0, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 102.00676328502415, |
|
"eval_accuracy": 0.7307692307692307, |
|
"eval_loss": 2.2387261390686035, |
|
"eval_runtime": 19.3569, |
|
"eval_samples_per_second": 4.03, |
|
"eval_steps_per_second": 1.033, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 103.00096618357487, |
|
"grad_norm": 0.0010686102323234081, |
|
"learning_rate": 3.3601717659688676e-06, |
|
"loss": 0.0, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 103.00193236714976, |
|
"grad_norm": 0.00021685549290850759, |
|
"learning_rate": 3.349436392914654e-06, |
|
"loss": 0.0, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 103.00289855072464, |
|
"grad_norm": 0.0007019720505923033, |
|
"learning_rate": 3.33870101986044e-06, |
|
"loss": 0.0815, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 103.00386473429951, |
|
"grad_norm": 0.030306629836559296, |
|
"learning_rate": 3.3279656468062265e-06, |
|
"loss": 0.0, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 103.0048309178744, |
|
"grad_norm": 0.0014363530790433288, |
|
"learning_rate": 3.317230273752013e-06, |
|
"loss": 0.1962, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 103.00579710144928, |
|
"grad_norm": 0.00043242896208539605, |
|
"learning_rate": 3.3064949006978e-06, |
|
"loss": 0.0003, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 103.00676328502415, |
|
"grad_norm": 0.0012811111519113183, |
|
"learning_rate": 3.295759527643586e-06, |
|
"loss": 0.0, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 103.00676328502415, |
|
"eval_accuracy": 0.7307692307692307, |
|
"eval_loss": 2.197131395339966, |
|
"eval_runtime": 72.0117, |
|
"eval_samples_per_second": 1.083, |
|
"eval_steps_per_second": 0.278, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 104.00096618357487, |
|
"grad_norm": 0.000573929282836616, |
|
"learning_rate": 3.2850241545893724e-06, |
|
"loss": 0.0, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 104.00193236714976, |
|
"grad_norm": 0.01300587598234415, |
|
"learning_rate": 3.274288781535159e-06, |
|
"loss": 0.0, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 104.00289855072464, |
|
"grad_norm": 0.0005633372347801924, |
|
"learning_rate": 3.263553408480945e-06, |
|
"loss": 0.0, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 104.00386473429951, |
|
"grad_norm": 0.0004255858657415956, |
|
"learning_rate": 3.2528180354267313e-06, |
|
"loss": 0.0, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 104.0048309178744, |
|
"grad_norm": 0.000905704393517226, |
|
"learning_rate": 3.2420826623725177e-06, |
|
"loss": 0.0, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 104.00579710144928, |
|
"grad_norm": 0.00022334480308927596, |
|
"learning_rate": 3.231347289318304e-06, |
|
"loss": 0.0, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 104.00676328502415, |
|
"grad_norm": 0.0007362953037954867, |
|
"learning_rate": 3.22061191626409e-06, |
|
"loss": 0.0, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 104.00676328502415, |
|
"eval_accuracy": 0.717948717948718, |
|
"eval_loss": 2.324617862701416, |
|
"eval_runtime": 21.2746, |
|
"eval_samples_per_second": 3.666, |
|
"eval_steps_per_second": 0.94, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 105.00096618357487, |
|
"grad_norm": 0.0006282405229285359, |
|
"learning_rate": 3.2098765432098767e-06, |
|
"loss": 0.0, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 105.00193236714976, |
|
"grad_norm": 0.00037624052492901683, |
|
"learning_rate": 3.199141170155663e-06, |
|
"loss": 0.0, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 105.00289855072464, |
|
"grad_norm": 194.47848510742188, |
|
"learning_rate": 3.188405797101449e-06, |
|
"loss": 0.1884, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 105.00386473429951, |
|
"grad_norm": 0.0003303911071270704, |
|
"learning_rate": 3.177670424047236e-06, |
|
"loss": 0.0002, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 105.0048309178744, |
|
"grad_norm": 0.00046643143286928535, |
|
"learning_rate": 3.1669350509930225e-06, |
|
"loss": 0.0, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 105.00579710144928, |
|
"grad_norm": 0.0017922611441463232, |
|
"learning_rate": 3.156199677938809e-06, |
|
"loss": 0.0054, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 105.00676328502415, |
|
"grad_norm": 0.0007599503733217716, |
|
"learning_rate": 3.145464304884595e-06, |
|
"loss": 0.0, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 105.00676328502415, |
|
"eval_accuracy": 0.8076923076923077, |
|
"eval_loss": 1.5935262441635132, |
|
"eval_runtime": 17.2874, |
|
"eval_samples_per_second": 4.512, |
|
"eval_steps_per_second": 1.157, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 106.00096618357487, |
|
"grad_norm": 0.0019118065247312188, |
|
"learning_rate": 3.1347289318303814e-06, |
|
"loss": 0.0011, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 106.00193236714976, |
|
"grad_norm": 0.002028007060289383, |
|
"learning_rate": 3.123993558776168e-06, |
|
"loss": 0.2234, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 106.00289855072464, |
|
"grad_norm": 0.002168497536331415, |
|
"learning_rate": 3.113258185721954e-06, |
|
"loss": 0.0, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 106.00386473429951, |
|
"grad_norm": 0.0005404739058576524, |
|
"learning_rate": 3.1025228126677403e-06, |
|
"loss": 0.0, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 106.0048309178744, |
|
"grad_norm": 0.0015547404764220119, |
|
"learning_rate": 3.0917874396135268e-06, |
|
"loss": 0.0, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 106.00579710144928, |
|
"grad_norm": 0.0010462005157023668, |
|
"learning_rate": 3.081052066559313e-06, |
|
"loss": 0.0, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 106.00676328502415, |
|
"grad_norm": 0.001083563780412078, |
|
"learning_rate": 3.0703166935050992e-06, |
|
"loss": 0.0, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 106.00676328502415, |
|
"eval_accuracy": 0.8076923076923077, |
|
"eval_loss": 1.4796079397201538, |
|
"eval_runtime": 20.0162, |
|
"eval_samples_per_second": 3.897, |
|
"eval_steps_per_second": 0.999, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 107.00096618357487, |
|
"grad_norm": 0.0008712081471458077, |
|
"learning_rate": 3.059581320450886e-06, |
|
"loss": 0.0, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 107.00193236714976, |
|
"grad_norm": 0.0006524740601889789, |
|
"learning_rate": 3.0488459473966726e-06, |
|
"loss": 0.0, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 107.00289855072464, |
|
"grad_norm": 0.001063215546309948, |
|
"learning_rate": 3.038110574342459e-06, |
|
"loss": 0.0, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 107.00386473429951, |
|
"grad_norm": 0.0003702947578858584, |
|
"learning_rate": 3.027375201288245e-06, |
|
"loss": 0.0, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 107.0048309178744, |
|
"grad_norm": 0.001226250664331019, |
|
"learning_rate": 3.0166398282340315e-06, |
|
"loss": 0.0043, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 107.00579710144928, |
|
"grad_norm": 0.0021951072849333286, |
|
"learning_rate": 3.005904455179818e-06, |
|
"loss": 0.0, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 107.00676328502415, |
|
"grad_norm": 0.0006592991994693875, |
|
"learning_rate": 2.995169082125604e-06, |
|
"loss": 0.0001, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 107.00676328502415, |
|
"eval_accuracy": 0.782051282051282, |
|
"eval_loss": 1.7051782608032227, |
|
"eval_runtime": 16.7621, |
|
"eval_samples_per_second": 4.653, |
|
"eval_steps_per_second": 1.193, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 108.00096618357487, |
|
"grad_norm": 0.00048738584155216813, |
|
"learning_rate": 2.9844337090713904e-06, |
|
"loss": 0.0001, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 108.00193236714976, |
|
"grad_norm": 0.0021665666718035936, |
|
"learning_rate": 2.973698336017177e-06, |
|
"loss": 0.0, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 108.00289855072464, |
|
"grad_norm": 0.0005433819605968893, |
|
"learning_rate": 2.962962962962963e-06, |
|
"loss": 0.0, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 108.00386473429951, |
|
"grad_norm": 0.0008435167255811393, |
|
"learning_rate": 2.9522275899087494e-06, |
|
"loss": 0.0, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 108.0048309178744, |
|
"grad_norm": 0.00032446393743157387, |
|
"learning_rate": 2.941492216854536e-06, |
|
"loss": 0.0, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 108.00579710144928, |
|
"grad_norm": 0.00036059069680050015, |
|
"learning_rate": 2.9307568438003227e-06, |
|
"loss": 0.0, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 108.00676328502415, |
|
"grad_norm": 0.0009860590798780322, |
|
"learning_rate": 2.9200214707461087e-06, |
|
"loss": 0.0, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 108.00676328502415, |
|
"eval_accuracy": 0.8076923076923077, |
|
"eval_loss": 1.6021957397460938, |
|
"eval_runtime": 21.0408, |
|
"eval_samples_per_second": 3.707, |
|
"eval_steps_per_second": 0.951, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 109.00096618357487, |
|
"grad_norm": 0.0004186592123005539, |
|
"learning_rate": 2.909286097691895e-06, |
|
"loss": 0.0, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 109.00193236714976, |
|
"grad_norm": 0.0013808052754029632, |
|
"learning_rate": 2.8985507246376816e-06, |
|
"loss": 0.0, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 109.00289855072464, |
|
"grad_norm": 0.000451247877208516, |
|
"learning_rate": 2.8878153515834676e-06, |
|
"loss": 0.0, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 109.00386473429951, |
|
"grad_norm": 0.0003690690209623426, |
|
"learning_rate": 2.877079978529254e-06, |
|
"loss": 0.0, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 109.0048309178744, |
|
"grad_norm": 0.002147768158465624, |
|
"learning_rate": 2.8663446054750405e-06, |
|
"loss": 0.0, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 109.00579710144928, |
|
"grad_norm": 0.00025449952227063477, |
|
"learning_rate": 2.8556092324208266e-06, |
|
"loss": 0.0, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 109.00676328502415, |
|
"grad_norm": 2.826117753982544, |
|
"learning_rate": 2.844873859366613e-06, |
|
"loss": 0.0002, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 109.00676328502415, |
|
"eval_accuracy": 0.8076923076923077, |
|
"eval_loss": 1.674877643585205, |
|
"eval_runtime": 16.7713, |
|
"eval_samples_per_second": 4.651, |
|
"eval_steps_per_second": 1.193, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 110.00096618357487, |
|
"grad_norm": 0.0015585910296067595, |
|
"learning_rate": 2.8341384863123995e-06, |
|
"loss": 0.0, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 110.00193236714976, |
|
"grad_norm": 0.001015316229313612, |
|
"learning_rate": 2.823403113258186e-06, |
|
"loss": 0.0, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 110.00289855072464, |
|
"grad_norm": 0.00042512797517701983, |
|
"learning_rate": 2.812667740203972e-06, |
|
"loss": 0.0, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 110.00386473429951, |
|
"grad_norm": 0.0018657109467312694, |
|
"learning_rate": 2.801932367149759e-06, |
|
"loss": 0.0, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 110.0048309178744, |
|
"grad_norm": 0.0018489286303520203, |
|
"learning_rate": 2.7911969940955453e-06, |
|
"loss": 0.0, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 110.00579710144928, |
|
"grad_norm": 0.0005694982828572392, |
|
"learning_rate": 2.7804616210413317e-06, |
|
"loss": 0.0, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 110.00676328502415, |
|
"grad_norm": 0.0007545383996330202, |
|
"learning_rate": 2.7697262479871177e-06, |
|
"loss": 0.0, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 110.00676328502415, |
|
"eval_accuracy": 0.7435897435897436, |
|
"eval_loss": 1.7948025465011597, |
|
"eval_runtime": 21.1507, |
|
"eval_samples_per_second": 3.688, |
|
"eval_steps_per_second": 0.946, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 111.00096618357487, |
|
"grad_norm": 0.0008683151681907475, |
|
"learning_rate": 2.758990874932904e-06, |
|
"loss": 0.0, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 111.00193236714976, |
|
"grad_norm": 0.0006877990090288222, |
|
"learning_rate": 2.7482555018786907e-06, |
|
"loss": 0.0, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 111.00289855072464, |
|
"grad_norm": 0.000607641413807869, |
|
"learning_rate": 2.7375201288244767e-06, |
|
"loss": 0.0, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 111.00386473429951, |
|
"grad_norm": 0.0011498964158818126, |
|
"learning_rate": 2.726784755770263e-06, |
|
"loss": 0.0, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 111.0048309178744, |
|
"grad_norm": 0.0019288529874756932, |
|
"learning_rate": 2.7160493827160496e-06, |
|
"loss": 0.0, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 111.00579710144928, |
|
"grad_norm": 0.0004927438567392528, |
|
"learning_rate": 2.7053140096618356e-06, |
|
"loss": 0.0, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 111.00676328502415, |
|
"grad_norm": 0.0006650699651800096, |
|
"learning_rate": 2.694578636607622e-06, |
|
"loss": 0.0, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 111.00676328502415, |
|
"eval_accuracy": 0.7948717948717948, |
|
"eval_loss": 1.8455309867858887, |
|
"eval_runtime": 20.0998, |
|
"eval_samples_per_second": 3.881, |
|
"eval_steps_per_second": 0.995, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 112.00096618357487, |
|
"grad_norm": 0.0009932676330208778, |
|
"learning_rate": 2.683843263553409e-06, |
|
"loss": 0.0, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 112.00193236714976, |
|
"grad_norm": 0.0004688102926593274, |
|
"learning_rate": 2.6731078904991954e-06, |
|
"loss": 0.0, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 112.00289855072464, |
|
"grad_norm": 0.0006550211692228913, |
|
"learning_rate": 2.6623725174449814e-06, |
|
"loss": 0.0, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 112.00386473429951, |
|
"grad_norm": 0.0024278289638459682, |
|
"learning_rate": 2.651637144390768e-06, |
|
"loss": 0.0, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 112.0048309178744, |
|
"grad_norm": 0.0003039553703274578, |
|
"learning_rate": 2.6409017713365543e-06, |
|
"loss": 0.0, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 112.00579710144928, |
|
"grad_norm": 0.0003849596541840583, |
|
"learning_rate": 2.6301663982823408e-06, |
|
"loss": 0.0, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 112.00676328502415, |
|
"grad_norm": 0.002140798605978489, |
|
"learning_rate": 2.6194310252281268e-06, |
|
"loss": 0.0, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 112.00676328502415, |
|
"eval_accuracy": 0.7948717948717948, |
|
"eval_loss": 1.8600115776062012, |
|
"eval_runtime": 16.4137, |
|
"eval_samples_per_second": 4.752, |
|
"eval_steps_per_second": 1.218, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 113.00096618357487, |
|
"grad_norm": 0.0019010236719623208, |
|
"learning_rate": 2.6086956521739132e-06, |
|
"loss": 0.0, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 113.00193236714976, |
|
"grad_norm": 0.00038240986759774387, |
|
"learning_rate": 2.5979602791196997e-06, |
|
"loss": 0.0, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 113.00289855072464, |
|
"grad_norm": 0.0014828953426331282, |
|
"learning_rate": 2.5872249060654857e-06, |
|
"loss": 0.0, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 113.00386473429951, |
|
"grad_norm": 0.00037424080073833466, |
|
"learning_rate": 2.576489533011272e-06, |
|
"loss": 0.0, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 113.0048309178744, |
|
"grad_norm": 0.0005094591761007905, |
|
"learning_rate": 2.5657541599570586e-06, |
|
"loss": 0.0, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 113.00579710144928, |
|
"grad_norm": 0.0018624026561155915, |
|
"learning_rate": 2.5550187869028455e-06, |
|
"loss": 0.0, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 113.00676328502415, |
|
"grad_norm": 0.00043899298179894686, |
|
"learning_rate": 2.5442834138486315e-06, |
|
"loss": 0.0, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 113.00676328502415, |
|
"eval_accuracy": 0.7948717948717948, |
|
"eval_loss": 1.8182809352874756, |
|
"eval_runtime": 16.1873, |
|
"eval_samples_per_second": 4.819, |
|
"eval_steps_per_second": 1.236, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 114.00096618357487, |
|
"grad_norm": 0.0022914118599146605, |
|
"learning_rate": 2.533548040794418e-06, |
|
"loss": 0.0, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 114.00193236714976, |
|
"grad_norm": 0.0003928690275643021, |
|
"learning_rate": 2.5228126677402044e-06, |
|
"loss": 0.0, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 114.00289855072464, |
|
"grad_norm": 0.0008931594784371555, |
|
"learning_rate": 2.5120772946859904e-06, |
|
"loss": 0.0, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 114.00386473429951, |
|
"grad_norm": 0.00023587950272485614, |
|
"learning_rate": 2.501341921631777e-06, |
|
"loss": 0.0, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 114.0048309178744, |
|
"grad_norm": 0.0005247633089311421, |
|
"learning_rate": 2.4906065485775633e-06, |
|
"loss": 0.0, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 114.00579710144928, |
|
"grad_norm": 0.00018518311844673008, |
|
"learning_rate": 2.4798711755233494e-06, |
|
"loss": 0.0, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 114.00676328502415, |
|
"grad_norm": 0.0007796736899763346, |
|
"learning_rate": 2.469135802469136e-06, |
|
"loss": 0.0, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 114.00676328502415, |
|
"eval_accuracy": 0.7948717948717948, |
|
"eval_loss": 1.7861602306365967, |
|
"eval_runtime": 16.6133, |
|
"eval_samples_per_second": 4.695, |
|
"eval_steps_per_second": 1.204, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 115.00096618357487, |
|
"grad_norm": 0.00030822984990663826, |
|
"learning_rate": 2.4584004294149223e-06, |
|
"loss": 0.0, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 115.00193236714976, |
|
"grad_norm": 3.4960391521453857, |
|
"learning_rate": 2.4476650563607087e-06, |
|
"loss": 0.0001, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 115.00289855072464, |
|
"grad_norm": 0.00040909554809331894, |
|
"learning_rate": 2.436929683306495e-06, |
|
"loss": 0.2293, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 115.00386473429951, |
|
"grad_norm": 0.0004301641311030835, |
|
"learning_rate": 2.426194310252281e-06, |
|
"loss": 0.0, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 115.0048309178744, |
|
"grad_norm": 0.00037556581082753837, |
|
"learning_rate": 2.4154589371980677e-06, |
|
"loss": 0.0, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 115.00579710144928, |
|
"grad_norm": 0.00048973155207932, |
|
"learning_rate": 2.4047235641438545e-06, |
|
"loss": 0.0, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 115.00676328502415, |
|
"grad_norm": 0.00013631054025609046, |
|
"learning_rate": 2.3939881910896406e-06, |
|
"loss": 0.0, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 115.00676328502415, |
|
"eval_accuracy": 0.782051282051282, |
|
"eval_loss": 1.8597110509872437, |
|
"eval_runtime": 16.1665, |
|
"eval_samples_per_second": 4.825, |
|
"eval_steps_per_second": 1.237, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 116.00096618357487, |
|
"grad_norm": 0.00032639241544529796, |
|
"learning_rate": 2.383252818035427e-06, |
|
"loss": 0.0, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 116.00193236714976, |
|
"grad_norm": 0.000251809018664062, |
|
"learning_rate": 2.3725174449812135e-06, |
|
"loss": 0.0, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 116.00289855072464, |
|
"grad_norm": 0.00035566542646847665, |
|
"learning_rate": 2.3617820719269995e-06, |
|
"loss": 0.0, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 116.00386473429951, |
|
"grad_norm": 0.00013090217544231564, |
|
"learning_rate": 2.351046698872786e-06, |
|
"loss": 0.0, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 116.0048309178744, |
|
"grad_norm": 0.0003571859560906887, |
|
"learning_rate": 2.3403113258185724e-06, |
|
"loss": 0.0, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 116.00579710144928, |
|
"grad_norm": 0.0007861918420530856, |
|
"learning_rate": 2.329575952764359e-06, |
|
"loss": 0.0, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 116.00676328502415, |
|
"grad_norm": 0.00013054760347586125, |
|
"learning_rate": 2.3188405797101453e-06, |
|
"loss": 0.0, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 116.00676328502415, |
|
"eval_accuracy": 0.782051282051282, |
|
"eval_loss": 1.820298433303833, |
|
"eval_runtime": 19.7868, |
|
"eval_samples_per_second": 3.942, |
|
"eval_steps_per_second": 1.011, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 117.00096618357487, |
|
"grad_norm": 0.0005784878740087152, |
|
"learning_rate": 2.3081052066559313e-06, |
|
"loss": 0.0, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 117.00193236714976, |
|
"grad_norm": 0.0004840072651859373, |
|
"learning_rate": 2.2973698336017178e-06, |
|
"loss": 0.0, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 117.00289855072464, |
|
"grad_norm": 0.0005657835863530636, |
|
"learning_rate": 2.2866344605475042e-06, |
|
"loss": 0.0, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 117.00386473429951, |
|
"grad_norm": 0.0003718466032296419, |
|
"learning_rate": 2.2758990874932907e-06, |
|
"loss": 0.0, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 117.0048309178744, |
|
"grad_norm": 0.0004277382104191929, |
|
"learning_rate": 2.265163714439077e-06, |
|
"loss": 0.0, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 117.00579710144928, |
|
"grad_norm": 0.005644855089485645, |
|
"learning_rate": 2.254428341384863e-06, |
|
"loss": 0.0, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 117.00676328502415, |
|
"grad_norm": 0.00029681739397346973, |
|
"learning_rate": 2.2436929683306496e-06, |
|
"loss": 0.0, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 117.00676328502415, |
|
"eval_accuracy": 0.7948717948717948, |
|
"eval_loss": 1.8342883586883545, |
|
"eval_runtime": 17.1194, |
|
"eval_samples_per_second": 4.556, |
|
"eval_steps_per_second": 1.168, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 118.00096618357487, |
|
"grad_norm": 0.00037026609061285853, |
|
"learning_rate": 2.232957595276436e-06, |
|
"loss": 0.0, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 118.00193236714976, |
|
"grad_norm": 0.00031471162219531834, |
|
"learning_rate": 2.222222222222222e-06, |
|
"loss": 0.0, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 118.00289855072464, |
|
"grad_norm": 0.00044593116035684943, |
|
"learning_rate": 2.211486849168009e-06, |
|
"loss": 0.0, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 118.00386473429951, |
|
"grad_norm": 0.001200909842737019, |
|
"learning_rate": 2.2007514761137954e-06, |
|
"loss": 0.0, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 118.0048309178744, |
|
"grad_norm": 0.00038560116081498563, |
|
"learning_rate": 2.1900161030595814e-06, |
|
"loss": 0.0, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 118.00579710144928, |
|
"grad_norm": 0.0014041089452803135, |
|
"learning_rate": 2.179280730005368e-06, |
|
"loss": 0.0, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 118.00676328502415, |
|
"grad_norm": 0.0004680801066569984, |
|
"learning_rate": 2.1685453569511543e-06, |
|
"loss": 0.0, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 118.00676328502415, |
|
"eval_accuracy": 0.7948717948717948, |
|
"eval_loss": 1.8416794538497925, |
|
"eval_runtime": 16.87, |
|
"eval_samples_per_second": 4.624, |
|
"eval_steps_per_second": 1.186, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 119.00096618357487, |
|
"grad_norm": 0.0004902587970718741, |
|
"learning_rate": 2.1578099838969404e-06, |
|
"loss": 0.0, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 119.00193236714976, |
|
"grad_norm": 91.31414794921875, |
|
"learning_rate": 2.1470746108427272e-06, |
|
"loss": 0.2185, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 119.00289855072464, |
|
"grad_norm": 0.00014069379540160298, |
|
"learning_rate": 2.1363392377885133e-06, |
|
"loss": 0.0, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 119.00386473429951, |
|
"grad_norm": 0.000278379739029333, |
|
"learning_rate": 2.1256038647342997e-06, |
|
"loss": 0.0, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 119.0048309178744, |
|
"grad_norm": 0.019783247262239456, |
|
"learning_rate": 2.114868491680086e-06, |
|
"loss": 0.0, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 119.00579710144928, |
|
"grad_norm": 0.002190082333981991, |
|
"learning_rate": 2.104133118625872e-06, |
|
"loss": 0.0617, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 119.00676328502415, |
|
"grad_norm": 0.0004942225059494376, |
|
"learning_rate": 2.093397745571659e-06, |
|
"loss": 0.0, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 119.00676328502415, |
|
"eval_accuracy": 0.782051282051282, |
|
"eval_loss": 1.7663315534591675, |
|
"eval_runtime": 20.9495, |
|
"eval_samples_per_second": 3.723, |
|
"eval_steps_per_second": 0.955, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 120.00096618357487, |
|
"grad_norm": 0.0003238871868234128, |
|
"learning_rate": 2.082662372517445e-06, |
|
"loss": 0.0, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 120.00193236714976, |
|
"grad_norm": 0.00031754918745718896, |
|
"learning_rate": 2.0719269994632315e-06, |
|
"loss": 0.0, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 120.00289855072464, |
|
"grad_norm": 0.0003926875942852348, |
|
"learning_rate": 2.061191626409018e-06, |
|
"loss": 0.0, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 120.00386473429951, |
|
"grad_norm": 0.0003822804428637028, |
|
"learning_rate": 2.050456253354804e-06, |
|
"loss": 0.0, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 120.0048309178744, |
|
"grad_norm": 0.0006702033570036292, |
|
"learning_rate": 2.0397208803005905e-06, |
|
"loss": 0.0, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 120.00579710144928, |
|
"grad_norm": 0.0023265674244612455, |
|
"learning_rate": 2.028985507246377e-06, |
|
"loss": 0.0, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 120.00676328502415, |
|
"grad_norm": 0.0004378359590191394, |
|
"learning_rate": 2.0182501341921634e-06, |
|
"loss": 0.0, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 120.00676328502415, |
|
"eval_accuracy": 0.782051282051282, |
|
"eval_loss": 1.9610668420791626, |
|
"eval_runtime": 23.002, |
|
"eval_samples_per_second": 3.391, |
|
"eval_steps_per_second": 0.869, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 121.00096618357487, |
|
"grad_norm": 0.0014331090496852994, |
|
"learning_rate": 2.00751476113795e-06, |
|
"loss": 0.0, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 121.00193236714976, |
|
"grad_norm": 0.0001426087401341647, |
|
"learning_rate": 1.9967793880837363e-06, |
|
"loss": 0.0, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 121.00289855072464, |
|
"grad_norm": 0.00037875105044804513, |
|
"learning_rate": 1.9860440150295223e-06, |
|
"loss": 0.0, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 121.00386473429951, |
|
"grad_norm": 0.0009291688329540193, |
|
"learning_rate": 1.9753086419753087e-06, |
|
"loss": 0.0, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 121.0048309178744, |
|
"grad_norm": 0.0003902267199009657, |
|
"learning_rate": 1.964573268921095e-06, |
|
"loss": 0.0, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 121.00579710144928, |
|
"grad_norm": 0.00017865381960291415, |
|
"learning_rate": 1.9538378958668816e-06, |
|
"loss": 0.0, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 121.00676328502415, |
|
"grad_norm": 0.0015924603212624788, |
|
"learning_rate": 1.943102522812668e-06, |
|
"loss": 0.0, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 121.00676328502415, |
|
"eval_accuracy": 0.7948717948717948, |
|
"eval_loss": 1.9584029912948608, |
|
"eval_runtime": 22.508, |
|
"eval_samples_per_second": 3.465, |
|
"eval_steps_per_second": 0.889, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 122.00096618357487, |
|
"grad_norm": 0.0007341349264606833, |
|
"learning_rate": 1.932367149758454e-06, |
|
"loss": 0.0, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 122.00193236714976, |
|
"grad_norm": 0.0006165707600302994, |
|
"learning_rate": 1.9216317767042406e-06, |
|
"loss": 0.0, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 122.00289855072464, |
|
"grad_norm": 0.0003926469071302563, |
|
"learning_rate": 1.910896403650027e-06, |
|
"loss": 0.0015, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 122.00386473429951, |
|
"grad_norm": 0.0003498373262118548, |
|
"learning_rate": 1.9001610305958135e-06, |
|
"loss": 0.0, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 122.0048309178744, |
|
"grad_norm": 0.0002549053751863539, |
|
"learning_rate": 1.8894256575415997e-06, |
|
"loss": 0.0, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 122.00579710144928, |
|
"grad_norm": 0.008167664520442486, |
|
"learning_rate": 1.8786902844873862e-06, |
|
"loss": 0.0, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 122.00676328502415, |
|
"grad_norm": 0.00036383105907589197, |
|
"learning_rate": 1.8679549114331724e-06, |
|
"loss": 0.0, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 122.00676328502415, |
|
"eval_accuracy": 0.8205128205128205, |
|
"eval_loss": 1.5670628547668457, |
|
"eval_runtime": 20.8607, |
|
"eval_samples_per_second": 3.739, |
|
"eval_steps_per_second": 0.959, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 123.00096618357487, |
|
"grad_norm": 0.0004942956147715449, |
|
"learning_rate": 1.8572195383789586e-06, |
|
"loss": 0.0, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 123.00193236714976, |
|
"grad_norm": 0.0004234071238897741, |
|
"learning_rate": 1.846484165324745e-06, |
|
"loss": 0.0004, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 123.00289855072464, |
|
"grad_norm": 0.0005698536988347769, |
|
"learning_rate": 1.8357487922705318e-06, |
|
"loss": 0.0, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 123.00386473429951, |
|
"grad_norm": 0.0009004671010188758, |
|
"learning_rate": 1.825013419216318e-06, |
|
"loss": 0.0, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 123.0048309178744, |
|
"grad_norm": 0.000181853465619497, |
|
"learning_rate": 1.8142780461621042e-06, |
|
"loss": 0.0, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 123.00579710144928, |
|
"grad_norm": 0.0006418362027034163, |
|
"learning_rate": 1.8035426731078907e-06, |
|
"loss": 0.0, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 123.00676328502415, |
|
"grad_norm": 0.00031043903436511755, |
|
"learning_rate": 1.792807300053677e-06, |
|
"loss": 0.0, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 123.00676328502415, |
|
"eval_accuracy": 0.7564102564102564, |
|
"eval_loss": 2.345607042312622, |
|
"eval_runtime": 24.4869, |
|
"eval_samples_per_second": 3.185, |
|
"eval_steps_per_second": 0.817, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 124.00096618357487, |
|
"grad_norm": 0.00024151946126949042, |
|
"learning_rate": 1.7820719269994632e-06, |
|
"loss": 0.0, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 124.00193236714976, |
|
"grad_norm": 0.00013747204502578825, |
|
"learning_rate": 1.7713365539452498e-06, |
|
"loss": 0.0, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 124.00289855072464, |
|
"grad_norm": 0.0007334667025133967, |
|
"learning_rate": 1.760601180891036e-06, |
|
"loss": 0.0, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 124.00386473429951, |
|
"grad_norm": 0.0001992643519770354, |
|
"learning_rate": 1.7498658078368225e-06, |
|
"loss": 0.0, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 124.0048309178744, |
|
"grad_norm": 0.007898527197539806, |
|
"learning_rate": 1.7391304347826088e-06, |
|
"loss": 0.0, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 124.00579710144928, |
|
"grad_norm": 0.00034817663254216313, |
|
"learning_rate": 1.7283950617283952e-06, |
|
"loss": 0.0, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 124.00676328502415, |
|
"grad_norm": 0.0022646754514425993, |
|
"learning_rate": 1.7176596886741817e-06, |
|
"loss": 0.0, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 124.00676328502415, |
|
"eval_accuracy": 0.7564102564102564, |
|
"eval_loss": 2.34527587890625, |
|
"eval_runtime": 17.9708, |
|
"eval_samples_per_second": 4.34, |
|
"eval_steps_per_second": 1.113, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 125.00096618357487, |
|
"grad_norm": 0.0003508002555463463, |
|
"learning_rate": 1.7069243156199681e-06, |
|
"loss": 0.0, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 125.00193236714976, |
|
"grad_norm": 0.00030854917713440955, |
|
"learning_rate": 1.6961889425657543e-06, |
|
"loss": 0.0, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 125.00289855072464, |
|
"grad_norm": 0.0007584840641357005, |
|
"learning_rate": 1.6854535695115406e-06, |
|
"loss": 0.0, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 125.00386473429951, |
|
"grad_norm": 0.0006423608283512294, |
|
"learning_rate": 1.674718196457327e-06, |
|
"loss": 0.0, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 125.0048309178744, |
|
"grad_norm": 0.00041385908843949437, |
|
"learning_rate": 1.6639828234031133e-06, |
|
"loss": 0.0, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 125.00579710144928, |
|
"grad_norm": 0.0002265427610836923, |
|
"learning_rate": 1.6532474503489e-06, |
|
"loss": 0.0, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 125.00676328502415, |
|
"grad_norm": 0.00011521170381456614, |
|
"learning_rate": 1.6425120772946862e-06, |
|
"loss": 0.0, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 125.00676328502415, |
|
"eval_accuracy": 0.7435897435897436, |
|
"eval_loss": 2.4119598865509033, |
|
"eval_runtime": 19.241, |
|
"eval_samples_per_second": 4.054, |
|
"eval_steps_per_second": 1.039, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 126.00096618357487, |
|
"grad_norm": 0.00031076266895979643, |
|
"learning_rate": 1.6317767042404724e-06, |
|
"loss": 0.0, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 126.00193236714976, |
|
"grad_norm": 0.0004146753635723144, |
|
"learning_rate": 1.6210413311862589e-06, |
|
"loss": 0.0, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 126.00289855072464, |
|
"grad_norm": 0.00010951059084618464, |
|
"learning_rate": 1.610305958132045e-06, |
|
"loss": 0.0, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 126.00386473429951, |
|
"grad_norm": 0.0006361532141454518, |
|
"learning_rate": 1.5995705850778316e-06, |
|
"loss": 0.0, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 126.0048309178744, |
|
"grad_norm": 0.00019192650506738573, |
|
"learning_rate": 1.588835212023618e-06, |
|
"loss": 0.0, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 126.00579710144928, |
|
"grad_norm": 0.00010023318463936448, |
|
"learning_rate": 1.5780998389694045e-06, |
|
"loss": 0.0, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 126.00676328502415, |
|
"grad_norm": 0.00033185991924256086, |
|
"learning_rate": 1.5673644659151907e-06, |
|
"loss": 0.0, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 126.00676328502415, |
|
"eval_accuracy": 0.7435897435897436, |
|
"eval_loss": 2.3774123191833496, |
|
"eval_runtime": 20.0988, |
|
"eval_samples_per_second": 3.881, |
|
"eval_steps_per_second": 0.995, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 127.00096618357487, |
|
"grad_norm": 0.00048786698607727885, |
|
"learning_rate": 1.556629092860977e-06, |
|
"loss": 0.0, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 127.00193236714976, |
|
"grad_norm": 0.0003051810781471431, |
|
"learning_rate": 1.5458937198067634e-06, |
|
"loss": 0.0, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 127.00289855072464, |
|
"grad_norm": 0.0003112946287728846, |
|
"learning_rate": 1.5351583467525496e-06, |
|
"loss": 0.0, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 127.00386473429951, |
|
"grad_norm": 0.00031099646002985537, |
|
"learning_rate": 1.5244229736983363e-06, |
|
"loss": 0.0, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 127.0048309178744, |
|
"grad_norm": 0.0002164303296012804, |
|
"learning_rate": 1.5136876006441225e-06, |
|
"loss": 0.0, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 127.00579710144928, |
|
"grad_norm": 0.0003160216729156673, |
|
"learning_rate": 1.502952227589909e-06, |
|
"loss": 0.0, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 127.00676328502415, |
|
"grad_norm": 0.00017373952141497284, |
|
"learning_rate": 1.4922168545356952e-06, |
|
"loss": 0.0, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 127.00676328502415, |
|
"eval_accuracy": 0.7435897435897436, |
|
"eval_loss": 2.3609466552734375, |
|
"eval_runtime": 23.3283, |
|
"eval_samples_per_second": 3.344, |
|
"eval_steps_per_second": 0.857, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 128.0009661835749, |
|
"grad_norm": 0.0002286910021211952, |
|
"learning_rate": 1.4814814814814815e-06, |
|
"loss": 0.0, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 128.00193236714975, |
|
"grad_norm": 0.0015229767886921763, |
|
"learning_rate": 1.470746108427268e-06, |
|
"loss": 0.0, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 128.00289855072464, |
|
"grad_norm": 0.00010285177268087864, |
|
"learning_rate": 1.4600107353730544e-06, |
|
"loss": 0.0, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 128.00386473429953, |
|
"grad_norm": 0.00026322112535126507, |
|
"learning_rate": 1.4492753623188408e-06, |
|
"loss": 0.0, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 128.0048309178744, |
|
"grad_norm": 0.00018546039063949138, |
|
"learning_rate": 1.438539989264627e-06, |
|
"loss": 0.0, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 128.00579710144928, |
|
"grad_norm": 0.00019054964650422335, |
|
"learning_rate": 1.4278046162104133e-06, |
|
"loss": 0.0, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 128.00676328502416, |
|
"grad_norm": 0.0009014433017000556, |
|
"learning_rate": 1.4170692431561997e-06, |
|
"loss": 0.0, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 128.00676328502416, |
|
"eval_accuracy": 0.7564102564102564, |
|
"eval_loss": 2.353100061416626, |
|
"eval_runtime": 19.8354, |
|
"eval_samples_per_second": 3.932, |
|
"eval_steps_per_second": 1.008, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 129.0009661835749, |
|
"grad_norm": 0.0002870812895707786, |
|
"learning_rate": 1.406333870101986e-06, |
|
"loss": 0.0, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 129.00193236714975, |
|
"grad_norm": 0.00018334065680392087, |
|
"learning_rate": 1.3955984970477726e-06, |
|
"loss": 0.0, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 129.00289855072464, |
|
"grad_norm": 0.00016877887537702918, |
|
"learning_rate": 1.3848631239935589e-06, |
|
"loss": 0.0, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 129.00386473429953, |
|
"grad_norm": 0.0004532422171905637, |
|
"learning_rate": 1.3741277509393453e-06, |
|
"loss": 0.0, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 129.0048309178744, |
|
"grad_norm": 0.0008466316503472626, |
|
"learning_rate": 1.3633923778851316e-06, |
|
"loss": 0.0, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 129.00579710144928, |
|
"grad_norm": 0.00024063742603175342, |
|
"learning_rate": 1.3526570048309178e-06, |
|
"loss": 0.0, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 129.00676328502416, |
|
"grad_norm": 0.0019017226295545697, |
|
"learning_rate": 1.3419216317767045e-06, |
|
"loss": 0.0, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 129.00676328502416, |
|
"eval_accuracy": 0.782051282051282, |
|
"eval_loss": 1.991046667098999, |
|
"eval_runtime": 19.7334, |
|
"eval_samples_per_second": 3.953, |
|
"eval_steps_per_second": 1.014, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 130.0009661835749, |
|
"grad_norm": 0.0004907360416837037, |
|
"learning_rate": 1.3311862587224907e-06, |
|
"loss": 0.0, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 130.00193236714975, |
|
"grad_norm": 0.00020176426914986223, |
|
"learning_rate": 1.3204508856682772e-06, |
|
"loss": 0.0, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 130.00289855072464, |
|
"grad_norm": 0.0003256254130974412, |
|
"learning_rate": 1.3097155126140634e-06, |
|
"loss": 0.0, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 130.00386473429953, |
|
"grad_norm": 0.0005137431435286999, |
|
"learning_rate": 1.2989801395598498e-06, |
|
"loss": 0.0, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 130.0048309178744, |
|
"grad_norm": 0.00027728540590032935, |
|
"learning_rate": 1.288244766505636e-06, |
|
"loss": 0.0, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 130.00579710144928, |
|
"grad_norm": 0.00032328334054909647, |
|
"learning_rate": 1.2775093934514227e-06, |
|
"loss": 0.0, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 130.00676328502416, |
|
"grad_norm": 0.0008982921717688441, |
|
"learning_rate": 1.266774020397209e-06, |
|
"loss": 0.0, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 130.00676328502416, |
|
"eval_accuracy": 0.782051282051282, |
|
"eval_loss": 2.003160238265991, |
|
"eval_runtime": 20.5795, |
|
"eval_samples_per_second": 3.79, |
|
"eval_steps_per_second": 0.972, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 131.0009661835749, |
|
"grad_norm": 0.0008950904011726379, |
|
"learning_rate": 1.2560386473429952e-06, |
|
"loss": 0.0, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 131.00193236714975, |
|
"grad_norm": 0.00045019862591288984, |
|
"learning_rate": 1.2453032742887817e-06, |
|
"loss": 0.0, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 131.00289855072464, |
|
"grad_norm": 0.00017776140884961933, |
|
"learning_rate": 1.234567901234568e-06, |
|
"loss": 0.0, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 131.00386473429953, |
|
"grad_norm": 0.0013761166483163834, |
|
"learning_rate": 1.2238325281803544e-06, |
|
"loss": 0.0, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 131.0048309178744, |
|
"grad_norm": 0.00021241333161015064, |
|
"learning_rate": 1.2130971551261406e-06, |
|
"loss": 0.0, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 131.00579710144928, |
|
"grad_norm": 0.025066981092095375, |
|
"learning_rate": 1.2023617820719273e-06, |
|
"loss": 0.0, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 131.00676328502416, |
|
"grad_norm": 0.0006026471965014935, |
|
"learning_rate": 1.1916264090177135e-06, |
|
"loss": 0.0, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 131.00676328502416, |
|
"eval_accuracy": 0.7692307692307693, |
|
"eval_loss": 2.0645220279693604, |
|
"eval_runtime": 20.8831, |
|
"eval_samples_per_second": 3.735, |
|
"eval_steps_per_second": 0.958, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 132.0009661835749, |
|
"grad_norm": 0.00033965182956308126, |
|
"learning_rate": 1.1808910359634997e-06, |
|
"loss": 0.0, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 132.00193236714975, |
|
"grad_norm": 0.00014069851022213697, |
|
"learning_rate": 1.1701556629092862e-06, |
|
"loss": 0.0, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 132.00289855072464, |
|
"grad_norm": 0.00030967529164627194, |
|
"learning_rate": 1.1594202898550726e-06, |
|
"loss": 0.0, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 132.00386473429953, |
|
"grad_norm": 0.000856581493280828, |
|
"learning_rate": 1.1486849168008589e-06, |
|
"loss": 0.0, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 132.0048309178744, |
|
"grad_norm": 0.0006151138222776353, |
|
"learning_rate": 1.1379495437466453e-06, |
|
"loss": 0.0, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 132.00579710144928, |
|
"grad_norm": 0.00021990279492456466, |
|
"learning_rate": 1.1272141706924316e-06, |
|
"loss": 0.0, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 132.00676328502416, |
|
"grad_norm": 0.0009203171939589083, |
|
"learning_rate": 1.116478797638218e-06, |
|
"loss": 0.0, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 132.00676328502416, |
|
"eval_accuracy": 0.7692307692307693, |
|
"eval_loss": 2.0597715377807617, |
|
"eval_runtime": 14.3229, |
|
"eval_samples_per_second": 5.446, |
|
"eval_steps_per_second": 1.396, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 133.0009661835749, |
|
"grad_norm": 0.0005204367334954441, |
|
"learning_rate": 1.1057434245840045e-06, |
|
"loss": 0.0, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 133.00193236714975, |
|
"grad_norm": 0.00021785238641314209, |
|
"learning_rate": 1.0950080515297907e-06, |
|
"loss": 0.0, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 133.00289855072464, |
|
"grad_norm": 0.000165929232025519, |
|
"learning_rate": 1.0842726784755772e-06, |
|
"loss": 0.0, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 133.00386473429953, |
|
"grad_norm": 0.00023569545010104775, |
|
"learning_rate": 1.0735373054213636e-06, |
|
"loss": 0.0, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 133.0048309178744, |
|
"grad_norm": 0.00020854534523095936, |
|
"learning_rate": 1.0628019323671499e-06, |
|
"loss": 0.0, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 133.00579710144928, |
|
"grad_norm": 0.000279417319688946, |
|
"learning_rate": 1.052066559312936e-06, |
|
"loss": 0.0, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 133.00676328502416, |
|
"grad_norm": 0.0008123007137328386, |
|
"learning_rate": 1.0413311862587225e-06, |
|
"loss": 0.0, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 133.00676328502416, |
|
"eval_accuracy": 0.7692307692307693, |
|
"eval_loss": 2.05936598777771, |
|
"eval_runtime": 15.7378, |
|
"eval_samples_per_second": 4.956, |
|
"eval_steps_per_second": 1.271, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 134.0009661835749, |
|
"grad_norm": 0.0008277930901385844, |
|
"learning_rate": 1.030595813204509e-06, |
|
"loss": 0.0, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 134.00193236714975, |
|
"grad_norm": 0.00015786287258379161, |
|
"learning_rate": 1.0198604401502952e-06, |
|
"loss": 0.0, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 134.00289855072464, |
|
"grad_norm": 0.00024702053633518517, |
|
"learning_rate": 1.0091250670960817e-06, |
|
"loss": 0.0, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 134.00386473429953, |
|
"grad_norm": 0.0004332618846092373, |
|
"learning_rate": 9.983896940418681e-07, |
|
"loss": 0.0, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 134.0048309178744, |
|
"grad_norm": 0.00013526807015296072, |
|
"learning_rate": 9.876543209876544e-07, |
|
"loss": 0.0, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 134.00579710144928, |
|
"grad_norm": 0.0002207817160524428, |
|
"learning_rate": 9.769189479334408e-07, |
|
"loss": 0.0, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 134.00676328502416, |
|
"grad_norm": 0.0002987273037433624, |
|
"learning_rate": 9.66183574879227e-07, |
|
"loss": 0.0, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 134.00676328502416, |
|
"eval_accuracy": 0.7692307692307693, |
|
"eval_loss": 2.0567851066589355, |
|
"eval_runtime": 18.6289, |
|
"eval_samples_per_second": 4.187, |
|
"eval_steps_per_second": 1.074, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 135.0009661835749, |
|
"grad_norm": 0.00019443879136815667, |
|
"learning_rate": 9.554482018250135e-07, |
|
"loss": 0.0, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 135.00193236714975, |
|
"grad_norm": 0.0002544256276451051, |
|
"learning_rate": 9.447128287707999e-07, |
|
"loss": 0.0, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 135.00289855072464, |
|
"grad_norm": 0.008219041861593723, |
|
"learning_rate": 9.339774557165862e-07, |
|
"loss": 0.0, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 135.00386473429953, |
|
"grad_norm": 0.00030363400583155453, |
|
"learning_rate": 9.232420826623725e-07, |
|
"loss": 0.0, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 135.0048309178744, |
|
"grad_norm": 0.00029681427986361086, |
|
"learning_rate": 9.12506709608159e-07, |
|
"loss": 0.0, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 135.00579710144928, |
|
"grad_norm": 0.00039895492955110967, |
|
"learning_rate": 9.017713365539453e-07, |
|
"loss": 0.0, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 135.00676328502416, |
|
"grad_norm": 0.0014713755808770657, |
|
"learning_rate": 8.910359634997316e-07, |
|
"loss": 0.0, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 135.00676328502416, |
|
"eval_accuracy": 0.782051282051282, |
|
"eval_loss": 2.0521583557128906, |
|
"eval_runtime": 16.9486, |
|
"eval_samples_per_second": 4.602, |
|
"eval_steps_per_second": 1.18, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 136.0009661835749, |
|
"grad_norm": 0.0003343697462696582, |
|
"learning_rate": 8.80300590445518e-07, |
|
"loss": 0.0, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 136.00193236714975, |
|
"grad_norm": 0.00029433504096232355, |
|
"learning_rate": 8.695652173913044e-07, |
|
"loss": 0.0, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 136.00289855072464, |
|
"grad_norm": 0.0001634250656934455, |
|
"learning_rate": 8.588298443370908e-07, |
|
"loss": 0.0, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 136.00386473429953, |
|
"grad_norm": 0.00036819910746999085, |
|
"learning_rate": 8.480944712828772e-07, |
|
"loss": 0.0, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 136.0048309178744, |
|
"grad_norm": 0.0008551109349355102, |
|
"learning_rate": 8.373590982286635e-07, |
|
"loss": 0.0, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 136.00579710144928, |
|
"grad_norm": 0.00022303566220216453, |
|
"learning_rate": 8.2662372517445e-07, |
|
"loss": 0.0, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 136.00676328502416, |
|
"grad_norm": 0.0008858810761012137, |
|
"learning_rate": 8.158883521202362e-07, |
|
"loss": 0.0, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 136.00676328502416, |
|
"eval_accuracy": 0.7564102564102564, |
|
"eval_loss": 1.9970703125, |
|
"eval_runtime": 16.5991, |
|
"eval_samples_per_second": 4.699, |
|
"eval_steps_per_second": 1.205, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 137.0009661835749, |
|
"grad_norm": 0.0007048854604363441, |
|
"learning_rate": 8.051529790660226e-07, |
|
"loss": 0.0, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 137.00193236714975, |
|
"grad_norm": 0.00012740909005515277, |
|
"learning_rate": 7.94417606011809e-07, |
|
"loss": 0.0, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 137.00289855072464, |
|
"grad_norm": 0.0002288664982188493, |
|
"learning_rate": 7.836822329575953e-07, |
|
"loss": 0.0, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 137.00386473429953, |
|
"grad_norm": 0.00017713684064801782, |
|
"learning_rate": 7.729468599033817e-07, |
|
"loss": 0.0, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 137.0048309178744, |
|
"grad_norm": 0.0006587646203115582, |
|
"learning_rate": 7.622114868491681e-07, |
|
"loss": 0.0, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 137.00579710144928, |
|
"grad_norm": 9.106951620196924e-05, |
|
"learning_rate": 7.514761137949545e-07, |
|
"loss": 0.0, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 137.00676328502416, |
|
"grad_norm": 0.00013192169717513025, |
|
"learning_rate": 7.407407407407407e-07, |
|
"loss": 0.0, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 137.00676328502416, |
|
"eval_accuracy": 0.7564102564102564, |
|
"eval_loss": 1.997692346572876, |
|
"eval_runtime": 17.822, |
|
"eval_samples_per_second": 4.377, |
|
"eval_steps_per_second": 1.122, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 138.0009661835749, |
|
"grad_norm": 0.0002446017460897565, |
|
"learning_rate": 7.300053676865272e-07, |
|
"loss": 0.0, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 138.00193236714975, |
|
"grad_norm": 0.001299283467233181, |
|
"learning_rate": 7.192699946323135e-07, |
|
"loss": 0.0, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 138.00289855072464, |
|
"grad_norm": 0.0001351604878436774, |
|
"learning_rate": 7.085346215780999e-07, |
|
"loss": 0.0, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 138.00386473429953, |
|
"grad_norm": 0.0011268679518252611, |
|
"learning_rate": 6.977992485238863e-07, |
|
"loss": 0.0, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 138.0048309178744, |
|
"grad_norm": 0.0016840985044836998, |
|
"learning_rate": 6.870638754696727e-07, |
|
"loss": 0.0, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 138.00579710144928, |
|
"grad_norm": 0.0004186121514067054, |
|
"learning_rate": 6.763285024154589e-07, |
|
"loss": 0.0, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 138.00676328502416, |
|
"grad_norm": 9.602143109077588e-05, |
|
"learning_rate": 6.655931293612454e-07, |
|
"loss": 0.0, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 138.00676328502416, |
|
"eval_accuracy": 0.7692307692307693, |
|
"eval_loss": 2.089564800262451, |
|
"eval_runtime": 14.6925, |
|
"eval_samples_per_second": 5.309, |
|
"eval_steps_per_second": 1.361, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 139.0009661835749, |
|
"grad_norm": 0.0003402529691811651, |
|
"learning_rate": 6.548577563070317e-07, |
|
"loss": 0.0, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 139.00193236714975, |
|
"grad_norm": 0.0008135391981340945, |
|
"learning_rate": 6.44122383252818e-07, |
|
"loss": 0.0, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 139.00289855072464, |
|
"grad_norm": 0.00022346228070091456, |
|
"learning_rate": 6.333870101986045e-07, |
|
"loss": 0.0, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 139.00386473429953, |
|
"grad_norm": 0.0002428313164273277, |
|
"learning_rate": 6.226516371443908e-07, |
|
"loss": 0.0, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 139.0048309178744, |
|
"grad_norm": 0.00042313747690059245, |
|
"learning_rate": 6.119162640901772e-07, |
|
"loss": 0.0, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 139.00579710144928, |
|
"grad_norm": 0.00024634020519442856, |
|
"learning_rate": 6.011808910359636e-07, |
|
"loss": 0.0, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 139.00676328502416, |
|
"grad_norm": 0.0003057706926483661, |
|
"learning_rate": 5.904455179817499e-07, |
|
"loss": 0.0, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 139.00676328502416, |
|
"eval_accuracy": 0.7692307692307693, |
|
"eval_loss": 2.1549952030181885, |
|
"eval_runtime": 14.6543, |
|
"eval_samples_per_second": 5.323, |
|
"eval_steps_per_second": 1.365, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 140.0009661835749, |
|
"grad_norm": 0.00018014867964666337, |
|
"learning_rate": 5.797101449275363e-07, |
|
"loss": 0.0, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 140.00193236714975, |
|
"grad_norm": 0.0005324236699379981, |
|
"learning_rate": 5.689747718733227e-07, |
|
"loss": 0.0014, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 140.00289855072464, |
|
"grad_norm": 0.00020519823010545224, |
|
"learning_rate": 5.58239398819109e-07, |
|
"loss": 0.0, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 140.00386473429953, |
|
"grad_norm": 0.00020373713050503284, |
|
"learning_rate": 5.475040257648954e-07, |
|
"loss": 0.0, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 140.0048309178744, |
|
"grad_norm": 0.00020074652275070548, |
|
"learning_rate": 5.367686527106818e-07, |
|
"loss": 0.0, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 140.00579710144928, |
|
"grad_norm": 0.0001167950322269462, |
|
"learning_rate": 5.26033279656468e-07, |
|
"loss": 0.0, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 140.00676328502416, |
|
"grad_norm": 0.0004204206052236259, |
|
"learning_rate": 5.152979066022545e-07, |
|
"loss": 0.0, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 140.00676328502416, |
|
"eval_accuracy": 0.7692307692307693, |
|
"eval_loss": 2.1875460147857666, |
|
"eval_runtime": 13.9179, |
|
"eval_samples_per_second": 5.604, |
|
"eval_steps_per_second": 1.437, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 141.0009661835749, |
|
"grad_norm": 0.0008608843781985343, |
|
"learning_rate": 5.045625335480408e-07, |
|
"loss": 0.0, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 141.00193236714975, |
|
"grad_norm": 0.0035127594601362944, |
|
"learning_rate": 4.938271604938272e-07, |
|
"loss": 0.0, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 141.00289855072464, |
|
"grad_norm": 0.00016304482414852828, |
|
"learning_rate": 4.830917874396135e-07, |
|
"loss": 0.0, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 141.00386473429953, |
|
"grad_norm": 0.00017550381016917527, |
|
"learning_rate": 4.7235641438539993e-07, |
|
"loss": 0.0, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 141.0048309178744, |
|
"grad_norm": 0.00027925425092689693, |
|
"learning_rate": 4.616210413311863e-07, |
|
"loss": 0.0, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 141.00579710144928, |
|
"grad_norm": 0.00017096209921874106, |
|
"learning_rate": 4.5088566827697267e-07, |
|
"loss": 0.0, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 141.00676328502416, |
|
"grad_norm": 0.0006372460629791021, |
|
"learning_rate": 4.40150295222759e-07, |
|
"loss": 0.0, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 141.00676328502416, |
|
"eval_accuracy": 0.7692307692307693, |
|
"eval_loss": 2.1873533725738525, |
|
"eval_runtime": 13.9433, |
|
"eval_samples_per_second": 5.594, |
|
"eval_steps_per_second": 1.434, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 142.0009661835749, |
|
"grad_norm": 0.0002361346414545551, |
|
"learning_rate": 4.294149221685454e-07, |
|
"loss": 0.0, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 142.00193236714975, |
|
"grad_norm": 0.0004928818088956177, |
|
"learning_rate": 4.1867954911433176e-07, |
|
"loss": 0.0, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 142.00289855072464, |
|
"grad_norm": 0.00033226932282559574, |
|
"learning_rate": 4.079441760601181e-07, |
|
"loss": 0.0, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 142.00386473429953, |
|
"grad_norm": 0.00016688242612872273, |
|
"learning_rate": 3.972088030059045e-07, |
|
"loss": 0.0, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 142.0048309178744, |
|
"grad_norm": 0.0001479385537095368, |
|
"learning_rate": 3.8647342995169085e-07, |
|
"loss": 0.0, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 142.00579710144928, |
|
"grad_norm": 0.00019251066260039806, |
|
"learning_rate": 3.7573805689747724e-07, |
|
"loss": 0.0, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 142.00676328502416, |
|
"grad_norm": 0.000183121781446971, |
|
"learning_rate": 3.650026838432636e-07, |
|
"loss": 0.0, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 142.00676328502416, |
|
"eval_accuracy": 0.7692307692307693, |
|
"eval_loss": 2.1822385787963867, |
|
"eval_runtime": 14.308, |
|
"eval_samples_per_second": 5.452, |
|
"eval_steps_per_second": 1.398, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 143.0009661835749, |
|
"grad_norm": 0.00021158168965484947, |
|
"learning_rate": 3.5426731078904993e-07, |
|
"loss": 0.0, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 143.00193236714975, |
|
"grad_norm": 0.0001671000791247934, |
|
"learning_rate": 3.4353193773483633e-07, |
|
"loss": 0.0, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 143.00289855072464, |
|
"grad_norm": 0.00016769721696618944, |
|
"learning_rate": 3.327965646806227e-07, |
|
"loss": 0.0, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 143.00386473429953, |
|
"grad_norm": 0.00015122311015147716, |
|
"learning_rate": 3.22061191626409e-07, |
|
"loss": 0.0, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 143.0048309178744, |
|
"grad_norm": 0.00033184216590598226, |
|
"learning_rate": 3.113258185721954e-07, |
|
"loss": 0.0, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 143.00579710144928, |
|
"grad_norm": 0.0002991911314893514, |
|
"learning_rate": 3.005904455179818e-07, |
|
"loss": 0.0, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 143.00676328502416, |
|
"grad_norm": 0.00013400911120697856, |
|
"learning_rate": 2.8985507246376816e-07, |
|
"loss": 0.0, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 143.00676328502416, |
|
"eval_accuracy": 0.7692307692307693, |
|
"eval_loss": 2.181835412979126, |
|
"eval_runtime": 14.6353, |
|
"eval_samples_per_second": 5.33, |
|
"eval_steps_per_second": 1.367, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 144.0009661835749, |
|
"grad_norm": 0.0001004399309749715, |
|
"learning_rate": 2.791196994095545e-07, |
|
"loss": 0.0, |
|
"step": 10090 |
|
}, |
|
{ |
|
"epoch": 144.00193236714975, |
|
"grad_norm": 0.00020495509670581669, |
|
"learning_rate": 2.683843263553409e-07, |
|
"loss": 0.0, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 144.00289855072464, |
|
"grad_norm": 0.00013104511890560389, |
|
"learning_rate": 2.5764895330112725e-07, |
|
"loss": 0.0, |
|
"step": 10110 |
|
}, |
|
{ |
|
"epoch": 144.00386473429953, |
|
"grad_norm": 0.0002047176967607811, |
|
"learning_rate": 2.469135802469136e-07, |
|
"loss": 0.0, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 144.0048309178744, |
|
"grad_norm": 0.000391674431739375, |
|
"learning_rate": 2.3617820719269996e-07, |
|
"loss": 0.0, |
|
"step": 10130 |
|
}, |
|
{ |
|
"epoch": 144.00579710144928, |
|
"grad_norm": 0.0001891203864943236, |
|
"learning_rate": 2.2544283413848634e-07, |
|
"loss": 0.0, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 144.00676328502416, |
|
"grad_norm": 0.00012977355800103396, |
|
"learning_rate": 2.147074610842727e-07, |
|
"loss": 0.0, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 144.00676328502416, |
|
"eval_accuracy": 0.7692307692307693, |
|
"eval_loss": 2.1805622577667236, |
|
"eval_runtime": 13.9887, |
|
"eval_samples_per_second": 5.576, |
|
"eval_steps_per_second": 1.43, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 145.0009661835749, |
|
"grad_norm": 0.0001362557668471709, |
|
"learning_rate": 2.0397208803005905e-07, |
|
"loss": 0.0, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 145.00193236714975, |
|
"grad_norm": 0.0005237432778812945, |
|
"learning_rate": 1.9323671497584542e-07, |
|
"loss": 0.0, |
|
"step": 10170 |
|
}, |
|
{ |
|
"epoch": 145.00289855072464, |
|
"grad_norm": 0.0002967139007523656, |
|
"learning_rate": 1.825013419216318e-07, |
|
"loss": 0.0, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 145.00386473429953, |
|
"grad_norm": 0.00015602614439558238, |
|
"learning_rate": 1.7176596886741817e-07, |
|
"loss": 0.0, |
|
"step": 10190 |
|
}, |
|
{ |
|
"epoch": 145.0048309178744, |
|
"grad_norm": 0.0002610948868095875, |
|
"learning_rate": 1.610305958132045e-07, |
|
"loss": 0.0, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 145.00579710144928, |
|
"grad_norm": 0.00014871751773171127, |
|
"learning_rate": 1.502952227589909e-07, |
|
"loss": 0.0, |
|
"step": 10210 |
|
}, |
|
{ |
|
"epoch": 145.00676328502416, |
|
"grad_norm": 0.00018696517508942634, |
|
"learning_rate": 1.3955984970477725e-07, |
|
"loss": 0.0, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 145.00676328502416, |
|
"eval_accuracy": 0.7692307692307693, |
|
"eval_loss": 2.1803364753723145, |
|
"eval_runtime": 14.0396, |
|
"eval_samples_per_second": 5.556, |
|
"eval_steps_per_second": 1.425, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 146.0009661835749, |
|
"grad_norm": 0.0006746940198354423, |
|
"learning_rate": 1.2882447665056362e-07, |
|
"loss": 0.0, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 146.00193236714975, |
|
"grad_norm": 0.0010222846176475286, |
|
"learning_rate": 1.1808910359634998e-07, |
|
"loss": 0.0, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 146.00289855072464, |
|
"grad_norm": 0.00016020600742194802, |
|
"learning_rate": 1.0735373054213635e-07, |
|
"loss": 0.0, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 146.00386473429953, |
|
"grad_norm": 0.00013268800103105605, |
|
"learning_rate": 9.661835748792271e-08, |
|
"loss": 0.0, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 146.0048309178744, |
|
"grad_norm": 0.0003070248058065772, |
|
"learning_rate": 8.588298443370908e-08, |
|
"loss": 0.0, |
|
"step": 10270 |
|
}, |
|
{ |
|
"epoch": 146.00579710144928, |
|
"grad_norm": 0.00037805610918439925, |
|
"learning_rate": 7.514761137949545e-08, |
|
"loss": 0.0, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 146.00676328502416, |
|
"grad_norm": 0.0001392628182657063, |
|
"learning_rate": 6.441223832528181e-08, |
|
"loss": 0.0, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 146.00676328502416, |
|
"eval_accuracy": 0.7692307692307693, |
|
"eval_loss": 2.180433988571167, |
|
"eval_runtime": 14.6632, |
|
"eval_samples_per_second": 5.319, |
|
"eval_steps_per_second": 1.364, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 147.0009661835749, |
|
"grad_norm": 0.0004906918038614094, |
|
"learning_rate": 5.367686527106818e-08, |
|
"loss": 0.0, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 147.00193236714975, |
|
"grad_norm": 0.0002476818044669926, |
|
"learning_rate": 4.294149221685454e-08, |
|
"loss": 0.0, |
|
"step": 10310 |
|
}, |
|
{ |
|
"epoch": 147.00289855072464, |
|
"grad_norm": 0.0003119547909591347, |
|
"learning_rate": 3.2206119162640906e-08, |
|
"loss": 0.0, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 147.00386473429953, |
|
"grad_norm": 0.00017419341020286083, |
|
"learning_rate": 2.147074610842727e-08, |
|
"loss": 0.0, |
|
"step": 10330 |
|
}, |
|
{ |
|
"epoch": 147.0048309178744, |
|
"grad_norm": 0.00013093346206005663, |
|
"learning_rate": 1.0735373054213635e-08, |
|
"loss": 0.0, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 147.00579710144928, |
|
"grad_norm": 0.0008992272196337581, |
|
"learning_rate": 0.0, |
|
"loss": 0.0, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 147.00579710144928, |
|
"eval_accuracy": 0.7692307692307693, |
|
"eval_loss": 2.1802780628204346, |
|
"eval_runtime": 14.9692, |
|
"eval_samples_per_second": 5.211, |
|
"eval_steps_per_second": 1.336, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 147.00579710144928, |
|
"step": 10350, |
|
"total_flos": 1.7985204684824032e+20, |
|
"train_loss": 0.10165577027017988, |
|
"train_runtime": 21165.7287, |
|
"train_samples_per_second": 1.956, |
|
"train_steps_per_second": 0.489 |
|
}, |
|
{ |
|
"epoch": 147.00579710144928, |
|
"eval_accuracy": 0.8461538461538461, |
|
"eval_loss": 1.0547696352005005, |
|
"eval_runtime": 15.5882, |
|
"eval_samples_per_second": 5.004, |
|
"eval_steps_per_second": 1.283, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 147.00579710144928, |
|
"eval_accuracy": 0.8461538461538461, |
|
"eval_loss": 1.054769515991211, |
|
"eval_runtime": 14.4609, |
|
"eval_samples_per_second": 5.394, |
|
"eval_steps_per_second": 1.383, |
|
"step": 10350 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 10350, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.7985204684824032e+20, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|