{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9968051118210862,
"eval_steps": 59,
"global_step": 234,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.004259850905218318,
"grad_norm": 0.42660918831825256,
"learning_rate": 2e-05,
"loss": 1.4194,
"step": 1
},
{
"epoch": 0.004259850905218318,
"eval_loss": 1.3981385231018066,
"eval_runtime": 17.5749,
"eval_samples_per_second": 22.475,
"eval_steps_per_second": 2.845,
"step": 1
},
{
"epoch": 0.008519701810436636,
"grad_norm": 0.38132771849632263,
"learning_rate": 4e-05,
"loss": 1.4291,
"step": 2
},
{
"epoch": 0.012779552715654952,
"grad_norm": 0.4677501916885376,
"learning_rate": 6e-05,
"loss": 1.606,
"step": 3
},
{
"epoch": 0.01703940362087327,
"grad_norm": 0.4839603900909424,
"learning_rate": 8e-05,
"loss": 1.5193,
"step": 4
},
{
"epoch": 0.021299254526091587,
"grad_norm": 0.52900630235672,
"learning_rate": 0.0001,
"loss": 1.7253,
"step": 5
},
{
"epoch": 0.025559105431309903,
"grad_norm": 0.4611320495605469,
"learning_rate": 0.00012,
"loss": 1.4042,
"step": 6
},
{
"epoch": 0.029818956336528223,
"grad_norm": 0.5078997611999512,
"learning_rate": 0.00014,
"loss": 1.8641,
"step": 7
},
{
"epoch": 0.03407880724174654,
"grad_norm": 0.5692968368530273,
"learning_rate": 0.00016,
"loss": 1.0603,
"step": 8
},
{
"epoch": 0.038338658146964855,
"grad_norm": 0.5424911379814148,
"learning_rate": 0.00018,
"loss": 0.9217,
"step": 9
},
{
"epoch": 0.042598509052183174,
"grad_norm": 0.6595712304115295,
"learning_rate": 0.0002,
"loss": 1.0443,
"step": 10
},
{
"epoch": 0.046858359957401494,
"grad_norm": 0.552948534488678,
"learning_rate": 0.00019999016517595753,
"loss": 0.9727,
"step": 11
},
{
"epoch": 0.051118210862619806,
"grad_norm": 0.523713231086731,
"learning_rate": 0.00019996066263830531,
"loss": 1.0042,
"step": 12
},
{
"epoch": 0.055378061767838126,
"grad_norm": 0.3326718807220459,
"learning_rate": 0.0001999114981900887,
"loss": 0.6851,
"step": 13
},
{
"epoch": 0.059637912673056445,
"grad_norm": 0.40246546268463135,
"learning_rate": 0.00019984268150178167,
"loss": 0.6865,
"step": 14
},
{
"epoch": 0.06389776357827476,
"grad_norm": 0.3299888074398041,
"learning_rate": 0.00019975422610938462,
"loss": 0.6413,
"step": 15
},
{
"epoch": 0.06815761448349308,
"grad_norm": 0.321532666683197,
"learning_rate": 0.00019964614941176195,
"loss": 0.6425,
"step": 16
},
{
"epoch": 0.0724174653887114,
"grad_norm": 0.30551549792289734,
"learning_rate": 0.0001995184726672197,
"loss": 0.6573,
"step": 17
},
{
"epoch": 0.07667731629392971,
"grad_norm": 0.3162730932235718,
"learning_rate": 0.00019937122098932428,
"loss": 0.7957,
"step": 18
},
{
"epoch": 0.08093716719914804,
"grad_norm": 0.2646523714065552,
"learning_rate": 0.00019920442334196248,
"loss": 0.6842,
"step": 19
},
{
"epoch": 0.08519701810436635,
"grad_norm": 0.35260164737701416,
"learning_rate": 0.00019901811253364456,
"loss": 0.7045,
"step": 20
},
{
"epoch": 0.08945686900958466,
"grad_norm": 0.36994901299476624,
"learning_rate": 0.00019881232521105089,
"loss": 0.7506,
"step": 21
},
{
"epoch": 0.09371671991480299,
"grad_norm": 0.3145638108253479,
"learning_rate": 0.0001985871018518236,
"loss": 0.6522,
"step": 22
},
{
"epoch": 0.0979765708200213,
"grad_norm": 0.28740495443344116,
"learning_rate": 0.00019834248675660486,
"loss": 0.5763,
"step": 23
},
{
"epoch": 0.10223642172523961,
"grad_norm": 0.29527685046195984,
"learning_rate": 0.00019807852804032305,
"loss": 0.8533,
"step": 24
},
{
"epoch": 0.10649627263045794,
"grad_norm": 0.3023378849029541,
"learning_rate": 0.00019779527762272877,
"loss": 0.738,
"step": 25
},
{
"epoch": 0.11075612353567625,
"grad_norm": 0.2749658524990082,
"learning_rate": 0.00019749279121818235,
"loss": 0.6354,
"step": 26
},
{
"epoch": 0.11501597444089456,
"grad_norm": 0.3914307951927185,
"learning_rate": 0.0001971711283246951,
"loss": 0.8604,
"step": 27
},
{
"epoch": 0.11927582534611289,
"grad_norm": 0.47873714566230774,
"learning_rate": 0.00019683035221222618,
"loss": 0.7972,
"step": 28
},
{
"epoch": 0.1235356762513312,
"grad_norm": 0.22174575924873352,
"learning_rate": 0.0001964705299102376,
"loss": 0.4385,
"step": 29
},
{
"epoch": 0.12779552715654952,
"grad_norm": 0.244963139295578,
"learning_rate": 0.00019609173219450998,
"loss": 0.7168,
"step": 30
},
{
"epoch": 0.13205537806176784,
"grad_norm": 0.32758575677871704,
"learning_rate": 0.0001956940335732209,
"loss": 0.7231,
"step": 31
},
{
"epoch": 0.13631522896698617,
"grad_norm": 0.21992172300815582,
"learning_rate": 0.00019527751227228963,
"loss": 0.662,
"step": 32
},
{
"epoch": 0.14057507987220447,
"grad_norm": 0.2899262309074402,
"learning_rate": 0.0001948422502199903,
"loss": 0.4651,
"step": 33
},
{
"epoch": 0.1448349307774228,
"grad_norm": 0.23878340423107147,
"learning_rate": 0.00019438833303083678,
"loss": 0.5367,
"step": 34
},
{
"epoch": 0.14909478168264112,
"grad_norm": 0.20475314557552338,
"learning_rate": 0.0001939158499887428,
"loss": 0.4024,
"step": 35
},
{
"epoch": 0.15335463258785942,
"grad_norm": 0.25068745017051697,
"learning_rate": 0.00019342489402945998,
"loss": 0.6575,
"step": 36
},
{
"epoch": 0.15761448349307774,
"grad_norm": 0.3811924159526825,
"learning_rate": 0.00019291556172229785,
"loss": 0.6405,
"step": 37
},
{
"epoch": 0.16187433439829607,
"grad_norm": 0.2627577483654022,
"learning_rate": 0.0001923879532511287,
"loss": 0.6961,
"step": 38
},
{
"epoch": 0.16613418530351437,
"grad_norm": 0.32665154337882996,
"learning_rate": 0.00019184217239468212,
"loss": 0.6983,
"step": 39
},
{
"epoch": 0.1703940362087327,
"grad_norm": 0.24597743153572083,
"learning_rate": 0.00019127832650613189,
"loss": 0.54,
"step": 40
},
{
"epoch": 0.17465388711395102,
"grad_norm": 0.2611660361289978,
"learning_rate": 0.00019069652649198005,
"loss": 0.6281,
"step": 41
},
{
"epoch": 0.17891373801916932,
"grad_norm": 0.2969326078891754,
"learning_rate": 0.0001900968867902419,
"loss": 0.6817,
"step": 42
},
{
"epoch": 0.18317358892438765,
"grad_norm": 0.27561935782432556,
"learning_rate": 0.00018947952534793661,
"loss": 0.626,
"step": 43
},
{
"epoch": 0.18743343982960597,
"grad_norm": 0.33468887209892273,
"learning_rate": 0.00018884456359788724,
"loss": 0.7383,
"step": 44
},
{
"epoch": 0.19169329073482427,
"grad_norm": 0.2937297224998474,
"learning_rate": 0.0001881921264348355,
"loss": 0.6972,
"step": 45
},
{
"epoch": 0.1959531416400426,
"grad_norm": 0.33218011260032654,
"learning_rate": 0.00018752234219087538,
"loss": 0.6749,
"step": 46
},
{
"epoch": 0.20021299254526093,
"grad_norm": 0.2661404311656952,
"learning_rate": 0.00018683534261021057,
"loss": 0.4882,
"step": 47
},
{
"epoch": 0.20447284345047922,
"grad_norm": 0.2451002150774002,
"learning_rate": 0.00018613126282324092,
"loss": 0.637,
"step": 48
},
{
"epoch": 0.20873269435569755,
"grad_norm": 0.27517661452293396,
"learning_rate": 0.00018541024131998274,
"loss": 0.5483,
"step": 49
},
{
"epoch": 0.21299254526091588,
"grad_norm": 0.24373459815979004,
"learning_rate": 0.00018467241992282843,
"loss": 0.5112,
"step": 50
},
{
"epoch": 0.21725239616613418,
"grad_norm": 0.3239864408969879,
"learning_rate": 0.00018391794375865024,
"loss": 0.8005,
"step": 51
},
{
"epoch": 0.2215122470713525,
"grad_norm": 0.29262682795524597,
"learning_rate": 0.00018314696123025454,
"loss": 0.6769,
"step": 52
},
{
"epoch": 0.22577209797657083,
"grad_norm": 0.28277888894081116,
"learning_rate": 0.00018235962398719147,
"loss": 0.6892,
"step": 53
},
{
"epoch": 0.23003194888178913,
"grad_norm": 0.41741546988487244,
"learning_rate": 0.00018155608689592604,
"loss": 0.6763,
"step": 54
},
{
"epoch": 0.23429179978700745,
"grad_norm": 0.2734082043170929,
"learning_rate": 0.00018073650800937624,
"loss": 0.697,
"step": 55
},
{
"epoch": 0.23855165069222578,
"grad_norm": 0.2646290957927704,
"learning_rate": 0.00017990104853582493,
"loss": 0.5936,
"step": 56
},
{
"epoch": 0.24281150159744408,
"grad_norm": 0.27723610401153564,
"learning_rate": 0.00017904987280721035,
"loss": 0.5875,
"step": 57
},
{
"epoch": 0.2470713525026624,
"grad_norm": 0.2668153643608093,
"learning_rate": 0.000178183148246803,
"loss": 0.5219,
"step": 58
},
{
"epoch": 0.25133120340788073,
"grad_norm": 0.29033368825912476,
"learning_rate": 0.0001773010453362737,
"loss": 0.5997,
"step": 59
},
{
"epoch": 0.25133120340788073,
"eval_loss": 0.5784963965415955,
"eval_runtime": 17.4317,
"eval_samples_per_second": 22.66,
"eval_steps_per_second": 2.868,
"step": 59
},
{
"epoch": 0.25559105431309903,
"grad_norm": 0.2783537209033966,
"learning_rate": 0.00017640373758216077,
"loss": 0.483,
"step": 60
},
{
"epoch": 0.2598509052183174,
"grad_norm": 0.31082215905189514,
"learning_rate": 0.0001754914014817416,
"loss": 0.6473,
"step": 61
},
{
"epoch": 0.2641107561235357,
"grad_norm": 0.3206618130207062,
"learning_rate": 0.00017456421648831655,
"loss": 0.6289,
"step": 62
},
{
"epoch": 0.268370607028754,
"grad_norm": 0.2875254154205322,
"learning_rate": 0.00017362236497591094,
"loss": 0.594,
"step": 63
},
{
"epoch": 0.27263045793397234,
"grad_norm": 0.22950579226016998,
"learning_rate": 0.0001726660322034027,
"loss": 0.3886,
"step": 64
},
{
"epoch": 0.27689030883919064,
"grad_norm": 0.24293649196624756,
"learning_rate": 0.00017169540627808274,
"loss": 0.6129,
"step": 65
},
{
"epoch": 0.28115015974440893,
"grad_norm": 0.2611636519432068,
"learning_rate": 0.00017071067811865476,
"loss": 0.6891,
"step": 66
},
{
"epoch": 0.2854100106496273,
"grad_norm": 0.284407377243042,
"learning_rate": 0.00016971204141768233,
"loss": 0.516,
"step": 67
},
{
"epoch": 0.2896698615548456,
"grad_norm": 0.21485944092273712,
"learning_rate": 0.00016869969260349018,
"loss": 0.3826,
"step": 68
},
{
"epoch": 0.2939297124600639,
"grad_norm": 0.29337963461875916,
"learning_rate": 0.00016767383080152742,
"loss": 0.5696,
"step": 69
},
{
"epoch": 0.29818956336528224,
"grad_norm": 0.27099764347076416,
"learning_rate": 0.0001666346577952004,
"loss": 0.4708,
"step": 70
},
{
"epoch": 0.30244941427050054,
"grad_norm": 0.29055824875831604,
"learning_rate": 0.00016558237798618245,
"loss": 0.5844,
"step": 71
},
{
"epoch": 0.30670926517571884,
"grad_norm": 0.22874757647514343,
"learning_rate": 0.00016451719835420877,
"loss": 0.4412,
"step": 72
},
{
"epoch": 0.3109691160809372,
"grad_norm": 0.2926221489906311,
"learning_rate": 0.00016343932841636456,
"loss": 0.5757,
"step": 73
},
{
"epoch": 0.3152289669861555,
"grad_norm": 0.30070438981056213,
"learning_rate": 0.00016234898018587337,
"loss": 0.6063,
"step": 74
},
{
"epoch": 0.3194888178913738,
"grad_norm": 0.2475481927394867,
"learning_rate": 0.00016124636813039502,
"loss": 0.5056,
"step": 75
},
{
"epoch": 0.32374866879659214,
"grad_norm": 0.2851349711418152,
"learning_rate": 0.00016013170912984058,
"loss": 0.7547,
"step": 76
},
{
"epoch": 0.32800851970181044,
"grad_norm": 0.25569260120391846,
"learning_rate": 0.00015900522243371282,
"loss": 0.5168,
"step": 77
},
{
"epoch": 0.33226837060702874,
"grad_norm": 0.3774610757827759,
"learning_rate": 0.0001578671296179806,
"loss": 0.6691,
"step": 78
},
{
"epoch": 0.3365282215122471,
"grad_norm": 0.2339468151330948,
"learning_rate": 0.00015671765454149559,
"loss": 0.5021,
"step": 79
},
{
"epoch": 0.3407880724174654,
"grad_norm": 0.3066350519657135,
"learning_rate": 0.00015555702330196023,
"loss": 0.6838,
"step": 80
},
{
"epoch": 0.3450479233226837,
"grad_norm": 0.271908164024353,
"learning_rate": 0.00015438546419145488,
"loss": 0.4837,
"step": 81
},
{
"epoch": 0.34930777422790205,
"grad_norm": 0.304290771484375,
"learning_rate": 0.00015320320765153367,
"loss": 0.6768,
"step": 82
},
{
"epoch": 0.35356762513312034,
"grad_norm": 0.25685280561447144,
"learning_rate": 0.00015201048622789747,
"loss": 0.4335,
"step": 83
},
{
"epoch": 0.35782747603833864,
"grad_norm": 0.3003567159175873,
"learning_rate": 0.00015080753452465296,
"loss": 0.5836,
"step": 84
},
{
"epoch": 0.362087326943557,
"grad_norm": 0.2585873007774353,
"learning_rate": 0.0001495945891581668,
"loss": 0.5391,
"step": 85
},
{
"epoch": 0.3663471778487753,
"grad_norm": 0.30791282653808594,
"learning_rate": 0.000148371888710524,
"loss": 0.5103,
"step": 86
},
{
"epoch": 0.3706070287539936,
"grad_norm": 0.23016773164272308,
"learning_rate": 0.0001471396736825998,
"loss": 0.4269,
"step": 87
},
{
"epoch": 0.37486687965921195,
"grad_norm": 0.3137454390525818,
"learning_rate": 0.00014589818644675378,
"loss": 0.5116,
"step": 88
},
{
"epoch": 0.37912673056443025,
"grad_norm": 0.28078484535217285,
"learning_rate": 0.00014464767119915629,
"loss": 0.4388,
"step": 89
},
{
"epoch": 0.38338658146964855,
"grad_norm": 0.3163893222808838,
"learning_rate": 0.00014338837391175582,
"loss": 0.6122,
"step": 90
},
{
"epoch": 0.3876464323748669,
"grad_norm": 0.34674668312072754,
"learning_rate": 0.0001421205422838971,
"loss": 0.7114,
"step": 91
},
{
"epoch": 0.3919062832800852,
"grad_norm": 0.2210942953824997,
"learning_rate": 0.00014084442569359964,
"loss": 0.3351,
"step": 92
},
{
"epoch": 0.3961661341853035,
"grad_norm": 0.30586308240890503,
"learning_rate": 0.0001395602751485059,
"loss": 0.4845,
"step": 93
},
{
"epoch": 0.40042598509052185,
"grad_norm": 0.2695784568786621,
"learning_rate": 0.000138268343236509,
"loss": 0.4992,
"step": 94
},
{
"epoch": 0.40468583599574015,
"grad_norm": 0.2989813983440399,
"learning_rate": 0.00013696888407606952,
"loss": 0.585,
"step": 95
},
{
"epoch": 0.40894568690095845,
"grad_norm": 0.2759920656681061,
"learning_rate": 0.0001356621532662313,
"loss": 0.4492,
"step": 96
},
{
"epoch": 0.4132055378061768,
"grad_norm": 0.33117353916168213,
"learning_rate": 0.0001343484078363461,
"loss": 0.5606,
"step": 97
},
{
"epoch": 0.4174653887113951,
"grad_norm": 0.24572253227233887,
"learning_rate": 0.00013302790619551674,
"loss": 0.3261,
"step": 98
},
{
"epoch": 0.4217252396166134,
"grad_norm": 0.322480171918869,
"learning_rate": 0.00013170090808176883,
"loss": 0.5527,
"step": 99
},
{
"epoch": 0.42598509052183176,
"grad_norm": 0.3101179301738739,
"learning_rate": 0.00013036767451096148,
"loss": 0.5419,
"step": 100
},
{
"epoch": 0.43024494142705005,
"grad_norm": 0.3218703269958496,
"learning_rate": 0.00012902846772544624,
"loss": 0.5441,
"step": 101
},
{
"epoch": 0.43450479233226835,
"grad_norm": 0.26214686036109924,
"learning_rate": 0.00012768355114248494,
"loss": 0.5388,
"step": 102
},
{
"epoch": 0.4387646432374867,
"grad_norm": 0.421612411737442,
"learning_rate": 0.00012633318930243648,
"loss": 0.7557,
"step": 103
},
{
"epoch": 0.443024494142705,
"grad_norm": 0.5120344758033752,
"learning_rate": 0.0001249776478167227,
"loss": 0.7028,
"step": 104
},
{
"epoch": 0.4472843450479233,
"grad_norm": 0.27614736557006836,
"learning_rate": 0.00012361719331558345,
"loss": 0.3954,
"step": 105
},
{
"epoch": 0.45154419595314166,
"grad_norm": 0.269520103931427,
"learning_rate": 0.00012225209339563145,
"loss": 0.4851,
"step": 106
},
{
"epoch": 0.45580404685835996,
"grad_norm": 0.2739225924015045,
"learning_rate": 0.000120882616567217,
"loss": 0.4907,
"step": 107
},
{
"epoch": 0.46006389776357826,
"grad_norm": 0.33920663595199585,
"learning_rate": 0.00011950903220161285,
"loss": 0.6288,
"step": 108
},
{
"epoch": 0.4643237486687966,
"grad_norm": 0.279832124710083,
"learning_rate": 0.00011813161047802985,
"loss": 0.447,
"step": 109
},
{
"epoch": 0.4685835995740149,
"grad_norm": 0.31790605187416077,
"learning_rate": 0.00011675062233047364,
"loss": 0.5933,
"step": 110
},
{
"epoch": 0.4728434504792332,
"grad_norm": 0.24926939606666565,
"learning_rate": 0.000115366339394453,
"loss": 0.4061,
"step": 111
},
{
"epoch": 0.47710330138445156,
"grad_norm": 0.3327280282974243,
"learning_rate": 0.00011397903395354996,
"loss": 0.484,
"step": 112
},
{
"epoch": 0.48136315228966986,
"grad_norm": 0.37822094559669495,
"learning_rate": 0.00011258897888586255,
"loss": 0.6416,
"step": 113
},
{
"epoch": 0.48562300319488816,
"grad_norm": 0.35605669021606445,
"learning_rate": 0.00011119644761033078,
"loss": 0.6136,
"step": 114
},
{
"epoch": 0.4898828541001065,
"grad_norm": 0.3513132929801941,
"learning_rate": 0.0001098017140329561,
"loss": 0.6299,
"step": 115
},
{
"epoch": 0.4941427050053248,
"grad_norm": 0.3040708899497986,
"learning_rate": 0.00010840505249292476,
"loss": 0.4658,
"step": 116
},
{
"epoch": 0.4984025559105431,
"grad_norm": 0.19006308913230896,
"learning_rate": 0.00010700673770864673,
"loss": 0.2694,
"step": 117
},
{
"epoch": 0.5026624068157615,
"grad_norm": 0.30643633008003235,
"learning_rate": 0.00010560704472371919,
"loss": 0.4492,
"step": 118
},
{
"epoch": 0.5026624068157615,
"eval_loss": 0.5326976180076599,
"eval_runtime": 17.5872,
"eval_samples_per_second": 22.46,
"eval_steps_per_second": 2.843,
"step": 118
},
{
"epoch": 0.5069222577209798,
"grad_norm": 0.3698013722896576,
"learning_rate": 0.00010420624885282653,
"loss": 0.6993,
"step": 119
},
{
"epoch": 0.5111821086261981,
"grad_norm": 0.2801634967327118,
"learning_rate": 0.0001028046256275869,
"loss": 0.4059,
"step": 120
},
{
"epoch": 0.5154419595314164,
"grad_norm": 0.2864643931388855,
"learning_rate": 0.00010140245074235624,
"loss": 0.5024,
"step": 121
},
{
"epoch": 0.5197018104366348,
"grad_norm": 0.30105265974998474,
"learning_rate": 0.0001,
"loss": 0.6774,
"step": 122
},
{
"epoch": 0.5239616613418531,
"grad_norm": 0.39152050018310547,
"learning_rate": 9.859754925764378e-05,
"loss": 0.625,
"step": 123
},
{
"epoch": 0.5282215122470714,
"grad_norm": 0.3618883192539215,
"learning_rate": 9.719537437241312e-05,
"loss": 0.6978,
"step": 124
},
{
"epoch": 0.5324813631522897,
"grad_norm": 0.23670899868011475,
"learning_rate": 9.579375114717351e-05,
"loss": 0.3379,
"step": 125
},
{
"epoch": 0.536741214057508,
"grad_norm": 0.3124864101409912,
"learning_rate": 9.439295527628081e-05,
"loss": 0.525,
"step": 126
},
{
"epoch": 0.5410010649627263,
"grad_norm": 0.3667398989200592,
"learning_rate": 9.299326229135326e-05,
"loss": 0.6164,
"step": 127
},
{
"epoch": 0.5452609158679447,
"grad_norm": 0.2894105613231659,
"learning_rate": 9.159494750707526e-05,
"loss": 0.4335,
"step": 128
},
{
"epoch": 0.549520766773163,
"grad_norm": 0.30680200457572937,
"learning_rate": 9.019828596704394e-05,
"loss": 0.4507,
"step": 129
},
{
"epoch": 0.5537806176783813,
"grad_norm": 0.3676758110523224,
"learning_rate": 8.880355238966923e-05,
"loss": 0.5955,
"step": 130
},
{
"epoch": 0.5580404685835996,
"grad_norm": 0.3194178342819214,
"learning_rate": 8.741102111413748e-05,
"loss": 0.5675,
"step": 131
},
{
"epoch": 0.5623003194888179,
"grad_norm": 0.29750558733940125,
"learning_rate": 8.602096604645009e-05,
"loss": 0.5785,
"step": 132
},
{
"epoch": 0.5665601703940362,
"grad_norm": 0.37204545736312866,
"learning_rate": 8.463366060554698e-05,
"loss": 0.612,
"step": 133
},
{
"epoch": 0.5708200212992546,
"grad_norm": 0.36891940236091614,
"learning_rate": 8.324937766952638e-05,
"loss": 0.5463,
"step": 134
},
{
"epoch": 0.5750798722044729,
"grad_norm": 0.2863575518131256,
"learning_rate": 8.186838952197018e-05,
"loss": 0.4884,
"step": 135
},
{
"epoch": 0.5793397231096912,
"grad_norm": 0.354523241519928,
"learning_rate": 8.049096779838719e-05,
"loss": 0.7727,
"step": 136
},
{
"epoch": 0.5835995740149095,
"grad_norm": 0.30339759588241577,
"learning_rate": 7.911738343278304e-05,
"loss": 0.5543,
"step": 137
},
{
"epoch": 0.5878594249201278,
"grad_norm": 0.27778202295303345,
"learning_rate": 7.774790660436858e-05,
"loss": 0.4716,
"step": 138
},
{
"epoch": 0.5921192758253461,
"grad_norm": 0.38618960976600647,
"learning_rate": 7.63828066844166e-05,
"loss": 0.6519,
"step": 139
},
{
"epoch": 0.5963791267305645,
"grad_norm": 0.3573627769947052,
"learning_rate": 7.502235218327731e-05,
"loss": 0.5128,
"step": 140
},
{
"epoch": 0.6006389776357828,
"grad_norm": 0.30529165267944336,
"learning_rate": 7.366681069756352e-05,
"loss": 0.5184,
"step": 141
},
{
"epoch": 0.6048988285410011,
"grad_norm": 0.2819828987121582,
"learning_rate": 7.231644885751507e-05,
"loss": 0.4259,
"step": 142
},
{
"epoch": 0.6091586794462194,
"grad_norm": 0.32307252287864685,
"learning_rate": 7.097153227455379e-05,
"loss": 0.6048,
"step": 143
},
{
"epoch": 0.6134185303514377,
"grad_norm": 0.31262722611427307,
"learning_rate": 6.963232548903853e-05,
"loss": 0.4834,
"step": 144
},
{
"epoch": 0.617678381256656,
"grad_norm": 0.318851500749588,
"learning_rate": 6.829909191823121e-05,
"loss": 0.5011,
"step": 145
},
{
"epoch": 0.6219382321618744,
"grad_norm": 0.44246405363082886,
"learning_rate": 6.697209380448333e-05,
"loss": 0.4384,
"step": 146
},
{
"epoch": 0.6261980830670927,
"grad_norm": 0.3459945023059845,
"learning_rate": 6.565159216365389e-05,
"loss": 0.5657,
"step": 147
},
{
"epoch": 0.630457933972311,
"grad_norm": 0.33843329548835754,
"learning_rate": 6.43378467337687e-05,
"loss": 0.5711,
"step": 148
},
{
"epoch": 0.6347177848775293,
"grad_norm": 0.3812694549560547,
"learning_rate": 6.30311159239305e-05,
"loss": 0.6142,
"step": 149
},
{
"epoch": 0.6389776357827476,
"grad_norm": 0.29333916306495667,
"learning_rate": 6.173165676349103e-05,
"loss": 0.585,
"step": 150
},
{
"epoch": 0.6432374866879659,
"grad_norm": 0.2884041666984558,
"learning_rate": 6.043972485149414e-05,
"loss": 0.4866,
"step": 151
},
{
"epoch": 0.6474973375931843,
"grad_norm": 0.33954814076423645,
"learning_rate": 5.9155574306400395e-05,
"loss": 0.571,
"step": 152
},
{
"epoch": 0.6517571884984026,
"grad_norm": 0.33935782313346863,
"learning_rate": 5.787945771610296e-05,
"loss": 0.5037,
"step": 153
},
{
"epoch": 0.6560170394036209,
"grad_norm": 0.27371054887771606,
"learning_rate": 5.6611626088244194e-05,
"loss": 0.3322,
"step": 154
},
{
"epoch": 0.6602768903088392,
"grad_norm": 0.30788496136665344,
"learning_rate": 5.5352328800843724e-05,
"loss": 0.4454,
"step": 155
},
{
"epoch": 0.6645367412140575,
"grad_norm": 0.34366151690483093,
"learning_rate": 5.410181355324622e-05,
"loss": 0.5788,
"step": 156
},
{
"epoch": 0.6687965921192758,
"grad_norm": 0.33698371052742004,
"learning_rate": 5.286032631740023e-05,
"loss": 0.4378,
"step": 157
},
{
"epoch": 0.6730564430244942,
"grad_norm": 0.4181162416934967,
"learning_rate": 5.162811128947602e-05,
"loss": 0.5367,
"step": 158
},
{
"epoch": 0.6773162939297125,
"grad_norm": 0.4480881690979004,
"learning_rate": 5.0405410841833253e-05,
"loss": 0.6633,
"step": 159
},
{
"epoch": 0.6815761448349308,
"grad_norm": 0.37488028407096863,
"learning_rate": 4.919246547534708e-05,
"loss": 0.5402,
"step": 160
},
{
"epoch": 0.6858359957401491,
"grad_norm": 0.2964366376399994,
"learning_rate": 4.7989513772102537e-05,
"loss": 0.4109,
"step": 161
},
{
"epoch": 0.6900958466453674,
"grad_norm": 0.35376259684562683,
"learning_rate": 4.6796792348466356e-05,
"loss": 0.636,
"step": 162
},
{
"epoch": 0.6943556975505857,
"grad_norm": 0.3158915638923645,
"learning_rate": 4.561453580854516e-05,
"loss": 0.4893,
"step": 163
},
{
"epoch": 0.6986155484558041,
"grad_norm": 0.420785516500473,
"learning_rate": 4.444297669803981e-05,
"loss": 0.7147,
"step": 164
},
{
"epoch": 0.7028753993610224,
"grad_norm": 0.3272782564163208,
"learning_rate": 4.328234545850442e-05,
"loss": 0.3444,
"step": 165
},
{
"epoch": 0.7071352502662407,
"grad_norm": 0.30052492022514343,
"learning_rate": 4.213287038201943e-05,
"loss": 0.5209,
"step": 166
},
{
"epoch": 0.711395101171459,
"grad_norm": 0.37648481130599976,
"learning_rate": 4.0994777566287204e-05,
"loss": 0.684,
"step": 167
},
{
"epoch": 0.7156549520766773,
"grad_norm": 0.3135606646537781,
"learning_rate": 3.9868290870159405e-05,
"loss": 0.4871,
"step": 168
},
{
"epoch": 0.7199148029818956,
"grad_norm": 0.33847576379776,
"learning_rate": 3.875363186960499e-05,
"loss": 0.5294,
"step": 169
},
{
"epoch": 0.724174653887114,
"grad_norm": 0.3337070047855377,
"learning_rate": 3.7651019814126654e-05,
"loss": 0.4425,
"step": 170
},
{
"epoch": 0.7284345047923323,
"grad_norm": 0.4173165261745453,
"learning_rate": 3.6560671583635467e-05,
"loss": 0.637,
"step": 171
},
{
"epoch": 0.7326943556975506,
"grad_norm": 0.41098451614379883,
"learning_rate": 3.548280164579126e-05,
"loss": 0.52,
"step": 172
},
{
"epoch": 0.7369542066027689,
"grad_norm": 0.3789665699005127,
"learning_rate": 3.4417622013817595e-05,
"loss": 0.5995,
"step": 173
},
{
"epoch": 0.7412140575079872,
"grad_norm": 0.3996846675872803,
"learning_rate": 3.336534220479961e-05,
"loss": 0.6237,
"step": 174
},
{
"epoch": 0.7454739084132055,
"grad_norm": 0.3990687131881714,
"learning_rate": 3.2326169198472556e-05,
"loss": 0.555,
"step": 175
},
{
"epoch": 0.7497337593184239,
"grad_norm": 0.32280924916267395,
"learning_rate": 3.130030739650983e-05,
"loss": 0.4742,
"step": 176
},
{
"epoch": 0.7539936102236422,
"grad_norm": 0.4192362129688263,
"learning_rate": 3.0287958582317676e-05,
"loss": 0.6569,
"step": 177
},
{
"epoch": 0.7539936102236422,
"eval_loss": 0.5110090970993042,
"eval_runtime": 20.8066,
"eval_samples_per_second": 18.984,
"eval_steps_per_second": 2.403,
"step": 177
},
{
"epoch": 0.7582534611288605,
"grad_norm": 0.35410746932029724,
"learning_rate": 2.9289321881345254e-05,
"loss": 0.4828,
"step": 178
},
{
"epoch": 0.7625133120340788,
"grad_norm": 0.4463326036930084,
"learning_rate": 2.8304593721917285e-05,
"loss": 0.6976,
"step": 179
},
{
"epoch": 0.7667731629392971,
"grad_norm": 0.29797378182411194,
"learning_rate": 2.7333967796597315e-05,
"loss": 0.564,
"step": 180
},
{
"epoch": 0.7710330138445154,
"grad_norm": 0.31337812542915344,
"learning_rate": 2.6377635024089087e-05,
"loss": 0.5607,
"step": 181
},
{
"epoch": 0.7752928647497338,
"grad_norm": 0.40470513701438904,
"learning_rate": 2.5435783511683443e-05,
"loss": 0.6428,
"step": 182
},
{
"epoch": 0.7795527156549521,
"grad_norm": 0.413817822933197,
"learning_rate": 2.450859851825842e-05,
"loss": 0.7303,
"step": 183
},
{
"epoch": 0.7838125665601704,
"grad_norm": 0.2931414842605591,
"learning_rate": 2.3596262417839255e-05,
"loss": 0.4051,
"step": 184
},
{
"epoch": 0.7880724174653887,
"grad_norm": 0.34086865186691284,
"learning_rate": 2.26989546637263e-05,
"loss": 0.4329,
"step": 185
},
{
"epoch": 0.792332268370607,
"grad_norm": 0.40336307883262634,
"learning_rate": 2.181685175319702e-05,
"loss": 0.5791,
"step": 186
},
{
"epoch": 0.7965921192758253,
"grad_norm": 0.30092838406562805,
"learning_rate": 2.095012719278966e-05,
"loss": 0.4491,
"step": 187
},
{
"epoch": 0.8008519701810437,
"grad_norm": 0.31043168902397156,
"learning_rate": 2.009895146417512e-05,
"loss": 0.4681,
"step": 188
},
{
"epoch": 0.805111821086262,
"grad_norm": 0.3712119162082672,
"learning_rate": 1.926349199062376e-05,
"loss": 0.549,
"step": 189
},
{
"epoch": 0.8093716719914803,
"grad_norm": 0.3679051995277405,
"learning_rate": 1.8443913104073983e-05,
"loss": 0.4971,
"step": 190
},
{
"epoch": 0.8136315228966986,
"grad_norm": 0.3244669735431671,
"learning_rate": 1.7640376012808536e-05,
"loss": 0.4732,
"step": 191
},
{
"epoch": 0.8178913738019169,
"grad_norm": 0.28653696179389954,
"learning_rate": 1.6853038769745467e-05,
"loss": 0.3469,
"step": 192
},
{
"epoch": 0.8221512247071352,
"grad_norm": 0.3144218325614929,
"learning_rate": 1.6082056241349786e-05,
"loss": 0.5127,
"step": 193
},
{
"epoch": 0.8264110756123536,
"grad_norm": 0.3801470994949341,
"learning_rate": 1.5327580077171587e-05,
"loss": 0.5178,
"step": 194
},
{
"epoch": 0.8306709265175719,
"grad_norm": 0.37223386764526367,
"learning_rate": 1.4589758680017263e-05,
"loss": 0.5114,
"step": 195
},
{
"epoch": 0.8349307774227902,
"grad_norm": 0.4167802333831787,
"learning_rate": 1.3868737176759106e-05,
"loss": 0.6949,
"step": 196
},
{
"epoch": 0.8391906283280085,
"grad_norm": 0.620794951915741,
"learning_rate": 1.3164657389789458e-05,
"loss": 0.7015,
"step": 197
},
{
"epoch": 0.8434504792332268,
"grad_norm": 0.32053133845329285,
"learning_rate": 1.2477657809124631e-05,
"loss": 0.5328,
"step": 198
},
{
"epoch": 0.8477103301384451,
"grad_norm": 0.41892528533935547,
"learning_rate": 1.1807873565164506e-05,
"loss": 0.5929,
"step": 199
},
{
"epoch": 0.8519701810436635,
"grad_norm": 0.2980664372444153,
"learning_rate": 1.1155436402112785e-05,
"loss": 0.4209,
"step": 200
},
{
"epoch": 0.8562300319488818,
"grad_norm": 0.3290930986404419,
"learning_rate": 1.0520474652063394e-05,
"loss": 0.4423,
"step": 201
},
{
"epoch": 0.8604898828541001,
"grad_norm": 0.3246372640132904,
"learning_rate": 9.903113209758096e-06,
"loss": 0.4631,
"step": 202
},
{
"epoch": 0.8647497337593184,
"grad_norm": 0.3644905388355255,
"learning_rate": 9.303473508019944e-06,
"loss": 0.552,
"step": 203
},
{
"epoch": 0.8690095846645367,
"grad_norm": 0.49974295496940613,
"learning_rate": 8.72167349386811e-06,
"loss": 0.7516,
"step": 204
},
{
"epoch": 0.873269435569755,
"grad_norm": 0.3242340087890625,
"learning_rate": 8.157827605317892e-06,
"loss": 0.412,
"step": 205
},
{
"epoch": 0.8775292864749734,
"grad_norm": 0.33690881729125977,
"learning_rate": 7.612046748871327e-06,
"loss": 0.4841,
"step": 206
},
{
"epoch": 0.8817891373801917,
"grad_norm": 0.3246766924858093,
"learning_rate": 7.084438277702188e-06,
"loss": 0.4341,
"step": 207
},
{
"epoch": 0.88604898828541,
"grad_norm": 0.4262131452560425,
"learning_rate": 6.5751059705400295e-06,
"loss": 0.6306,
"step": 208
},
{
"epoch": 0.8903088391906283,
"grad_norm": 0.32158687710762024,
"learning_rate": 6.084150011257239e-06,
"loss": 0.4687,
"step": 209
},
{
"epoch": 0.8945686900958466,
"grad_norm": 0.377208948135376,
"learning_rate": 5.611666969163243e-06,
"loss": 0.5849,
"step": 210
},
{
"epoch": 0.898828541001065,
"grad_norm": 0.30956804752349854,
"learning_rate": 5.157749780009735e-06,
"loss": 0.4355,
"step": 211
},
{
"epoch": 0.9030883919062833,
"grad_norm": 0.4885202944278717,
"learning_rate": 4.722487727710368e-06,
"loss": 0.6129,
"step": 212
},
{
"epoch": 0.9073482428115016,
"grad_norm": 0.3384571075439453,
"learning_rate": 4.305966426779118e-06,
"loss": 0.4345,
"step": 213
},
{
"epoch": 0.9116080937167199,
"grad_norm": 0.4629303514957428,
"learning_rate": 3.908267805490051e-06,
"loss": 0.6158,
"step": 214
},
{
"epoch": 0.9158679446219382,
"grad_norm": 0.3206894099712372,
"learning_rate": 3.529470089762421e-06,
"loss": 0.4689,
"step": 215
},
{
"epoch": 0.9201277955271565,
"grad_norm": 0.41424816846847534,
"learning_rate": 3.169647787773866e-06,
"loss": 0.5235,
"step": 216
},
{
"epoch": 0.9243876464323749,
"grad_norm": 0.3189912736415863,
"learning_rate": 2.8288716753049005e-06,
"loss": 0.4262,
"step": 217
},
{
"epoch": 0.9286474973375932,
"grad_norm": 0.3202993869781494,
"learning_rate": 2.5072087818176382e-06,
"loss": 0.493,
"step": 218
},
{
"epoch": 0.9329073482428115,
"grad_norm": 0.32974228262901306,
"learning_rate": 2.20472237727124e-06,
"loss": 0.5512,
"step": 219
},
{
"epoch": 0.9371671991480298,
"grad_norm": 0.27346375584602356,
"learning_rate": 1.921471959676957e-06,
"loss": 0.4154,
"step": 220
},
{
"epoch": 0.9414270500532481,
"grad_norm": 0.29039615392684937,
"learning_rate": 1.657513243395159e-06,
"loss": 0.4367,
"step": 221
},
{
"epoch": 0.9456869009584664,
"grad_norm": 0.3864074647426605,
"learning_rate": 1.4128981481764115e-06,
"loss": 0.5531,
"step": 222
},
{
"epoch": 0.9499467518636848,
"grad_norm": 0.4020756185054779,
"learning_rate": 1.1876747889491223e-06,
"loss": 0.6631,
"step": 223
},
{
"epoch": 0.9542066027689031,
"grad_norm": 0.37475478649139404,
"learning_rate": 9.818874663554357e-07,
"loss": 0.4979,
"step": 224
},
{
"epoch": 0.9584664536741214,
"grad_norm": 0.3484041690826416,
"learning_rate": 7.955766580375335e-07,
"loss": 0.5207,
"step": 225
},
{
"epoch": 0.9627263045793397,
"grad_norm": 0.3385999798774719,
"learning_rate": 6.287790106757396e-07,
"loss": 0.4832,
"step": 226
},
{
"epoch": 0.966986155484558,
"grad_norm": 0.27909693121910095,
"learning_rate": 4.815273327803182e-07,
"loss": 0.3506,
"step": 227
},
{
"epoch": 0.9712460063897763,
"grad_norm": 0.34196606278419495,
"learning_rate": 3.5385058823809156e-07,
"loss": 0.5283,
"step": 228
},
{
"epoch": 0.9755058572949947,
"grad_norm": 0.39571547508239746,
"learning_rate": 2.457738906153972e-07,
"loss": 0.5099,
"step": 229
},
{
"epoch": 0.979765708200213,
"grad_norm": 0.4107287526130676,
"learning_rate": 1.5731849821833954e-07,
"loss": 0.5042,
"step": 230
},
{
"epoch": 0.9840255591054313,
"grad_norm": 0.3254135251045227,
"learning_rate": 8.850180991131219e-08,
"loss": 0.4929,
"step": 231
},
{
"epoch": 0.9882854100106496,
"grad_norm": 0.2778495252132416,
"learning_rate": 3.933736169471347e-08,
"loss": 0.3571,
"step": 232
},
{
"epoch": 0.9925452609158679,
"grad_norm": 0.4703886806964874,
"learning_rate": 9.834824042498358e-09,
"loss": 0.7013,
"step": 233
},
{
"epoch": 0.9968051118210862,
"grad_norm": 0.3349379599094391,
"learning_rate": 0.0,
"loss": 0.4725,
"step": 234
}
],
"logging_steps": 1,
"max_steps": 234,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.1191352754700288e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}