nttx's picture
Training in progress, step 50, checkpoint
0fb4880 verified
raw
history blame
9.88 kB
{
"best_metric": 3.298891544342041,
"best_model_checkpoint": "miner_id_24/checkpoint-50",
"epoch": 0.21119324181626187,
"eval_steps": 50,
"global_step": 50,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.004223864836325237,
"grad_norm": 1.4468297958374023,
"learning_rate": 1e-05,
"loss": 2.4216,
"step": 1
},
{
"epoch": 0.004223864836325237,
"eval_loss": 3.413026809692383,
"eval_runtime": 15.4813,
"eval_samples_per_second": 12.919,
"eval_steps_per_second": 3.23,
"step": 1
},
{
"epoch": 0.008447729672650475,
"grad_norm": 1.4951223134994507,
"learning_rate": 2e-05,
"loss": 2.4719,
"step": 2
},
{
"epoch": 0.012671594508975714,
"grad_norm": 1.378113031387329,
"learning_rate": 3e-05,
"loss": 2.4266,
"step": 3
},
{
"epoch": 0.01689545934530095,
"grad_norm": 1.3803629875183105,
"learning_rate": 4e-05,
"loss": 2.4242,
"step": 4
},
{
"epoch": 0.021119324181626188,
"grad_norm": 1.3718631267547607,
"learning_rate": 5e-05,
"loss": 2.429,
"step": 5
},
{
"epoch": 0.025343189017951427,
"grad_norm": 1.218284010887146,
"learning_rate": 6e-05,
"loss": 2.4887,
"step": 6
},
{
"epoch": 0.029567053854276663,
"grad_norm": 1.161075234413147,
"learning_rate": 7e-05,
"loss": 2.4304,
"step": 7
},
{
"epoch": 0.0337909186906019,
"grad_norm": 1.3160911798477173,
"learning_rate": 8e-05,
"loss": 2.3349,
"step": 8
},
{
"epoch": 0.03801478352692714,
"grad_norm": 1.6408723592758179,
"learning_rate": 9e-05,
"loss": 2.3673,
"step": 9
},
{
"epoch": 0.042238648363252376,
"grad_norm": 1.8385587930679321,
"learning_rate": 0.0001,
"loss": 2.2662,
"step": 10
},
{
"epoch": 0.046462513199577615,
"grad_norm": 1.25449800491333,
"learning_rate": 9.99695413509548e-05,
"loss": 2.223,
"step": 11
},
{
"epoch": 0.050686378035902854,
"grad_norm": 1.3850810527801514,
"learning_rate": 9.987820251299122e-05,
"loss": 2.2053,
"step": 12
},
{
"epoch": 0.054910242872228086,
"grad_norm": 1.521848440170288,
"learning_rate": 9.972609476841367e-05,
"loss": 2.14,
"step": 13
},
{
"epoch": 0.059134107708553325,
"grad_norm": 0.7431579828262329,
"learning_rate": 9.951340343707852e-05,
"loss": 2.0468,
"step": 14
},
{
"epoch": 0.06335797254487856,
"grad_norm": 1.3225431442260742,
"learning_rate": 9.924038765061042e-05,
"loss": 2.0101,
"step": 15
},
{
"epoch": 0.0675818373812038,
"grad_norm": 1.1983656883239746,
"learning_rate": 9.890738003669029e-05,
"loss": 2.0347,
"step": 16
},
{
"epoch": 0.07180570221752904,
"grad_norm": 0.9302838444709778,
"learning_rate": 9.851478631379982e-05,
"loss": 2.0708,
"step": 17
},
{
"epoch": 0.07602956705385427,
"grad_norm": 1.3141449689865112,
"learning_rate": 9.806308479691595e-05,
"loss": 2.0202,
"step": 18
},
{
"epoch": 0.08025343189017951,
"grad_norm": 2.162736654281616,
"learning_rate": 9.755282581475769e-05,
"loss": 2.08,
"step": 19
},
{
"epoch": 0.08447729672650475,
"grad_norm": 1.8238545656204224,
"learning_rate": 9.698463103929542e-05,
"loss": 2.0146,
"step": 20
},
{
"epoch": 0.08870116156282999,
"grad_norm": 2.3217670917510986,
"learning_rate": 9.635919272833938e-05,
"loss": 2.029,
"step": 21
},
{
"epoch": 0.09292502639915523,
"grad_norm": 1.7219942808151245,
"learning_rate": 9.567727288213005e-05,
"loss": 2.0183,
"step": 22
},
{
"epoch": 0.09714889123548047,
"grad_norm": 1.9738433361053467,
"learning_rate": 9.493970231495835e-05,
"loss": 1.9807,
"step": 23
},
{
"epoch": 0.10137275607180571,
"grad_norm": 1.6816790103912354,
"learning_rate": 9.414737964294636e-05,
"loss": 1.8833,
"step": 24
},
{
"epoch": 0.10559662090813093,
"grad_norm": 2.078090190887451,
"learning_rate": 9.330127018922194e-05,
"loss": 1.9582,
"step": 25
},
{
"epoch": 0.10982048574445617,
"grad_norm": 2.3857650756835938,
"learning_rate": 9.24024048078213e-05,
"loss": 1.8632,
"step": 26
},
{
"epoch": 0.11404435058078141,
"grad_norm": 1.9374221563339233,
"learning_rate": 9.145187862775209e-05,
"loss": 1.9176,
"step": 27
},
{
"epoch": 0.11826821541710665,
"grad_norm": 3.010812282562256,
"learning_rate": 9.045084971874738e-05,
"loss": 1.9108,
"step": 28
},
{
"epoch": 0.12249208025343189,
"grad_norm": 3.265596628189087,
"learning_rate": 8.940053768033609e-05,
"loss": 1.7971,
"step": 29
},
{
"epoch": 0.12671594508975711,
"grad_norm": 1.8755942583084106,
"learning_rate": 8.83022221559489e-05,
"loss": 1.823,
"step": 30
},
{
"epoch": 0.13093980992608237,
"grad_norm": 2.2471399307250977,
"learning_rate": 8.715724127386972e-05,
"loss": 1.7691,
"step": 31
},
{
"epoch": 0.1351636747624076,
"grad_norm": 2.2615737915039062,
"learning_rate": 8.596699001693255e-05,
"loss": 1.9749,
"step": 32
},
{
"epoch": 0.13938753959873285,
"grad_norm": 2.1230502128601074,
"learning_rate": 8.473291852294987e-05,
"loss": 1.6917,
"step": 33
},
{
"epoch": 0.14361140443505807,
"grad_norm": 2.2565455436706543,
"learning_rate": 8.345653031794292e-05,
"loss": 1.737,
"step": 34
},
{
"epoch": 0.14783526927138332,
"grad_norm": 2.0613744258880615,
"learning_rate": 8.213938048432697e-05,
"loss": 1.7182,
"step": 35
},
{
"epoch": 0.15205913410770855,
"grad_norm": 1.7652757167816162,
"learning_rate": 8.07830737662829e-05,
"loss": 1.8768,
"step": 36
},
{
"epoch": 0.1562829989440338,
"grad_norm": 1.8088558912277222,
"learning_rate": 7.938926261462366e-05,
"loss": 1.872,
"step": 37
},
{
"epoch": 0.16050686378035903,
"grad_norm": 1.8547643423080444,
"learning_rate": 7.795964517353735e-05,
"loss": 1.8,
"step": 38
},
{
"epoch": 0.16473072861668428,
"grad_norm": 3.2694742679595947,
"learning_rate": 7.649596321166024e-05,
"loss": 2.0919,
"step": 39
},
{
"epoch": 0.1689545934530095,
"grad_norm": 3.890437126159668,
"learning_rate": 7.500000000000001e-05,
"loss": 2.1158,
"step": 40
},
{
"epoch": 0.17317845828933473,
"grad_norm": 3.148890256881714,
"learning_rate": 7.347357813929454e-05,
"loss": 1.9941,
"step": 41
},
{
"epoch": 0.17740232312565998,
"grad_norm": 2.840927839279175,
"learning_rate": 7.191855733945387e-05,
"loss": 1.5792,
"step": 42
},
{
"epoch": 0.1816261879619852,
"grad_norm": 3.339871406555176,
"learning_rate": 7.033683215379002e-05,
"loss": 1.7262,
"step": 43
},
{
"epoch": 0.18585005279831046,
"grad_norm": 3.198982000350952,
"learning_rate": 6.873032967079561e-05,
"loss": 1.675,
"step": 44
},
{
"epoch": 0.1900739176346357,
"grad_norm": 3.2554752826690674,
"learning_rate": 6.710100716628344e-05,
"loss": 1.4044,
"step": 45
},
{
"epoch": 0.19429778247096094,
"grad_norm": 2.540999412536621,
"learning_rate": 6.545084971874738e-05,
"loss": 1.554,
"step": 46
},
{
"epoch": 0.19852164730728616,
"grad_norm": 2.6020541191101074,
"learning_rate": 6.378186779084995e-05,
"loss": 1.3886,
"step": 47
},
{
"epoch": 0.20274551214361142,
"grad_norm": 3.0984528064727783,
"learning_rate": 6.209609477998338e-05,
"loss": 1.6801,
"step": 48
},
{
"epoch": 0.20696937697993664,
"grad_norm": 2.8602421283721924,
"learning_rate": 6.0395584540887963e-05,
"loss": 1.3767,
"step": 49
},
{
"epoch": 0.21119324181626187,
"grad_norm": 4.274623870849609,
"learning_rate": 5.868240888334653e-05,
"loss": 1.359,
"step": 50
},
{
"epoch": 0.21119324181626187,
"eval_loss": 3.298891544342041,
"eval_runtime": 15.8578,
"eval_samples_per_second": 12.612,
"eval_steps_per_second": 3.153,
"step": 50
}
],
"logging_steps": 1,
"max_steps": 100,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.6463460040704e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}