Zestor's picture
Upload folder using huggingface_hub
2651cb3
raw
history blame
15.4 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 25.0,
"global_step": 3000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2,
"learning_rate": 1.5333333333333334e-05,
"loss": 1.135,
"step": 24
},
{
"epoch": 0.4,
"learning_rate": 3.1333333333333334e-05,
"loss": 0.9975,
"step": 48
},
{
"epoch": 0.6,
"learning_rate": 4.7333333333333336e-05,
"loss": 0.8521,
"step": 72
},
{
"epoch": 0.8,
"learning_rate": 6.333333333333333e-05,
"loss": 0.6949,
"step": 96
},
{
"epoch": 1.0,
"learning_rate": 7.933333333333334e-05,
"loss": 0.5953,
"step": 120
},
{
"epoch": 1.2,
"learning_rate": 9.533333333333334e-05,
"loss": 0.5087,
"step": 144
},
{
"epoch": 1.4,
"learning_rate": 0.00011133333333333333,
"loss": 0.4767,
"step": 168
},
{
"epoch": 1.6,
"learning_rate": 0.00012733333333333336,
"loss": 0.4546,
"step": 192
},
{
"epoch": 1.8,
"learning_rate": 0.00014333333333333334,
"loss": 0.4363,
"step": 216
},
{
"epoch": 2.0,
"learning_rate": 0.00015933333333333332,
"loss": 0.4296,
"step": 240
},
{
"epoch": 2.2,
"learning_rate": 0.00017533333333333336,
"loss": 0.3956,
"step": 264
},
{
"epoch": 2.4,
"learning_rate": 0.00019133333333333334,
"loss": 0.3875,
"step": 288
},
{
"epoch": 2.6,
"learning_rate": 0.0001991851851851852,
"loss": 0.3885,
"step": 312
},
{
"epoch": 2.8,
"learning_rate": 0.00019740740740740743,
"loss": 0.3827,
"step": 336
},
{
"epoch": 3.0,
"learning_rate": 0.00019562962962962964,
"loss": 0.378,
"step": 360
},
{
"epoch": 3.2,
"learning_rate": 0.00019385185185185187,
"loss": 0.3337,
"step": 384
},
{
"epoch": 3.4,
"learning_rate": 0.00019207407407407408,
"loss": 0.3448,
"step": 408
},
{
"epoch": 3.6,
"learning_rate": 0.00019029629629629632,
"loss": 0.3409,
"step": 432
},
{
"epoch": 3.8,
"learning_rate": 0.00018851851851851853,
"loss": 0.3446,
"step": 456
},
{
"epoch": 4.0,
"learning_rate": 0.00018674074074074074,
"loss": 0.3396,
"step": 480
},
{
"epoch": 4.2,
"learning_rate": 0.00018496296296296297,
"loss": 0.2972,
"step": 504
},
{
"epoch": 4.4,
"learning_rate": 0.00018318518518518518,
"loss": 0.3032,
"step": 528
},
{
"epoch": 4.6,
"learning_rate": 0.00018140740740740742,
"loss": 0.3051,
"step": 552
},
{
"epoch": 4.8,
"learning_rate": 0.00017962962962962963,
"loss": 0.307,
"step": 576
},
{
"epoch": 5.0,
"learning_rate": 0.00017785185185185186,
"loss": 0.3093,
"step": 600
},
{
"epoch": 5.2,
"learning_rate": 0.00017607407407407407,
"loss": 0.261,
"step": 624
},
{
"epoch": 5.4,
"learning_rate": 0.0001742962962962963,
"loss": 0.2686,
"step": 648
},
{
"epoch": 5.6,
"learning_rate": 0.00017251851851851852,
"loss": 0.2753,
"step": 672
},
{
"epoch": 5.8,
"learning_rate": 0.00017074074074074075,
"loss": 0.2802,
"step": 696
},
{
"epoch": 6.0,
"learning_rate": 0.00016896296296296296,
"loss": 0.2805,
"step": 720
},
{
"epoch": 6.2,
"learning_rate": 0.0001671851851851852,
"loss": 0.2335,
"step": 744
},
{
"epoch": 6.4,
"learning_rate": 0.0001654074074074074,
"loss": 0.234,
"step": 768
},
{
"epoch": 6.6,
"learning_rate": 0.00016362962962962964,
"loss": 0.2434,
"step": 792
},
{
"epoch": 6.8,
"learning_rate": 0.00016185185185185185,
"loss": 0.253,
"step": 816
},
{
"epoch": 7.0,
"learning_rate": 0.0001600740740740741,
"loss": 0.2481,
"step": 840
},
{
"epoch": 7.2,
"learning_rate": 0.0001582962962962963,
"loss": 0.2053,
"step": 864
},
{
"epoch": 7.4,
"learning_rate": 0.00015651851851851854,
"loss": 0.2058,
"step": 888
},
{
"epoch": 7.6,
"learning_rate": 0.00015474074074074074,
"loss": 0.213,
"step": 912
},
{
"epoch": 7.8,
"learning_rate": 0.00015296296296296298,
"loss": 0.2155,
"step": 936
},
{
"epoch": 8.0,
"learning_rate": 0.0001511851851851852,
"loss": 0.2218,
"step": 960
},
{
"epoch": 8.2,
"learning_rate": 0.00014940740740740743,
"loss": 0.1721,
"step": 984
},
{
"epoch": 8.4,
"learning_rate": 0.00014762962962962964,
"loss": 0.1812,
"step": 1008
},
{
"epoch": 8.6,
"learning_rate": 0.00014585185185185187,
"loss": 0.1858,
"step": 1032
},
{
"epoch": 8.8,
"learning_rate": 0.00014407407407407408,
"loss": 0.187,
"step": 1056
},
{
"epoch": 9.0,
"learning_rate": 0.00014229629629629632,
"loss": 0.1929,
"step": 1080
},
{
"epoch": 9.2,
"learning_rate": 0.00014051851851851853,
"loss": 0.1449,
"step": 1104
},
{
"epoch": 9.4,
"learning_rate": 0.00013874074074074076,
"loss": 0.1539,
"step": 1128
},
{
"epoch": 9.6,
"learning_rate": 0.00013696296296296297,
"loss": 0.1556,
"step": 1152
},
{
"epoch": 9.8,
"learning_rate": 0.00013518518518518518,
"loss": 0.1615,
"step": 1176
},
{
"epoch": 10.0,
"learning_rate": 0.00013340740740740742,
"loss": 0.1704,
"step": 1200
},
{
"epoch": 10.2,
"learning_rate": 0.00013162962962962963,
"loss": 0.1237,
"step": 1224
},
{
"epoch": 10.4,
"learning_rate": 0.00012985185185185186,
"loss": 0.1282,
"step": 1248
},
{
"epoch": 10.6,
"learning_rate": 0.00012807407407407407,
"loss": 0.1335,
"step": 1272
},
{
"epoch": 10.8,
"learning_rate": 0.0001262962962962963,
"loss": 0.1401,
"step": 1296
},
{
"epoch": 11.0,
"learning_rate": 0.00012451851851851852,
"loss": 0.14,
"step": 1320
},
{
"epoch": 11.2,
"learning_rate": 0.00012274074074074075,
"loss": 0.0996,
"step": 1344
},
{
"epoch": 11.4,
"learning_rate": 0.00012096296296296296,
"loss": 0.1093,
"step": 1368
},
{
"epoch": 11.6,
"learning_rate": 0.0001191851851851852,
"loss": 0.1096,
"step": 1392
},
{
"epoch": 11.8,
"learning_rate": 0.00011740740740740741,
"loss": 0.1137,
"step": 1416
},
{
"epoch": 12.0,
"learning_rate": 0.00011562962962962964,
"loss": 0.1187,
"step": 1440
},
{
"epoch": 12.2,
"learning_rate": 0.00011385185185185185,
"loss": 0.0852,
"step": 1464
},
{
"epoch": 12.4,
"learning_rate": 0.00011207407407407409,
"loss": 0.0901,
"step": 1488
},
{
"epoch": 12.6,
"learning_rate": 0.0001102962962962963,
"loss": 0.0909,
"step": 1512
},
{
"epoch": 12.8,
"learning_rate": 0.00010851851851851853,
"loss": 0.094,
"step": 1536
},
{
"epoch": 13.0,
"learning_rate": 0.00010674074074074074,
"loss": 0.1011,
"step": 1560
},
{
"epoch": 13.2,
"learning_rate": 0.00010496296296296298,
"loss": 0.0703,
"step": 1584
},
{
"epoch": 13.4,
"learning_rate": 0.00010318518518518519,
"loss": 0.0726,
"step": 1608
},
{
"epoch": 13.6,
"learning_rate": 0.00010140740740740741,
"loss": 0.0769,
"step": 1632
},
{
"epoch": 13.8,
"learning_rate": 9.962962962962963e-05,
"loss": 0.0787,
"step": 1656
},
{
"epoch": 14.0,
"learning_rate": 9.785185185185186e-05,
"loss": 0.0848,
"step": 1680
},
{
"epoch": 14.2,
"learning_rate": 9.607407407407408e-05,
"loss": 0.0604,
"step": 1704
},
{
"epoch": 14.4,
"learning_rate": 9.42962962962963e-05,
"loss": 0.0586,
"step": 1728
},
{
"epoch": 14.6,
"learning_rate": 9.251851851851852e-05,
"loss": 0.0616,
"step": 1752
},
{
"epoch": 14.8,
"learning_rate": 9.074074074074075e-05,
"loss": 0.0663,
"step": 1776
},
{
"epoch": 15.0,
"learning_rate": 8.896296296296297e-05,
"loss": 0.0651,
"step": 1800
},
{
"epoch": 15.2,
"learning_rate": 8.718518518518519e-05,
"loss": 0.0479,
"step": 1824
},
{
"epoch": 15.4,
"learning_rate": 8.540740740740742e-05,
"loss": 0.05,
"step": 1848
},
{
"epoch": 15.6,
"learning_rate": 8.362962962962964e-05,
"loss": 0.0521,
"step": 1872
},
{
"epoch": 15.8,
"learning_rate": 8.185185185185186e-05,
"loss": 0.0523,
"step": 1896
},
{
"epoch": 16.0,
"learning_rate": 8.007407407407408e-05,
"loss": 0.0535,
"step": 1920
},
{
"epoch": 16.2,
"learning_rate": 7.82962962962963e-05,
"loss": 0.0402,
"step": 1944
},
{
"epoch": 16.4,
"learning_rate": 7.651851851851853e-05,
"loss": 0.0406,
"step": 1968
},
{
"epoch": 16.6,
"learning_rate": 7.474074074074074e-05,
"loss": 0.0426,
"step": 1992
},
{
"epoch": 16.8,
"learning_rate": 7.296296296296296e-05,
"loss": 0.0414,
"step": 2016
},
{
"epoch": 17.0,
"learning_rate": 7.118518518518518e-05,
"loss": 0.0438,
"step": 2040
},
{
"epoch": 17.2,
"learning_rate": 6.94074074074074e-05,
"loss": 0.0336,
"step": 2064
},
{
"epoch": 17.4,
"learning_rate": 6.762962962962963e-05,
"loss": 0.0332,
"step": 2088
},
{
"epoch": 17.6,
"learning_rate": 6.585185185185185e-05,
"loss": 0.0342,
"step": 2112
},
{
"epoch": 17.8,
"learning_rate": 6.407407407407407e-05,
"loss": 0.0342,
"step": 2136
},
{
"epoch": 18.0,
"learning_rate": 6.22962962962963e-05,
"loss": 0.0356,
"step": 2160
},
{
"epoch": 18.2,
"learning_rate": 6.051851851851852e-05,
"loss": 0.0259,
"step": 2184
},
{
"epoch": 18.4,
"learning_rate": 5.874074074074074e-05,
"loss": 0.0269,
"step": 2208
},
{
"epoch": 18.6,
"learning_rate": 5.6962962962962965e-05,
"loss": 0.0282,
"step": 2232
},
{
"epoch": 18.8,
"learning_rate": 5.518518518518519e-05,
"loss": 0.0291,
"step": 2256
},
{
"epoch": 19.0,
"learning_rate": 5.34074074074074e-05,
"loss": 0.0297,
"step": 2280
},
{
"epoch": 19.2,
"learning_rate": 5.1629629629629626e-05,
"loss": 0.0236,
"step": 2304
},
{
"epoch": 19.4,
"learning_rate": 4.9851851851851855e-05,
"loss": 0.0234,
"step": 2328
},
{
"epoch": 19.6,
"learning_rate": 4.807407407407408e-05,
"loss": 0.0233,
"step": 2352
},
{
"epoch": 19.8,
"learning_rate": 4.62962962962963e-05,
"loss": 0.0231,
"step": 2376
},
{
"epoch": 20.0,
"learning_rate": 4.4518518518518523e-05,
"loss": 0.0231,
"step": 2400
},
{
"epoch": 20.2,
"learning_rate": 4.274074074074074e-05,
"loss": 0.0183,
"step": 2424
},
{
"epoch": 20.4,
"learning_rate": 4.096296296296296e-05,
"loss": 0.0196,
"step": 2448
},
{
"epoch": 20.6,
"learning_rate": 3.9185185185185185e-05,
"loss": 0.0197,
"step": 2472
},
{
"epoch": 20.8,
"learning_rate": 3.740740740740741e-05,
"loss": 0.0198,
"step": 2496
},
{
"epoch": 21.0,
"learning_rate": 3.562962962962963e-05,
"loss": 0.0196,
"step": 2520
},
{
"epoch": 21.2,
"learning_rate": 3.385185185185185e-05,
"loss": 0.0162,
"step": 2544
},
{
"epoch": 21.4,
"learning_rate": 3.2074074074074075e-05,
"loss": 0.016,
"step": 2568
},
{
"epoch": 21.6,
"learning_rate": 3.02962962962963e-05,
"loss": 0.0166,
"step": 2592
},
{
"epoch": 21.8,
"learning_rate": 2.851851851851852e-05,
"loss": 0.0168,
"step": 2616
},
{
"epoch": 22.0,
"learning_rate": 2.6740740740740743e-05,
"loss": 0.0167,
"step": 2640
},
{
"epoch": 22.2,
"learning_rate": 2.4962962962962963e-05,
"loss": 0.0145,
"step": 2664
},
{
"epoch": 22.4,
"learning_rate": 2.318518518518519e-05,
"loss": 0.014,
"step": 2688
},
{
"epoch": 22.6,
"learning_rate": 2.1407407407407408e-05,
"loss": 0.0135,
"step": 2712
},
{
"epoch": 22.8,
"learning_rate": 1.962962962962963e-05,
"loss": 0.014,
"step": 2736
},
{
"epoch": 23.0,
"learning_rate": 1.7851851851851853e-05,
"loss": 0.0144,
"step": 2760
},
{
"epoch": 23.2,
"learning_rate": 1.6074074074074076e-05,
"loss": 0.0123,
"step": 2784
},
{
"epoch": 23.4,
"learning_rate": 1.4296296296296297e-05,
"loss": 0.0128,
"step": 2808
},
{
"epoch": 23.6,
"learning_rate": 1.2518518518518518e-05,
"loss": 0.0128,
"step": 2832
},
{
"epoch": 23.8,
"learning_rate": 1.074074074074074e-05,
"loss": 0.0124,
"step": 2856
},
{
"epoch": 24.0,
"learning_rate": 8.962962962962963e-06,
"loss": 0.0125,
"step": 2880
},
{
"epoch": 24.2,
"learning_rate": 7.185185185185185e-06,
"loss": 0.0113,
"step": 2904
},
{
"epoch": 24.4,
"learning_rate": 5.407407407407407e-06,
"loss": 0.0112,
"step": 2928
},
{
"epoch": 24.6,
"learning_rate": 3.6296296296296302e-06,
"loss": 0.0114,
"step": 2952
},
{
"epoch": 24.8,
"learning_rate": 1.8518518518518519e-06,
"loss": 0.0118,
"step": 2976
},
{
"epoch": 25.0,
"learning_rate": 7.407407407407407e-08,
"loss": 0.0116,
"step": 3000
}
],
"max_steps": 3000,
"num_train_epochs": 25,
"total_flos": 4.87764705411072e+17,
"trial_name": null,
"trial_params": null
}