german-qg-t5-quad / trainer_state.json
dehio's picture
Initial upload
254eb3c
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 9.998784511199862,
"global_step": 7190,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.14,
"learning_rate": 9.860917941585536e-05,
"loss": 1.7018,
"step": 100
},
{
"epoch": 0.28,
"learning_rate": 9.721835883171071e-05,
"loss": 1.5678,
"step": 200
},
{
"epoch": 0.42,
"learning_rate": 9.582753824756607e-05,
"loss": 1.5333,
"step": 300
},
{
"epoch": 0.56,
"learning_rate": 9.443671766342142e-05,
"loss": 1.5235,
"step": 400
},
{
"epoch": 0.69,
"learning_rate": 9.304589707927678e-05,
"loss": 1.4892,
"step": 500
},
{
"epoch": 0.83,
"learning_rate": 9.165507649513213e-05,
"loss": 1.4781,
"step": 600
},
{
"epoch": 0.97,
"learning_rate": 9.026425591098748e-05,
"loss": 1.4319,
"step": 700
},
{
"epoch": 1.11,
"learning_rate": 8.887343532684285e-05,
"loss": 1.3011,
"step": 800
},
{
"epoch": 1.25,
"learning_rate": 8.74826147426982e-05,
"loss": 1.2885,
"step": 900
},
{
"epoch": 1.39,
"learning_rate": 8.609179415855355e-05,
"loss": 1.2524,
"step": 1000
},
{
"epoch": 1.53,
"learning_rate": 8.47009735744089e-05,
"loss": 1.2674,
"step": 1100
},
{
"epoch": 1.67,
"learning_rate": 8.331015299026426e-05,
"loss": 1.2533,
"step": 1200
},
{
"epoch": 1.81,
"learning_rate": 8.191933240611962e-05,
"loss": 1.268,
"step": 1300
},
{
"epoch": 1.95,
"learning_rate": 8.052851182197498e-05,
"loss": 1.2499,
"step": 1400
},
{
"epoch": 2.09,
"learning_rate": 7.913769123783032e-05,
"loss": 1.1782,
"step": 1500
},
{
"epoch": 2.23,
"learning_rate": 7.774687065368567e-05,
"loss": 1.1113,
"step": 1600
},
{
"epoch": 2.36,
"learning_rate": 7.635605006954103e-05,
"loss": 1.1167,
"step": 1700
},
{
"epoch": 2.5,
"learning_rate": 7.496522948539638e-05,
"loss": 1.1422,
"step": 1800
},
{
"epoch": 2.64,
"learning_rate": 7.357440890125175e-05,
"loss": 1.118,
"step": 1900
},
{
"epoch": 2.78,
"learning_rate": 7.21835883171071e-05,
"loss": 1.1133,
"step": 2000
},
{
"epoch": 2.92,
"learning_rate": 7.079276773296244e-05,
"loss": 1.1058,
"step": 2100
},
{
"epoch": 3.06,
"learning_rate": 6.94019471488178e-05,
"loss": 1.0747,
"step": 2200
},
{
"epoch": 3.2,
"learning_rate": 6.801112656467315e-05,
"loss": 1.0059,
"step": 2300
},
{
"epoch": 3.34,
"learning_rate": 6.662030598052852e-05,
"loss": 0.9988,
"step": 2400
},
{
"epoch": 3.48,
"learning_rate": 6.522948539638388e-05,
"loss": 1.0111,
"step": 2500
},
{
"epoch": 3.62,
"learning_rate": 6.383866481223923e-05,
"loss": 1.0237,
"step": 2600
},
{
"epoch": 3.75,
"learning_rate": 6.244784422809457e-05,
"loss": 1.0011,
"step": 2700
},
{
"epoch": 3.89,
"learning_rate": 6.105702364394992e-05,
"loss": 1.0133,
"step": 2800
},
{
"epoch": 4.03,
"learning_rate": 5.966620305980529e-05,
"loss": 0.9724,
"step": 2900
},
{
"epoch": 4.17,
"learning_rate": 5.827538247566065e-05,
"loss": 0.9005,
"step": 3000
},
{
"epoch": 4.31,
"learning_rate": 5.6884561891515995e-05,
"loss": 0.9264,
"step": 3100
},
{
"epoch": 4.45,
"learning_rate": 5.549374130737135e-05,
"loss": 0.933,
"step": 3200
},
{
"epoch": 4.59,
"learning_rate": 5.41029207232267e-05,
"loss": 0.9283,
"step": 3300
},
{
"epoch": 4.73,
"learning_rate": 5.2712100139082064e-05,
"loss": 0.9033,
"step": 3400
},
{
"epoch": 4.87,
"learning_rate": 5.132127955493742e-05,
"loss": 0.9139,
"step": 3500
},
{
"epoch": 5.01,
"learning_rate": 4.993045897079277e-05,
"loss": 0.9383,
"step": 3600
},
{
"epoch": 5.15,
"learning_rate": 4.853963838664812e-05,
"loss": 0.8268,
"step": 3700
},
{
"epoch": 5.28,
"learning_rate": 4.714881780250348e-05,
"loss": 0.841,
"step": 3800
},
{
"epoch": 5.42,
"learning_rate": 4.5757997218358836e-05,
"loss": 0.8594,
"step": 3900
},
{
"epoch": 5.56,
"learning_rate": 4.436717663421418e-05,
"loss": 0.8346,
"step": 4000
},
{
"epoch": 5.7,
"learning_rate": 4.2976356050069544e-05,
"loss": 0.8626,
"step": 4100
},
{
"epoch": 5.84,
"learning_rate": 4.15855354659249e-05,
"loss": 0.8691,
"step": 4200
},
{
"epoch": 5.98,
"learning_rate": 4.019471488178025e-05,
"loss": 0.8424,
"step": 4300
},
{
"epoch": 6.12,
"learning_rate": 3.880389429763561e-05,
"loss": 0.793,
"step": 4400
},
{
"epoch": 6.26,
"learning_rate": 3.741307371349096e-05,
"loss": 0.8025,
"step": 4500
},
{
"epoch": 6.4,
"learning_rate": 3.6022253129346316e-05,
"loss": 0.8245,
"step": 4600
},
{
"epoch": 6.54,
"learning_rate": 3.463143254520167e-05,
"loss": 0.797,
"step": 4700
},
{
"epoch": 6.68,
"learning_rate": 3.3240611961057024e-05,
"loss": 0.8063,
"step": 4800
},
{
"epoch": 6.81,
"learning_rate": 3.184979137691238e-05,
"loss": 0.8042,
"step": 4900
},
{
"epoch": 6.95,
"learning_rate": 3.0458970792767733e-05,
"loss": 0.8068,
"step": 5000
},
{
"epoch": 7.09,
"learning_rate": 2.906815020862309e-05,
"loss": 0.7873,
"step": 5100
},
{
"epoch": 7.23,
"learning_rate": 2.767732962447844e-05,
"loss": 0.7495,
"step": 5200
},
{
"epoch": 7.37,
"learning_rate": 2.6286509040333796e-05,
"loss": 0.7538,
"step": 5300
},
{
"epoch": 7.51,
"learning_rate": 2.4895688456189153e-05,
"loss": 0.7631,
"step": 5400
},
{
"epoch": 7.65,
"learning_rate": 2.3504867872044508e-05,
"loss": 0.7532,
"step": 5500
},
{
"epoch": 7.79,
"learning_rate": 2.2114047287899862e-05,
"loss": 0.7772,
"step": 5600
},
{
"epoch": 7.93,
"learning_rate": 2.0723226703755216e-05,
"loss": 0.7691,
"step": 5700
},
{
"epoch": 8.07,
"learning_rate": 1.933240611961057e-05,
"loss": 0.7544,
"step": 5800
},
{
"epoch": 8.21,
"learning_rate": 1.7941585535465928e-05,
"loss": 0.7243,
"step": 5900
},
{
"epoch": 8.34,
"learning_rate": 1.655076495132128e-05,
"loss": 0.7308,
"step": 6000
},
{
"epoch": 8.48,
"learning_rate": 1.5159944367176635e-05,
"loss": 0.7115,
"step": 6100
},
{
"epoch": 8.62,
"learning_rate": 1.376912378303199e-05,
"loss": 0.7275,
"step": 6200
},
{
"epoch": 8.76,
"learning_rate": 1.2378303198887344e-05,
"loss": 0.7279,
"step": 6300
},
{
"epoch": 8.9,
"learning_rate": 1.0987482614742698e-05,
"loss": 0.7497,
"step": 6400
},
{
"epoch": 9.04,
"learning_rate": 9.596662030598054e-06,
"loss": 0.7163,
"step": 6500
},
{
"epoch": 9.18,
"learning_rate": 8.205841446453408e-06,
"loss": 0.7047,
"step": 6600
},
{
"epoch": 9.32,
"learning_rate": 6.815020862308763e-06,
"loss": 0.7089,
"step": 6700
},
{
"epoch": 9.46,
"learning_rate": 5.424200278164117e-06,
"loss": 0.7078,
"step": 6800
},
{
"epoch": 9.6,
"learning_rate": 4.033379694019471e-06,
"loss": 0.7065,
"step": 6900
},
{
"epoch": 9.73,
"learning_rate": 2.6425591098748263e-06,
"loss": 0.7017,
"step": 7000
},
{
"epoch": 9.87,
"learning_rate": 1.2517385257301808e-06,
"loss": 0.6855,
"step": 7100
},
{
"epoch": 10.0,
"step": 7190,
"total_flos": 6.20936877252096e+16,
"train_loss": 0.9689414988638463,
"train_runtime": 6282.5026,
"train_samples_per_second": 18.333,
"train_steps_per_second": 1.144
}
],
"max_steps": 7190,
"num_train_epochs": 10,
"total_flos": 6.20936877252096e+16,
"trial_name": null,
"trial_params": null
}