FinQa-llama3 / checkpoint-870 /trainer_state.json
zaursamedov1's picture
Upload folder using huggingface_hub
264834a verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 500,
"global_step": 870,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.14367816091954022,
"grad_norm": 1.424246072769165,
"learning_rate": 5.517241379310345e-05,
"loss": 2.0273,
"step": 25
},
{
"epoch": 0.28735632183908044,
"grad_norm": 0.9430229663848877,
"learning_rate": 0.00011264367816091954,
"loss": 1.8285,
"step": 50
},
{
"epoch": 0.43103448275862066,
"grad_norm": 0.7553120851516724,
"learning_rate": 0.00017011494252873563,
"loss": 1.6756,
"step": 75
},
{
"epoch": 0.5747126436781609,
"grad_norm": 0.7407281398773193,
"learning_rate": 0.00019693486590038314,
"loss": 1.5606,
"step": 100
},
{
"epoch": 0.7183908045977011,
"grad_norm": 0.7509495615959167,
"learning_rate": 0.0001905491698595147,
"loss": 1.5438,
"step": 125
},
{
"epoch": 0.8620689655172413,
"grad_norm": 0.7058537602424622,
"learning_rate": 0.00018416347381864626,
"loss": 1.4816,
"step": 150
},
{
"epoch": 1.0057471264367817,
"grad_norm": 0.6202262043952942,
"learning_rate": 0.00017777777777777779,
"loss": 1.5232,
"step": 175
},
{
"epoch": 1.1494252873563218,
"grad_norm": 0.7314026355743408,
"learning_rate": 0.00017139208173690932,
"loss": 1.4954,
"step": 200
},
{
"epoch": 1.293103448275862,
"grad_norm": 0.7846217155456543,
"learning_rate": 0.00016500638569604087,
"loss": 1.4199,
"step": 225
},
{
"epoch": 1.4367816091954024,
"grad_norm": 0.751372754573822,
"learning_rate": 0.00015862068965517243,
"loss": 1.48,
"step": 250
},
{
"epoch": 1.5804597701149425,
"grad_norm": 0.7667829990386963,
"learning_rate": 0.00015223499361430396,
"loss": 1.4346,
"step": 275
},
{
"epoch": 1.7241379310344827,
"grad_norm": 0.7370414733886719,
"learning_rate": 0.00014584929757343552,
"loss": 1.4334,
"step": 300
},
{
"epoch": 1.867816091954023,
"grad_norm": 0.9422834515571594,
"learning_rate": 0.00013946360153256705,
"loss": 1.3969,
"step": 325
},
{
"epoch": 2.0114942528735633,
"grad_norm": 0.7596230506896973,
"learning_rate": 0.0001330779054916986,
"loss": 1.4241,
"step": 350
},
{
"epoch": 2.1551724137931036,
"grad_norm": 0.9657158851623535,
"learning_rate": 0.00012669220945083016,
"loss": 1.3115,
"step": 375
},
{
"epoch": 2.2988505747126435,
"grad_norm": 0.9675273895263672,
"learning_rate": 0.00012030651340996169,
"loss": 1.3382,
"step": 400
},
{
"epoch": 2.442528735632184,
"grad_norm": 0.9593296647071838,
"learning_rate": 0.00011392081736909323,
"loss": 1.3843,
"step": 425
},
{
"epoch": 2.586206896551724,
"grad_norm": 0.8661421537399292,
"learning_rate": 0.00010753512132822479,
"loss": 1.3161,
"step": 450
},
{
"epoch": 2.7298850574712645,
"grad_norm": 0.9521291851997375,
"learning_rate": 0.00010114942528735633,
"loss": 1.3355,
"step": 475
},
{
"epoch": 2.873563218390805,
"grad_norm": 1.0091650485992432,
"learning_rate": 9.476372924648788e-05,
"loss": 1.3052,
"step": 500
},
{
"epoch": 3.0172413793103448,
"grad_norm": 0.9030539989471436,
"learning_rate": 8.837803320561942e-05,
"loss": 1.2682,
"step": 525
},
{
"epoch": 3.160919540229885,
"grad_norm": 1.0906364917755127,
"learning_rate": 8.199233716475096e-05,
"loss": 1.2097,
"step": 550
},
{
"epoch": 3.3045977011494254,
"grad_norm": 1.1765050888061523,
"learning_rate": 7.56066411238825e-05,
"loss": 1.2564,
"step": 575
},
{
"epoch": 3.4482758620689653,
"grad_norm": 1.1693239212036133,
"learning_rate": 6.922094508301405e-05,
"loss": 1.2207,
"step": 600
},
{
"epoch": 3.5919540229885056,
"grad_norm": 1.0836124420166016,
"learning_rate": 6.283524904214559e-05,
"loss": 1.2338,
"step": 625
},
{
"epoch": 3.735632183908046,
"grad_norm": 1.0297603607177734,
"learning_rate": 5.644955300127714e-05,
"loss": 1.2179,
"step": 650
},
{
"epoch": 3.8793103448275863,
"grad_norm": 1.3402975797653198,
"learning_rate": 5.0063856960408687e-05,
"loss": 1.2363,
"step": 675
},
{
"epoch": 4.022988505747127,
"grad_norm": 1.1158450841903687,
"learning_rate": 4.367816091954024e-05,
"loss": 1.1844,
"step": 700
},
{
"epoch": 4.166666666666667,
"grad_norm": 1.3297624588012695,
"learning_rate": 3.729246487867178e-05,
"loss": 1.1176,
"step": 725
},
{
"epoch": 4.310344827586207,
"grad_norm": 1.3470237255096436,
"learning_rate": 3.090676883780332e-05,
"loss": 1.1639,
"step": 750
},
{
"epoch": 4.454022988505747,
"grad_norm": 1.293717861175537,
"learning_rate": 2.4521072796934867e-05,
"loss": 1.1668,
"step": 775
},
{
"epoch": 4.597701149425287,
"grad_norm": 1.400448203086853,
"learning_rate": 1.8135376756066413e-05,
"loss": 1.1751,
"step": 800
},
{
"epoch": 4.741379310344827,
"grad_norm": 1.1660029888153076,
"learning_rate": 1.1749680715197957e-05,
"loss": 1.1406,
"step": 825
},
{
"epoch": 4.885057471264368,
"grad_norm": 1.311295747756958,
"learning_rate": 5.3639846743295025e-06,
"loss": 1.1406,
"step": 850
}
],
"logging_steps": 25,
"max_steps": 870,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"total_flos": 1.6060942324334592e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}