FlexingD's picture
Upload 8 files
adae161
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.17173278378842521,
"eval_steps": 50,
"global_step": 1000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 2.3773773773773775e-05,
"loss": 0.9928,
"step": 50
},
{
"epoch": 0.01,
"eval_loss": 0.8941472172737122,
"eval_runtime": 5245.4303,
"eval_samples_per_second": 0.987,
"eval_steps_per_second": 0.123,
"step": 50
},
{
"epoch": 0.02,
"learning_rate": 2.2522522522522523e-05,
"loss": 0.877,
"step": 100
},
{
"epoch": 0.02,
"eval_loss": 0.8646272420883179,
"eval_runtime": 5219.884,
"eval_samples_per_second": 0.992,
"eval_steps_per_second": 0.124,
"step": 100
},
{
"epoch": 0.03,
"learning_rate": 2.1271271271271275e-05,
"loss": 0.8642,
"step": 150
},
{
"epoch": 0.03,
"eval_loss": 0.8599761128425598,
"eval_runtime": 5214.4761,
"eval_samples_per_second": 0.993,
"eval_steps_per_second": 0.124,
"step": 150
},
{
"epoch": 0.03,
"learning_rate": 2.0020020020020023e-05,
"loss": 0.8576,
"step": 200
},
{
"epoch": 0.03,
"eval_loss": 0.8569617867469788,
"eval_runtime": 5214.0664,
"eval_samples_per_second": 0.993,
"eval_steps_per_second": 0.124,
"step": 200
},
{
"epoch": 0.04,
"learning_rate": 1.8768768768768768e-05,
"loss": 0.8349,
"step": 250
},
{
"epoch": 0.04,
"eval_loss": 0.8535052537918091,
"eval_runtime": 5222.9322,
"eval_samples_per_second": 0.991,
"eval_steps_per_second": 0.124,
"step": 250
},
{
"epoch": 0.05,
"learning_rate": 1.7517517517517516e-05,
"loss": 0.85,
"step": 300
},
{
"epoch": 0.05,
"eval_loss": 0.8515381813049316,
"eval_runtime": 5214.0267,
"eval_samples_per_second": 0.993,
"eval_steps_per_second": 0.124,
"step": 300
},
{
"epoch": 0.06,
"learning_rate": 1.6266266266266268e-05,
"loss": 0.8548,
"step": 350
},
{
"epoch": 0.06,
"eval_loss": 0.8497709035873413,
"eval_runtime": 5213.6211,
"eval_samples_per_second": 0.993,
"eval_steps_per_second": 0.124,
"step": 350
},
{
"epoch": 0.07,
"learning_rate": 1.5015015015015016e-05,
"loss": 0.8366,
"step": 400
},
{
"epoch": 0.07,
"eval_loss": 0.8475283980369568,
"eval_runtime": 5213.1999,
"eval_samples_per_second": 0.993,
"eval_steps_per_second": 0.124,
"step": 400
},
{
"epoch": 0.08,
"learning_rate": 1.3763763763763765e-05,
"loss": 0.8408,
"step": 450
},
{
"epoch": 0.08,
"eval_loss": 0.8464268445968628,
"eval_runtime": 5213.2855,
"eval_samples_per_second": 0.993,
"eval_steps_per_second": 0.124,
"step": 450
},
{
"epoch": 0.09,
"learning_rate": 1.2512512512512515e-05,
"loss": 0.8645,
"step": 500
},
{
"epoch": 0.09,
"eval_loss": 0.8458148241043091,
"eval_runtime": 5213.7104,
"eval_samples_per_second": 0.993,
"eval_steps_per_second": 0.124,
"step": 500
},
{
"epoch": 0.09,
"learning_rate": 1.1261261261261261e-05,
"loss": 0.8507,
"step": 550
},
{
"epoch": 0.09,
"eval_loss": 0.8435949087142944,
"eval_runtime": 5210.7804,
"eval_samples_per_second": 0.993,
"eval_steps_per_second": 0.124,
"step": 550
},
{
"epoch": 0.1,
"learning_rate": 1.0010010010010011e-05,
"loss": 0.8592,
"step": 600
},
{
"epoch": 0.1,
"eval_loss": 0.8434337973594666,
"eval_runtime": 5210.4901,
"eval_samples_per_second": 0.993,
"eval_steps_per_second": 0.124,
"step": 600
},
{
"epoch": 0.11,
"learning_rate": 8.758758758758758e-06,
"loss": 0.8326,
"step": 650
},
{
"epoch": 0.11,
"eval_loss": 0.8415650129318237,
"eval_runtime": 5212.5986,
"eval_samples_per_second": 0.993,
"eval_steps_per_second": 0.124,
"step": 650
},
{
"epoch": 0.12,
"learning_rate": 7.507507507507508e-06,
"loss": 0.8272,
"step": 700
},
{
"epoch": 0.12,
"eval_loss": 0.8408710360527039,
"eval_runtime": 5217.8607,
"eval_samples_per_second": 0.992,
"eval_steps_per_second": 0.124,
"step": 700
},
{
"epoch": 0.13,
"learning_rate": 6.256256256256257e-06,
"loss": 0.82,
"step": 750
},
{
"epoch": 0.13,
"eval_loss": 0.8401119709014893,
"eval_runtime": 5220.4921,
"eval_samples_per_second": 0.991,
"eval_steps_per_second": 0.124,
"step": 750
},
{
"epoch": 0.14,
"learning_rate": 5.005005005005006e-06,
"loss": 0.826,
"step": 800
},
{
"epoch": 0.14,
"eval_loss": 0.8393945097923279,
"eval_runtime": 5287.3794,
"eval_samples_per_second": 0.979,
"eval_steps_per_second": 0.122,
"step": 800
},
{
"epoch": 0.15,
"learning_rate": 3.753753753753754e-06,
"loss": 0.8468,
"step": 850
},
{
"epoch": 0.15,
"eval_loss": 0.8389515280723572,
"eval_runtime": 5212.07,
"eval_samples_per_second": 0.993,
"eval_steps_per_second": 0.124,
"step": 850
},
{
"epoch": 0.15,
"learning_rate": 2.502502502502503e-06,
"loss": 0.8438,
"step": 900
},
{
"epoch": 0.15,
"eval_loss": 0.8384743928909302,
"eval_runtime": 5211.9686,
"eval_samples_per_second": 0.993,
"eval_steps_per_second": 0.124,
"step": 900
},
{
"epoch": 0.16,
"learning_rate": 1.2512512512512514e-06,
"loss": 0.8384,
"step": 950
},
{
"epoch": 0.16,
"eval_loss": 0.838046669960022,
"eval_runtime": 5209.2343,
"eval_samples_per_second": 0.994,
"eval_steps_per_second": 0.124,
"step": 950
},
{
"epoch": 0.17,
"learning_rate": 0.0,
"loss": 0.8527,
"step": 1000
},
{
"epoch": 0.17,
"eval_loss": 0.8379368782043457,
"eval_runtime": 5209.3452,
"eval_samples_per_second": 0.994,
"eval_steps_per_second": 0.124,
"step": 1000
}
],
"logging_steps": 50,
"max_steps": 1000,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"total_flos": 1.76841553870848e+17,
"trial_name": null,
"trial_params": null
}