clip-roberta-finetuned / trainer_state.json
sharkMeow's picture
End of training
bf91d62 verified
raw
history blame
4.45 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 100.0,
"eval_steps": 300,
"global_step": 3000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 10.0,
"grad_norm": 10.267565727233887,
"learning_rate": 4.5e-05,
"loss": 2.6587,
"step": 300
},
{
"epoch": 10.0,
"eval_loss": 2.672071695327759,
"eval_runtime": 2.1303,
"eval_samples_per_second": 336.103,
"eval_steps_per_second": 3.755,
"step": 300
},
{
"epoch": 20.0,
"grad_norm": 5.43134069442749,
"learning_rate": 4e-05,
"loss": 0.5242,
"step": 600
},
{
"epoch": 20.0,
"eval_loss": 1.995083212852478,
"eval_runtime": 2.183,
"eval_samples_per_second": 327.995,
"eval_steps_per_second": 3.665,
"step": 600
},
{
"epoch": 30.0,
"grad_norm": 2.2661261558532715,
"learning_rate": 3.5e-05,
"loss": 0.1995,
"step": 900
},
{
"epoch": 30.0,
"eval_loss": 1.776659369468689,
"eval_runtime": 2.105,
"eval_samples_per_second": 340.146,
"eval_steps_per_second": 3.801,
"step": 900
},
{
"epoch": 40.0,
"grad_norm": 1.7297999858856201,
"learning_rate": 3e-05,
"loss": 0.1025,
"step": 1200
},
{
"epoch": 40.0,
"eval_loss": 1.6002683639526367,
"eval_runtime": 2.1049,
"eval_samples_per_second": 340.159,
"eval_steps_per_second": 3.801,
"step": 1200
},
{
"epoch": 50.0,
"grad_norm": 1.119903326034546,
"learning_rate": 2.5e-05,
"loss": 0.0609,
"step": 1500
},
{
"epoch": 50.0,
"eval_loss": 1.5019861459732056,
"eval_runtime": 2.0694,
"eval_samples_per_second": 345.998,
"eval_steps_per_second": 3.866,
"step": 1500
},
{
"epoch": 60.0,
"grad_norm": 0.4384348690509796,
"learning_rate": 2e-05,
"loss": 0.042,
"step": 1800
},
{
"epoch": 60.0,
"eval_loss": 1.3371723890304565,
"eval_runtime": 2.1069,
"eval_samples_per_second": 339.838,
"eval_steps_per_second": 3.797,
"step": 1800
},
{
"epoch": 70.0,
"grad_norm": 0.4751300811767578,
"learning_rate": 1.5e-05,
"loss": 0.0315,
"step": 2100
},
{
"epoch": 70.0,
"eval_loss": 1.3104065656661987,
"eval_runtime": 2.0197,
"eval_samples_per_second": 354.506,
"eval_steps_per_second": 3.961,
"step": 2100
},
{
"epoch": 80.0,
"grad_norm": 1.0900623798370361,
"learning_rate": 1e-05,
"loss": 0.0271,
"step": 2400
},
{
"epoch": 80.0,
"eval_loss": 1.2714661359786987,
"eval_runtime": 2.1021,
"eval_samples_per_second": 340.608,
"eval_steps_per_second": 3.806,
"step": 2400
},
{
"epoch": 90.0,
"grad_norm": 0.31860601902008057,
"learning_rate": 5e-06,
"loss": 0.0212,
"step": 2700
},
{
"epoch": 90.0,
"eval_loss": 1.2446495294570923,
"eval_runtime": 2.1073,
"eval_samples_per_second": 339.765,
"eval_steps_per_second": 3.796,
"step": 2700
},
{
"epoch": 100.0,
"grad_norm": 0.22674699127674103,
"learning_rate": 0.0,
"loss": 0.0202,
"step": 3000
},
{
"epoch": 100.0,
"eval_loss": 1.237874984741211,
"eval_runtime": 2.1055,
"eval_samples_per_second": 340.065,
"eval_steps_per_second": 3.8,
"step": 3000
},
{
"epoch": 100.0,
"step": 3000,
"total_flos": 3.1668214733568e+16,
"train_loss": 0.36876122029622393,
"train_runtime": 1639.2452,
"train_samples_per_second": 146.043,
"train_steps_per_second": 1.83
}
],
"logging_steps": 300,
"max_steps": 3000,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.1668214733568e+16,
"train_batch_size": 80,
"trial_name": null,
"trial_params": null
}