dimasik1987's picture
Training in progress, step 25, checkpoint
577ba1f verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.05859947260474656,
"eval_steps": 3,
"global_step": 25,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0023439789041898623,
"grad_norm": 30.85260009765625,
"learning_rate": 5e-05,
"loss": 6.6922,
"step": 1
},
{
"epoch": 0.0023439789041898623,
"eval_loss": 7.640185832977295,
"eval_runtime": 375.7713,
"eval_samples_per_second": 1.913,
"eval_steps_per_second": 0.479,
"step": 1
},
{
"epoch": 0.004687957808379725,
"grad_norm": 32.24323272705078,
"learning_rate": 0.0001,
"loss": 7.3599,
"step": 2
},
{
"epoch": 0.007031936712569587,
"grad_norm": 35.10329818725586,
"learning_rate": 9.953429730181653e-05,
"loss": 4.3299,
"step": 3
},
{
"epoch": 0.007031936712569587,
"eval_loss": 1.007381796836853,
"eval_runtime": 377.1204,
"eval_samples_per_second": 1.907,
"eval_steps_per_second": 0.477,
"step": 3
},
{
"epoch": 0.00937591561675945,
"grad_norm": 12.174251556396484,
"learning_rate": 9.814586436738998e-05,
"loss": 1.0876,
"step": 4
},
{
"epoch": 0.011719894520949311,
"grad_norm": 11.167458534240723,
"learning_rate": 9.586056507527266e-05,
"loss": 0.618,
"step": 5
},
{
"epoch": 0.014063873425139173,
"grad_norm": 16.5855770111084,
"learning_rate": 9.272097022732443e-05,
"loss": 0.4434,
"step": 6
},
{
"epoch": 0.014063873425139173,
"eval_loss": 0.5613827109336853,
"eval_runtime": 377.1269,
"eval_samples_per_second": 1.907,
"eval_steps_per_second": 0.477,
"step": 6
},
{
"epoch": 0.016407852329329037,
"grad_norm": 2.6942837238311768,
"learning_rate": 8.8785564535221e-05,
"loss": 0.3475,
"step": 7
},
{
"epoch": 0.0187518312335189,
"grad_norm": 4.204878807067871,
"learning_rate": 8.412765716093272e-05,
"loss": 0.4269,
"step": 8
},
{
"epoch": 0.02109581013770876,
"grad_norm": 8.653458595275879,
"learning_rate": 7.883401610574336e-05,
"loss": 0.8488,
"step": 9
},
{
"epoch": 0.02109581013770876,
"eval_loss": 0.46133139729499817,
"eval_runtime": 376.6374,
"eval_samples_per_second": 1.909,
"eval_steps_per_second": 0.478,
"step": 9
},
{
"epoch": 0.023439789041898623,
"grad_norm": 6.0319647789001465,
"learning_rate": 7.300325188655761e-05,
"loss": 0.5979,
"step": 10
},
{
"epoch": 0.025783767946088484,
"grad_norm": 7.0973124504089355,
"learning_rate": 6.674398060854931e-05,
"loss": 0.3395,
"step": 11
},
{
"epoch": 0.028127746850278346,
"grad_norm": 3.4368035793304443,
"learning_rate": 6.01728006526317e-05,
"loss": 0.4039,
"step": 12
},
{
"epoch": 0.028127746850278346,
"eval_loss": 0.3852250874042511,
"eval_runtime": 376.4542,
"eval_samples_per_second": 1.91,
"eval_steps_per_second": 0.478,
"step": 12
},
{
"epoch": 0.030471725754468208,
"grad_norm": 4.774641990661621,
"learning_rate": 5.341212066823355e-05,
"loss": 0.4755,
"step": 13
},
{
"epoch": 0.032815704658658074,
"grad_norm": 3.387488842010498,
"learning_rate": 4.658787933176646e-05,
"loss": 0.3168,
"step": 14
},
{
"epoch": 0.03515968356284793,
"grad_norm": 4.079291343688965,
"learning_rate": 3.982719934736832e-05,
"loss": 0.3634,
"step": 15
},
{
"epoch": 0.03515968356284793,
"eval_loss": 0.3480370044708252,
"eval_runtime": 376.8136,
"eval_samples_per_second": 1.908,
"eval_steps_per_second": 0.478,
"step": 15
},
{
"epoch": 0.0375036624670378,
"grad_norm": 2.9585719108581543,
"learning_rate": 3.325601939145069e-05,
"loss": 0.2674,
"step": 16
},
{
"epoch": 0.039847641371227656,
"grad_norm": 3.4793038368225098,
"learning_rate": 2.6996748113442394e-05,
"loss": 0.2099,
"step": 17
},
{
"epoch": 0.04219162027541752,
"grad_norm": 3.6350109577178955,
"learning_rate": 2.1165983894256647e-05,
"loss": 0.2057,
"step": 18
},
{
"epoch": 0.04219162027541752,
"eval_loss": 0.30665451288223267,
"eval_runtime": 376.8336,
"eval_samples_per_second": 1.908,
"eval_steps_per_second": 0.478,
"step": 18
},
{
"epoch": 0.04453559917960739,
"grad_norm": 2.6633102893829346,
"learning_rate": 1.5872342839067306e-05,
"loss": 0.2433,
"step": 19
},
{
"epoch": 0.046879578083797245,
"grad_norm": 3.862442970275879,
"learning_rate": 1.1214435464779006e-05,
"loss": 0.2717,
"step": 20
},
{
"epoch": 0.04922355698798711,
"grad_norm": 4.074159145355225,
"learning_rate": 7.2790297726755716e-06,
"loss": 0.2596,
"step": 21
},
{
"epoch": 0.04922355698798711,
"eval_loss": 0.32189393043518066,
"eval_runtime": 376.6717,
"eval_samples_per_second": 1.909,
"eval_steps_per_second": 0.478,
"step": 21
},
{
"epoch": 0.05156753589217697,
"grad_norm": 4.781056880950928,
"learning_rate": 4.139434924727359e-06,
"loss": 0.4503,
"step": 22
},
{
"epoch": 0.053911514796366834,
"grad_norm": 6.350940227508545,
"learning_rate": 1.8541356326100433e-06,
"loss": 0.3268,
"step": 23
},
{
"epoch": 0.05625549370055669,
"grad_norm": 3.5974953174591064,
"learning_rate": 4.6570269818346224e-07,
"loss": 0.362,
"step": 24
},
{
"epoch": 0.05625549370055669,
"eval_loss": 0.31786561012268066,
"eval_runtime": 376.8204,
"eval_samples_per_second": 1.908,
"eval_steps_per_second": 0.478,
"step": 24
},
{
"epoch": 0.05859947260474656,
"grad_norm": 5.433014392852783,
"learning_rate": 0.0,
"loss": 0.2848,
"step": 25
}
],
"logging_steps": 1,
"max_steps": 25,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 25,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.508513578745856e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}