vicuna-adv-robust-ul15-sft-lora / trainer_state.json
justinwangx's picture
Model save
4b57d01
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 14.588235294117647,
"eval_steps": 500,
"global_step": 220,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04,
"learning_rate": 0.00029999428845962564,
"loss": 1.4179,
"step": 1
},
{
"epoch": 0.2,
"learning_rate": 0.00029985723323727866,
"loss": 1.3836,
"step": 5
},
{
"epoch": 0.41,
"learning_rate": 0.0002994292047137618,
"loss": 1.2291,
"step": 10
},
{
"epoch": 0.57,
"eval_loss": 1.1107903718948364,
"eval_runtime": 1318.1616,
"eval_samples_per_second": 17.71,
"eval_steps_per_second": 0.554,
"step": 14
},
{
"epoch": 1.01,
"learning_rate": 0.00029871672920607153,
"loss": 1.171,
"step": 15
},
{
"epoch": 1.22,
"learning_rate": 0.0002977211629518312,
"loss": 1.1376,
"step": 20
},
{
"epoch": 1.42,
"learning_rate": 0.00029644440106799,
"loss": 1.1237,
"step": 25
},
{
"epoch": 1.59,
"eval_loss": 1.0651090145111084,
"eval_runtime": 1320.1229,
"eval_samples_per_second": 17.683,
"eval_steps_per_second": 0.553,
"step": 29
},
{
"epoch": 2.03,
"learning_rate": 0.0002948888739433602,
"loss": 1.1078,
"step": 30
},
{
"epoch": 2.23,
"learning_rate": 0.000293057542612234,
"loss": 1.0947,
"step": 35
},
{
"epoch": 2.43,
"learning_rate": 0.0002909538931178862,
"loss": 1.0918,
"step": 40
},
{
"epoch": 2.6,
"eval_loss": 1.0472216606140137,
"eval_runtime": 1319.747,
"eval_samples_per_second": 17.688,
"eval_steps_per_second": 0.553,
"step": 44
},
{
"epoch": 3.04,
"learning_rate": 0.000288581929876693,
"loss": 1.0832,
"step": 45
},
{
"epoch": 3.24,
"learning_rate": 0.0002859461680554975,
"loss": 1.0705,
"step": 50
},
{
"epoch": 3.45,
"learning_rate": 0.0002830516249767332,
"loss": 1.0711,
"step": 55
},
{
"epoch": 3.57,
"eval_loss": 1.0370612144470215,
"eval_runtime": 1319.0792,
"eval_samples_per_second": 17.697,
"eval_steps_per_second": 0.553,
"step": 58
},
{
"epoch": 4.05,
"learning_rate": 0.0002799038105676658,
"loss": 1.0589,
"step": 60
},
{
"epoch": 4.26,
"learning_rate": 0.0002765087168719328,
"loss": 1.0489,
"step": 65
},
{
"epoch": 4.46,
"learning_rate": 0.00027287280664334875,
"loss": 1.0498,
"step": 70
},
{
"epoch": 4.58,
"eval_loss": 1.0298787355422974,
"eval_runtime": 1319.3478,
"eval_samples_per_second": 17.694,
"eval_steps_per_second": 0.553,
"step": 73
},
{
"epoch": 5.06,
"learning_rate": 0.00026900300104368524,
"loss": 1.0358,
"step": 75
},
{
"epoch": 5.27,
"learning_rate": 0.00026490666646784665,
"loss": 1.0261,
"step": 80
},
{
"epoch": 5.47,
"learning_rate": 0.0002605916005215186,
"loss": 1.0255,
"step": 85
},
{
"epoch": 5.6,
"eval_loss": 1.0246919393539429,
"eval_runtime": 1320.0654,
"eval_samples_per_second": 17.684,
"eval_steps_per_second": 0.553,
"step": 88
},
{
"epoch": 6.08,
"learning_rate": 0.00025606601717798207,
"loss": 1.0165,
"step": 90
},
{
"epoch": 6.28,
"learning_rate": 0.00025133853114234905,
"loss": 1.0119,
"step": 95
},
{
"epoch": 6.49,
"learning_rate": 0.0002464181414529809,
"loss": 1.0131,
"step": 100
},
{
"epoch": 6.57,
"eval_loss": 1.0210436582565308,
"eval_runtime": 1320.0018,
"eval_samples_per_second": 17.685,
"eval_steps_per_second": 0.553,
"step": 102
},
{
"epoch": 7.09,
"learning_rate": 0.00024131421435130807,
"loss": 1.0053,
"step": 105
},
{
"epoch": 7.29,
"learning_rate": 0.00023603646545265687,
"loss": 1.006,
"step": 110
},
{
"epoch": 7.5,
"learning_rate": 0.00023059494125202357,
"loss": 1.0047,
"step": 115
},
{
"epoch": 7.58,
"eval_loss": 1.0181236267089844,
"eval_runtime": 1318.7669,
"eval_samples_per_second": 17.701,
"eval_steps_per_second": 0.554,
"step": 117
},
{
"epoch": 8.1,
"learning_rate": 0.000225,
"loss": 1.0,
"step": 120
},
{
"epoch": 8.31,
"learning_rate": 0.0002192622919852551,
"loss": 0.9948,
"step": 125
},
{
"epoch": 8.51,
"learning_rate": 0.0002133927392611049,
"loss": 1.004,
"step": 130
},
{
"epoch": 8.59,
"eval_loss": 1.0159988403320312,
"eval_runtime": 1320.6395,
"eval_samples_per_second": 17.676,
"eval_steps_per_second": 0.553,
"step": 132
},
{
"epoch": 9.12,
"learning_rate": 0.00020740251485476345,
"loss": 0.9934,
"step": 135
},
{
"epoch": 9.32,
"learning_rate": 0.00020130302149885031,
"loss": 0.9901,
"step": 140
},
{
"epoch": 9.52,
"learning_rate": 0.00019510586992564093,
"loss": 1.0007,
"step": 145
},
{
"epoch": 9.57,
"eval_loss": 1.01445734500885,
"eval_runtime": 1320.1156,
"eval_samples_per_second": 17.683,
"eval_steps_per_second": 0.553,
"step": 146
},
{
"epoch": 10.13,
"learning_rate": 0.0001888228567653781,
"loss": 0.989,
"step": 150
},
{
"epoch": 10.33,
"learning_rate": 0.0001824659420907154,
"loss": 0.9889,
"step": 155
},
{
"epoch": 10.54,
"learning_rate": 0.00017604722665003956,
"loss": 0.9938,
"step": 160
},
{
"epoch": 10.58,
"eval_loss": 1.0132454633712769,
"eval_runtime": 1321.3534,
"eval_samples_per_second": 17.667,
"eval_steps_per_second": 0.552,
"step": 161
},
{
"epoch": 11.14,
"learning_rate": 0.00016957892883300775,
"loss": 0.9875,
"step": 165
},
{
"epoch": 11.35,
"learning_rate": 0.00016307336141214873,
"loss": 0.9864,
"step": 170
},
{
"epoch": 11.55,
"learning_rate": 0.00015654290810480042,
"loss": 0.9916,
"step": 175
},
{
"epoch": 11.59,
"eval_loss": 1.012216567993164,
"eval_runtime": 1320.9896,
"eval_samples_per_second": 17.672,
"eval_steps_per_second": 0.553,
"step": 176
},
{
"epoch": 12.15,
"learning_rate": 0.00015,
"loss": 0.9837,
"step": 180
},
{
"epoch": 12.36,
"learning_rate": 0.0001434570918951996,
"loss": 0.984,
"step": 185
},
{
"epoch": 12.56,
"learning_rate": 0.00013692663858785124,
"loss": 0.9884,
"step": 190
},
{
"epoch": 12.56,
"eval_loss": 1.0114786624908447,
"eval_runtime": 1322.1026,
"eval_samples_per_second": 17.657,
"eval_steps_per_second": 0.552,
"step": 190
},
{
"epoch": 13.17,
"learning_rate": 0.00013042107116699228,
"loss": 0.981,
"step": 195
},
{
"epoch": 13.37,
"learning_rate": 0.00012395277334996044,
"loss": 0.9821,
"step": 200
},
{
"epoch": 13.58,
"learning_rate": 0.00011753405790928456,
"loss": 0.9881,
"step": 205
},
{
"epoch": 13.58,
"eval_loss": 1.0108779668807983,
"eval_runtime": 1322.1583,
"eval_samples_per_second": 17.656,
"eval_steps_per_second": 0.552,
"step": 205
},
{
"epoch": 14.18,
"learning_rate": 0.00011117714323462186,
"loss": 0.9778,
"step": 210
},
{
"epoch": 14.38,
"learning_rate": 0.00010489413007435904,
"loss": 0.982,
"step": 215
},
{
"epoch": 14.59,
"learning_rate": 9.869697850114969e-05,
"loss": 0.9856,
"step": 220
},
{
"epoch": 14.59,
"eval_loss": 1.010445237159729,
"eval_runtime": 1322.8082,
"eval_samples_per_second": 17.647,
"eval_steps_per_second": 0.552,
"step": 220
},
{
"epoch": 14.59,
"step": 220,
"total_flos": 6.664936558166016e+16,
"train_loss": 1.0388530253009363,
"train_runtime": 47681.956,
"train_samples_per_second": 3.929,
"train_steps_per_second": 0.008
}
],
"logging_steps": 5,
"max_steps": 360,
"num_train_epochs": 15,
"save_steps": 500,
"total_flos": 6.664936558166016e+16,
"trial_name": null,
"trial_params": null
}