{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 83, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12048192771084337, "grad_norm": 2.8577072620391846, "learning_rate": 4.8230451807939135e-05, "logits/chosen": -0.11954045295715332, "logits/rejected": -3.3223273754119873, "logps/chosen": -1.4568630456924438, "logps/rejected": -3.726320266723633, "loss": 1.4779, "num_input_tokens_seen": 8864, "odds_ratio_loss": 14.732812881469727, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1456862986087799, "rewards/margins": 0.22694571316242218, "rewards/rejected": -0.3726319968700409, "sft_loss": 0.0046242037788033485, "step": 10 }, { "epoch": 0.24096385542168675, "grad_norm": 1.9434715509414673, "learning_rate": 4.3172311296078595e-05, "logits/chosen": -0.2733025550842285, "logits/rejected": -3.2514376640319824, "logps/chosen": -1.4726128578186035, "logps/rejected": -5.625662803649902, "loss": 1.4745, "num_input_tokens_seen": 17712, "odds_ratio_loss": 14.662857055664062, "rewards/accuracies": 1.0, "rewards/chosen": -0.14726129174232483, "rewards/margins": 0.4153049886226654, "rewards/rejected": -0.562566339969635, "sft_loss": 0.008168135769665241, "step": 20 }, { "epoch": 0.3614457831325301, "grad_norm": 2.154024124145508, "learning_rate": 3.55416283362546e-05, "logits/chosen": -0.20305636525154114, "logits/rejected": -3.410961627960205, "logps/chosen": -1.3130009174346924, "logps/rejected": -6.4230146408081055, "loss": 1.3135, "num_input_tokens_seen": 27520, "odds_ratio_loss": 13.098909378051758, "rewards/accuracies": 1.0, "rewards/chosen": -0.13130010664463043, "rewards/margins": 0.5110014081001282, "rewards/rejected": -0.6423014998435974, "sft_loss": 0.0035836666356772184, "step": 30 }, { "epoch": 0.4819277108433735, "grad_norm": 2.372204303741455, "learning_rate": 2.6418631827326857e-05, "logits/chosen": -0.2755209803581238, "logits/rejected": -3.2902603149414062, "logps/chosen": -1.2280548810958862, "logps/rejected": -6.585225582122803, "loss": 1.2285, "num_input_tokens_seen": 36096, "odds_ratio_loss": 12.17691707611084, "rewards/accuracies": 1.0, "rewards/chosen": -0.12280547618865967, "rewards/margins": 0.5357170104980469, "rewards/rejected": -0.6585224866867065, "sft_loss": 0.010825484991073608, "step": 40 }, { "epoch": 0.6024096385542169, "grad_norm": 2.4067399501800537, "learning_rate": 1.70948083275794e-05, "logits/chosen": -0.1695922613143921, "logits/rejected": -3.419903516769409, "logps/chosen": -1.0168521404266357, "logps/rejected": -6.722726345062256, "loss": 1.0171, "num_input_tokens_seen": 43456, "odds_ratio_loss": 10.138498306274414, "rewards/accuracies": 1.0, "rewards/chosen": -0.10168520361185074, "rewards/margins": 0.5705875158309937, "rewards/rejected": -0.6722726821899414, "sft_loss": 0.0032351273111999035, "step": 50 }, { "epoch": 0.7228915662650602, "grad_norm": 2.4104135036468506, "learning_rate": 8.890074238378074e-06, "logits/chosen": -0.2158750295639038, "logits/rejected": -3.261337995529175, "logps/chosen": -1.4426627159118652, "logps/rejected": -6.552022457122803, "loss": 1.4433, "num_input_tokens_seen": 51600, "odds_ratio_loss": 14.290933609008789, "rewards/accuracies": 1.0, "rewards/chosen": -0.144266277551651, "rewards/margins": 0.5109359622001648, "rewards/rejected": -0.6552022695541382, "sft_loss": 0.01422051526606083, "step": 60 }, { "epoch": 0.8433734939759037, "grad_norm": 2.4223577976226807, "learning_rate": 2.9659233496337786e-06, "logits/chosen": -0.14738118648529053, "logits/rejected": -3.434018611907959, "logps/chosen": -1.1621037721633911, "logps/rejected": -6.5490217208862305, "loss": 1.1624, "num_input_tokens_seen": 62624, "odds_ratio_loss": 11.598767280578613, "rewards/accuracies": 1.0, "rewards/chosen": -0.11621036380529404, "rewards/margins": 0.5386918783187866, "rewards/rejected": -0.6549022197723389, "sft_loss": 0.0025552159640938044, "step": 70 }, { "epoch": 0.963855421686747, "grad_norm": 1.6316858530044556, "learning_rate": 1.6100130092037703e-07, "logits/chosen": -0.22155144810676575, "logits/rejected": -3.4224212169647217, "logps/chosen": -1.2641386985778809, "logps/rejected": -6.619866371154785, "loss": 1.2645, "num_input_tokens_seen": 73184, "odds_ratio_loss": 12.610678672790527, "rewards/accuracies": 1.0, "rewards/chosen": -0.12641386687755585, "rewards/margins": 0.5355727672576904, "rewards/rejected": -0.6619867086410522, "sft_loss": 0.003463461296632886, "step": 80 }, { "epoch": 1.0, "num_input_tokens_seen": 75616, "step": 83, "total_flos": 3443013082939392.0, "train_loss": 1.2913588443434383, "train_runtime": 65.9669, "train_samples_per_second": 1.258, "train_steps_per_second": 1.258 } ], "logging_steps": 10, "max_steps": 83, "num_input_tokens_seen": 75616, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3443013082939392.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }