{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 83, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12048192771084337, "grad_norm": 2.8706510066986084, "learning_rate": 4.8230451807939135e-05, "logits/chosen": -0.11457079648971558, "logits/rejected": -3.2889251708984375, "logps/chosen": -1.456398367881775, "logps/rejected": -3.6490638256073, "loss": 1.4781, "num_input_tokens_seen": 8864, "odds_ratio_loss": 14.7344388961792, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.14563985168933868, "rewards/margins": 0.21926657855510712, "rewards/rejected": -0.3649064302444458, "sft_loss": 0.0046225739642977715, "step": 10 }, { "epoch": 0.24096385542168675, "grad_norm": 2.050874710083008, "learning_rate": 4.3172311296078595e-05, "logits/chosen": -0.26779693365097046, "logits/rejected": -3.211538314819336, "logps/chosen": -1.4712103605270386, "logps/rejected": -5.510354042053223, "loss": 1.4732, "num_input_tokens_seen": 17712, "odds_ratio_loss": 14.650479316711426, "rewards/accuracies": 1.0, "rewards/chosen": -0.14712102711200714, "rewards/margins": 0.4039144515991211, "rewards/rejected": -0.551035463809967, "sft_loss": 0.008165580220520496, "step": 20 }, { "epoch": 0.3614457831325301, "grad_norm": 2.1595640182495117, "learning_rate": 3.55416283362546e-05, "logits/chosen": -0.2051524668931961, "logits/rejected": -3.36714506149292, "logps/chosen": -1.3134592771530151, "logps/rejected": -6.383711814880371, "loss": 1.314, "num_input_tokens_seen": 27520, "odds_ratio_loss": 13.103729248046875, "rewards/accuracies": 1.0, "rewards/chosen": -0.1313459426164627, "rewards/margins": 0.5070253610610962, "rewards/rejected": -0.6383712291717529, "sft_loss": 0.0035842768847942352, "step": 30 }, { "epoch": 0.4819277108433735, "grad_norm": 2.4059877395629883, "learning_rate": 2.6418631827326857e-05, "logits/chosen": -0.26706236600875854, "logits/rejected": -3.2312896251678467, "logps/chosen": -1.2261059284210205, "logps/rejected": -6.600752830505371, "loss": 1.2266, "num_input_tokens_seen": 36096, "odds_ratio_loss": 12.158276557922363, "rewards/accuracies": 1.0, "rewards/chosen": -0.12261058390140533, "rewards/margins": 0.5374647378921509, "rewards/rejected": -0.660075306892395, "sft_loss": 0.010727915912866592, "step": 40 }, { "epoch": 0.6024096385542169, "grad_norm": 2.4229519367218018, "learning_rate": 1.70948083275794e-05, "logits/chosen": -0.1631634533405304, "logits/rejected": -3.3860981464385986, "logps/chosen": -1.0198614597320557, "logps/rejected": -6.75305700302124, "loss": 1.0201, "num_input_tokens_seen": 43456, "odds_ratio_loss": 10.168444633483887, "rewards/accuracies": 1.0, "rewards/chosen": -0.10198613256216049, "rewards/margins": 0.5733195543289185, "rewards/rejected": -0.6753057241439819, "sft_loss": 0.0032480526715517044, "step": 50 }, { "epoch": 0.7228915662650602, "grad_norm": 2.3134255409240723, "learning_rate": 8.890074238378074e-06, "logits/chosen": -0.19685813784599304, "logits/rejected": -3.2213809490203857, "logps/chosen": -1.4441629648208618, "logps/rejected": -6.606205940246582, "loss": 1.4448, "num_input_tokens_seen": 51600, "odds_ratio_loss": 14.304969787597656, "rewards/accuracies": 1.0, "rewards/chosen": -0.14441628754138947, "rewards/margins": 0.5162042379379272, "rewards/rejected": -0.6606205701828003, "sft_loss": 0.014286870136857033, "step": 60 }, { "epoch": 0.8433734939759037, "grad_norm": 2.4275636672973633, "learning_rate": 2.9659233496337786e-06, "logits/chosen": -0.13393770158290863, "logits/rejected": -3.393664836883545, "logps/chosen": -1.1611616611480713, "logps/rejected": -6.605759620666504, "loss": 1.1615, "num_input_tokens_seen": 62624, "odds_ratio_loss": 11.589168548583984, "rewards/accuracies": 1.0, "rewards/chosen": -0.11611616611480713, "rewards/margins": 0.5444598197937012, "rewards/rejected": -0.6605759859085083, "sft_loss": 0.002552599413320422, "step": 70 }, { "epoch": 0.963855421686747, "grad_norm": 1.650820255279541, "learning_rate": 1.6100130092037703e-07, "logits/chosen": -0.20625996589660645, "logits/rejected": -3.3859100341796875, "logps/chosen": -1.2622536420822144, "logps/rejected": -6.649239540100098, "loss": 1.2626, "num_input_tokens_seen": 73184, "odds_ratio_loss": 12.59186840057373, "rewards/accuracies": 1.0, "rewards/chosen": -0.12622536718845367, "rewards/margins": 0.5386986136436462, "rewards/rejected": -0.664923906326294, "sft_loss": 0.0034584165550768375, "step": 80 }, { "epoch": 1.0, "num_input_tokens_seen": 75616, "step": 83, "total_flos": 3443013082939392.0, "train_loss": 1.2913809109883136, "train_runtime": 56.3341, "train_samples_per_second": 1.473, "train_steps_per_second": 1.473 } ], "logging_steps": 10, "max_steps": 83, "num_input_tokens_seen": 75616, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3443013082939392.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }