{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 1, "global_step": 26, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "abs_diff": 0.4453125, "all_logps_1": -644.9681396484375, "all_logps_1_values": -644.9680786132812, "all_logps_2": 424.9236145019531, "all_logps_2_values": 424.9236145019531, "epoch": 0.038461538461538464, "grad_norm": 20.744102687471287, "learning_rate": 3.333333333333333e-07, "logits/chosen": 5.625, "logits/rejected": 5.625, "logps/chosen": -2.03125, "logps/rejected": -1.9375, "loss": 1.9612, "original_losses": 2.0625, "rewards/accuracies": 0.4270833432674408, "rewards/chosen": -5.0625, "rewards/margins": -0.2421875, "rewards/rejected": -4.84375, "step": 1, "weight": 1.0 }, { "epoch": 0.038461538461538464, "eval_abs_diff": 0.44921875, "eval_all_logps_1": -657.8338623046875, "eval_all_logps_1_values": -657.8338012695312, "eval_all_logps_2": 434.6329040527344, "eval_all_logps_2_values": 434.6329345703125, "eval_logits/chosen": 5.71875, "eval_logits/rejected": 5.6875, "eval_logps/chosen": -1.96875, "eval_logps/rejected": -2.03125, "eval_loss": 1.7894011735916138, "eval_original_losses": 1.8125, "eval_rewards/accuracies": 0.4404762089252472, "eval_rewards/chosen": -4.90625, "eval_rewards/margins": 0.189453125, "eval_rewards/rejected": -5.09375, "eval_runtime": 18.7618, "eval_samples_per_second": 104.521, "eval_steps_per_second": 0.373, "eval_weight": 1.0, "step": 1 }, { "epoch": 0.07692307692307693, "eval_abs_diff": 0.453125, "eval_all_logps_1": -657.5560913085938, "eval_all_logps_1_values": -657.5560302734375, "eval_all_logps_2": 434.6329040527344, "eval_all_logps_2_values": 434.6329345703125, "eval_logits/chosen": 5.6875, "eval_logits/rejected": 5.6875, "eval_logps/chosen": -1.9609375, "eval_logps/rejected": -2.03125, "eval_loss": 1.7886760234832764, "eval_original_losses": 1.8125, "eval_rewards/accuracies": 0.4444444477558136, "eval_rewards/chosen": -4.90625, "eval_rewards/margins": 0.189453125, "eval_rewards/rejected": -5.09375, "eval_runtime": 7.6106, "eval_samples_per_second": 257.667, "eval_steps_per_second": 0.92, "eval_weight": 1.0, "step": 2 }, { "epoch": 0.11538461538461539, "eval_abs_diff": 0.451171875, "eval_all_logps_1": -657.2574462890625, "eval_all_logps_1_values": -657.2574462890625, "eval_all_logps_2": 434.6329040527344, "eval_all_logps_2_values": 434.6329345703125, "eval_logits/chosen": 5.71875, "eval_logits/rejected": 5.6875, "eval_logps/chosen": -1.96875, "eval_logps/rejected": -2.046875, "eval_loss": 1.7886680364608765, "eval_original_losses": 1.8203125, "eval_rewards/accuracies": 0.4444444477558136, "eval_rewards/chosen": -4.9375, "eval_rewards/margins": 0.1884765625, "eval_rewards/rejected": -5.125, "eval_runtime": 8.5255, "eval_samples_per_second": 230.016, "eval_steps_per_second": 0.821, "eval_weight": 1.0, "step": 3 }, { "epoch": 0.15384615384615385, "eval_abs_diff": 0.451171875, "eval_all_logps_1": -657.5513916015625, "eval_all_logps_1_values": -657.5513305664062, "eval_all_logps_2": 434.6329040527344, "eval_all_logps_2_values": 434.6329345703125, "eval_logits/chosen": 5.71875, "eval_logits/rejected": 5.6875, "eval_logps/chosen": -1.96875, "eval_logps/rejected": -2.046875, "eval_loss": 1.789082407951355, "eval_original_losses": 1.8125, "eval_rewards/accuracies": 0.4365079700946808, "eval_rewards/chosen": -4.9375, "eval_rewards/margins": 0.1806640625, "eval_rewards/rejected": -5.09375, "eval_runtime": 7.6165, "eval_samples_per_second": 257.468, "eval_steps_per_second": 0.919, "eval_weight": 1.0, "step": 4 }, { "abs_diff": 0.466796875, "all_logps_1": -640.5847778320312, "all_logps_1_values": -640.584716796875, "all_logps_2": 413.3194274902344, "all_logps_2_values": 413.3194580078125, "epoch": 0.19230769230769232, "grad_norm": 21.578765352244158, "learning_rate": 9.814586436738997e-07, "logits/chosen": 5.6875, "logits/rejected": 5.6875, "logps/chosen": -1.9765625, "logps/rejected": -2.0, "loss": 1.868, "original_losses": 1.9140625, "rewards/accuracies": 0.4366319179534912, "rewards/chosen": -4.9375, "rewards/margins": 0.0712890625, "rewards/rejected": -5.0, "step": 5, "weight": 1.0 }, { "epoch": 0.19230769230769232, "eval_abs_diff": 0.447265625, "eval_all_logps_1": -656.76513671875, "eval_all_logps_1_values": -656.76513671875, "eval_all_logps_2": 434.6329040527344, "eval_all_logps_2_values": 434.6329345703125, "eval_logits/chosen": 5.71875, "eval_logits/rejected": 5.6875, "eval_logps/chosen": -1.9609375, "eval_logps/rejected": -2.03125, "eval_loss": 1.788102388381958, "eval_original_losses": 1.8125, "eval_rewards/accuracies": 0.432539701461792, "eval_rewards/chosen": -4.90625, "eval_rewards/margins": 0.181640625, "eval_rewards/rejected": -5.09375, "eval_runtime": 7.6214, "eval_samples_per_second": 257.303, "eval_steps_per_second": 0.918, "eval_weight": 1.0, "step": 5 }, { "epoch": 0.23076923076923078, "eval_abs_diff": 0.451171875, "eval_all_logps_1": -658.1024169921875, "eval_all_logps_1_values": -658.1024169921875, "eval_all_logps_2": 434.6329040527344, "eval_all_logps_2_values": 434.6329345703125, "eval_logits/chosen": 5.71875, "eval_logits/rejected": 5.6875, "eval_logps/chosen": -1.9765625, "eval_logps/rejected": -2.046875, "eval_loss": 1.7911142110824585, "eval_original_losses": 1.8203125, "eval_rewards/accuracies": 0.4523809254169464, "eval_rewards/chosen": -4.9375, "eval_rewards/margins": 0.1669921875, "eval_rewards/rejected": -5.09375, "eval_runtime": 7.6157, "eval_samples_per_second": 257.493, "eval_steps_per_second": 0.919, "eval_weight": 1.0, "step": 6 }, { "epoch": 0.2692307692307692, "eval_abs_diff": 0.451171875, "eval_all_logps_1": -657.3370361328125, "eval_all_logps_1_values": -657.3370361328125, "eval_all_logps_2": 434.6329040527344, "eval_all_logps_2_values": 434.6329345703125, "eval_logits/chosen": 5.71875, "eval_logits/rejected": 5.6875, "eval_logps/chosen": -1.9609375, "eval_logps/rejected": -2.03125, "eval_loss": 1.7869629859924316, "eval_original_losses": 1.8125, "eval_rewards/accuracies": 0.44841268658638, "eval_rewards/chosen": -4.90625, "eval_rewards/margins": 0.1845703125, "eval_rewards/rejected": -5.09375, "eval_runtime": 7.6132, "eval_samples_per_second": 257.579, "eval_steps_per_second": 0.919, "eval_weight": 1.0, "step": 7 }, { "epoch": 0.3076923076923077, "eval_abs_diff": 0.447265625, "eval_all_logps_1": -657.35888671875, "eval_all_logps_1_values": -657.35888671875, "eval_all_logps_2": 434.6329040527344, "eval_all_logps_2_values": 434.6329345703125, "eval_logits/chosen": 5.6875, "eval_logits/rejected": 5.65625, "eval_logps/chosen": -1.96875, "eval_logps/rejected": -2.03125, "eval_loss": 1.7835049629211426, "eval_original_losses": 1.8203125, "eval_rewards/accuracies": 0.4404762089252472, "eval_rewards/chosen": -4.90625, "eval_rewards/margins": 0.1728515625, "eval_rewards/rejected": -5.09375, "eval_runtime": 7.6173, "eval_samples_per_second": 257.44, "eval_steps_per_second": 0.919, "eval_weight": 1.0, "step": 8 }, { "epoch": 0.34615384615384615, "eval_abs_diff": 0.4453125, "eval_all_logps_1": -657.4702758789062, "eval_all_logps_1_values": -657.47021484375, "eval_all_logps_2": 434.6329040527344, "eval_all_logps_2_values": 434.6329345703125, "eval_logits/chosen": 5.71875, "eval_logits/rejected": 5.6875, "eval_logps/chosen": -1.9609375, "eval_logps/rejected": -2.03125, "eval_loss": 1.785959005355835, "eval_original_losses": 1.8125, "eval_rewards/accuracies": 0.4404761791229248, "eval_rewards/chosen": -4.90625, "eval_rewards/margins": 0.185546875, "eval_rewards/rejected": -5.09375, "eval_runtime": 7.6175, "eval_samples_per_second": 257.433, "eval_steps_per_second": 0.919, "eval_weight": 1.0, "step": 9 }, { "abs_diff": 0.427734375, "all_logps_1": -622.0468139648438, "all_logps_1_values": -622.0468139648438, "all_logps_2": 399.6798400878906, "all_logps_2_values": 399.67987060546875, "epoch": 0.38461538461538464, "grad_norm": 18.220030847238334, "learning_rate": 7.883401610574336e-07, "logits/chosen": 5.6875, "logits/rejected": 5.6875, "logps/chosen": -1.953125, "logps/rejected": -1.96875, "loss": 1.886, "original_losses": 1.8671875, "rewards/accuracies": 0.47291669249534607, "rewards/chosen": -4.875, "rewards/margins": 0.025146484375, "rewards/rejected": -4.90625, "step": 10, "weight": 1.0 }, { "epoch": 0.38461538461538464, "eval_abs_diff": 0.4453125, "eval_all_logps_1": -657.2244873046875, "eval_all_logps_1_values": -657.2244262695312, "eval_all_logps_2": 434.6329040527344, "eval_all_logps_2_values": 434.6329345703125, "eval_logits/chosen": 5.71875, "eval_logits/rejected": 5.6875, "eval_logps/chosen": -1.9609375, "eval_logps/rejected": -2.03125, "eval_loss": 1.789680004119873, "eval_original_losses": 1.8125, "eval_rewards/accuracies": 0.4325396716594696, "eval_rewards/chosen": -4.90625, "eval_rewards/margins": 0.185546875, "eval_rewards/rejected": -5.09375, "eval_runtime": 7.6199, "eval_samples_per_second": 257.352, "eval_steps_per_second": 0.919, "eval_weight": 1.0, "step": 10 }, { "epoch": 0.4230769230769231, "eval_abs_diff": 0.447265625, "eval_all_logps_1": -657.7448120117188, "eval_all_logps_1_values": -657.7448120117188, "eval_all_logps_2": 434.6329040527344, "eval_all_logps_2_values": 434.6329345703125, "eval_logits/chosen": 5.71875, "eval_logits/rejected": 5.6875, "eval_logps/chosen": -1.9609375, "eval_logps/rejected": -2.03125, "eval_loss": 1.785233974456787, "eval_original_losses": 1.8125, "eval_rewards/accuracies": 0.44841268658638, "eval_rewards/chosen": -4.90625, "eval_rewards/margins": 0.1806640625, "eval_rewards/rejected": -5.09375, "eval_runtime": 7.6147, "eval_samples_per_second": 257.527, "eval_steps_per_second": 0.919, "eval_weight": 1.0, "step": 11 }, { "epoch": 0.46153846153846156, "eval_abs_diff": 0.44921875, "eval_all_logps_1": -657.9037475585938, "eval_all_logps_1_values": -657.9037475585938, "eval_all_logps_2": 434.6329040527344, "eval_all_logps_2_values": 434.6329345703125, "eval_logits/chosen": 5.71875, "eval_logits/rejected": 5.6875, "eval_logps/chosen": -1.9609375, "eval_logps/rejected": -2.03125, "eval_loss": 1.7826603651046753, "eval_original_losses": 1.8203125, "eval_rewards/accuracies": 0.4603174328804016, "eval_rewards/chosen": -4.90625, "eval_rewards/margins": 0.1796875, "eval_rewards/rejected": -5.09375, "eval_runtime": 7.6147, "eval_samples_per_second": 257.529, "eval_steps_per_second": 0.919, "eval_weight": 1.0, "step": 12 }, { "epoch": 0.5, "eval_abs_diff": 0.451171875, "eval_all_logps_1": -657.7488403320312, "eval_all_logps_1_values": -657.748779296875, "eval_all_logps_2": 434.6329040527344, "eval_all_logps_2_values": 434.6329345703125, "eval_logits/chosen": 5.71875, "eval_logits/rejected": 5.6875, "eval_logps/chosen": -1.9609375, "eval_logps/rejected": -2.03125, "eval_loss": 1.7843893766403198, "eval_original_losses": 1.8203125, "eval_rewards/accuracies": 0.4365079402923584, "eval_rewards/chosen": -4.90625, "eval_rewards/margins": 0.1689453125, "eval_rewards/rejected": -5.0625, "eval_runtime": 7.6141, "eval_samples_per_second": 257.548, "eval_steps_per_second": 0.919, "eval_weight": 1.0, "step": 13 }, { "epoch": 0.5384615384615384, "eval_abs_diff": 0.439453125, "eval_all_logps_1": -657.5706787109375, "eval_all_logps_1_values": -657.5706787109375, "eval_all_logps_2": 434.6329040527344, "eval_all_logps_2_values": 434.6329345703125, "eval_logits/chosen": 5.71875, "eval_logits/rejected": 5.6875, "eval_logps/chosen": -1.953125, "eval_logps/rejected": -2.03125, "eval_loss": 1.782787799835205, "eval_original_losses": 1.8046875, "eval_rewards/accuracies": 0.4404762089252472, "eval_rewards/chosen": -4.875, "eval_rewards/margins": 0.1884765625, "eval_rewards/rejected": -5.0625, "eval_runtime": 7.6188, "eval_samples_per_second": 257.39, "eval_steps_per_second": 0.919, "eval_weight": 1.0, "step": 14 }, { "abs_diff": 0.4375, "all_logps_1": -654.1092529296875, "all_logps_1_values": -654.1092529296875, "all_logps_2": 426.70001220703125, "all_logps_2_values": 426.70001220703125, "epoch": 0.5769230769230769, "grad_norm": 20.281380041912414, "learning_rate": 4.6587879331766457e-07, "logits/chosen": 5.6875, "logits/rejected": 5.6875, "logps/chosen": -2.015625, "logps/rejected": -2.03125, "loss": 1.8572, "original_losses": 1.875, "rewards/accuracies": 0.43194445967674255, "rewards/chosen": -5.03125, "rewards/margins": 0.03857421875, "rewards/rejected": -5.0625, "step": 15, "weight": 1.0 }, { "epoch": 0.5769230769230769, "eval_abs_diff": 0.4453125, "eval_all_logps_1": -657.2753295898438, "eval_all_logps_1_values": -657.2752685546875, "eval_all_logps_2": 434.6329040527344, "eval_all_logps_2_values": 434.6329345703125, "eval_logits/chosen": 5.71875, "eval_logits/rejected": 5.6875, "eval_logps/chosen": -1.9609375, "eval_logps/rejected": -2.03125, "eval_loss": 1.785233974456787, "eval_original_losses": 1.8125, "eval_rewards/accuracies": 0.4365079402923584, "eval_rewards/chosen": -4.90625, "eval_rewards/margins": 0.1767578125, "eval_rewards/rejected": -5.0625, "eval_runtime": 7.6233, "eval_samples_per_second": 257.239, "eval_steps_per_second": 0.918, "eval_weight": 1.0, "step": 15 }, { "epoch": 0.6153846153846154, "eval_abs_diff": 0.44140625, "eval_all_logps_1": -657.5228271484375, "eval_all_logps_1_values": -657.5228271484375, "eval_all_logps_2": 434.6329040527344, "eval_all_logps_2_values": 434.6329345703125, "eval_logits/chosen": 5.71875, "eval_logits/rejected": 5.6875, "eval_logps/chosen": -1.953125, "eval_logps/rejected": -2.015625, "eval_loss": 1.7797918319702148, "eval_original_losses": 1.8125, "eval_rewards/accuracies": 0.4246031939983368, "eval_rewards/chosen": -4.90625, "eval_rewards/margins": 0.1708984375, "eval_rewards/rejected": -5.0625, "eval_runtime": 7.6127, "eval_samples_per_second": 257.596, "eval_steps_per_second": 0.92, "eval_weight": 1.0, "step": 16 }, { "epoch": 0.6538461538461539, "eval_abs_diff": 0.44140625, "eval_all_logps_1": -657.8072509765625, "eval_all_logps_1_values": -657.8073120117188, "eval_all_logps_2": 434.6329040527344, "eval_all_logps_2_values": 434.6329345703125, "eval_logits/chosen": 5.71875, "eval_logits/rejected": 5.6875, "eval_logps/chosen": -1.953125, "eval_logps/rejected": -2.03125, "eval_loss": 1.7796564102172852, "eval_original_losses": 1.8046875, "eval_rewards/accuracies": 0.4484127163887024, "eval_rewards/chosen": -4.875, "eval_rewards/margins": 0.181640625, "eval_rewards/rejected": -5.0625, "eval_runtime": 7.6162, "eval_samples_per_second": 257.477, "eval_steps_per_second": 0.919, "eval_weight": 1.0, "step": 17 }, { "epoch": 0.6923076923076923, "eval_abs_diff": 0.4375, "eval_all_logps_1": -657.43701171875, "eval_all_logps_1_values": -657.43701171875, "eval_all_logps_2": 434.6329040527344, "eval_all_logps_2_values": 434.6329345703125, "eval_logits/chosen": 5.71875, "eval_logits/rejected": 5.6875, "eval_logps/chosen": -1.9609375, "eval_logps/rejected": -2.03125, "eval_loss": 1.7829551696777344, "eval_original_losses": 1.8125, "eval_rewards/accuracies": 0.4404762089252472, "eval_rewards/chosen": -4.90625, "eval_rewards/margins": 0.1630859375, "eval_rewards/rejected": -5.0625, "eval_runtime": 7.6171, "eval_samples_per_second": 257.447, "eval_steps_per_second": 0.919, "eval_weight": 1.0, "step": 18 }, { "epoch": 0.7307692307692307, "eval_abs_diff": 0.44140625, "eval_all_logps_1": -657.5411376953125, "eval_all_logps_1_values": -657.5411987304688, "eval_all_logps_2": 434.6329040527344, "eval_all_logps_2_values": 434.6329345703125, "eval_logits/chosen": 5.71875, "eval_logits/rejected": 5.6875, "eval_logps/chosen": -1.9609375, "eval_logps/rejected": -2.03125, "eval_loss": 1.783098578453064, "eval_original_losses": 1.8046875, "eval_rewards/accuracies": 0.4523809552192688, "eval_rewards/chosen": -4.875, "eval_rewards/margins": 0.1787109375, "eval_rewards/rejected": -5.0625, "eval_runtime": 7.6155, "eval_samples_per_second": 257.501, "eval_steps_per_second": 0.919, "eval_weight": 1.0, "step": 19 }, { "abs_diff": 0.392578125, "all_logps_1": -622.8844604492188, "all_logps_1_values": -622.8843994140625, "all_logps_2": 406.131591796875, "all_logps_2_values": 406.131591796875, "epoch": 0.7692307692307693, "grad_norm": 25.7996031816865, "learning_rate": 1.5872342839067304e-07, "logits/chosen": 5.75, "logits/rejected": 5.71875, "logps/chosen": -1.9609375, "logps/rejected": -1.96875, "loss": 1.8374, "original_losses": 1.828125, "rewards/accuracies": 0.44930553436279297, "rewards/chosen": -4.90625, "rewards/margins": 0.0208740234375, "rewards/rejected": -4.9375, "step": 20, "weight": 1.0 }, { "epoch": 0.7692307692307693, "eval_abs_diff": 0.451171875, "eval_all_logps_1": -657.5830078125, "eval_all_logps_1_values": -657.5830688476562, "eval_all_logps_2": 434.6329040527344, "eval_all_logps_2_values": 434.6329345703125, "eval_logits/chosen": 5.71875, "eval_logits/rejected": 5.6875, "eval_logps/chosen": -1.953125, "eval_logps/rejected": -2.03125, "eval_loss": 1.7812340259552002, "eval_original_losses": 1.8046875, "eval_rewards/accuracies": 0.4523809552192688, "eval_rewards/chosen": -4.90625, "eval_rewards/margins": 0.197265625, "eval_rewards/rejected": -5.09375, "eval_runtime": 7.6104, "eval_samples_per_second": 257.674, "eval_steps_per_second": 0.92, "eval_weight": 1.0, "step": 20 }, { "epoch": 0.8076923076923077, "eval_abs_diff": 0.44140625, "eval_all_logps_1": -657.6909790039062, "eval_all_logps_1_values": -657.6909790039062, "eval_all_logps_2": 434.6329040527344, "eval_all_logps_2_values": 434.6329345703125, "eval_logits/chosen": 5.71875, "eval_logits/rejected": 5.6875, "eval_logps/chosen": -1.9609375, "eval_logps/rejected": -2.03125, "eval_loss": 1.7849550247192383, "eval_original_losses": 1.8125, "eval_rewards/accuracies": 0.4444444477558136, "eval_rewards/chosen": -4.875, "eval_rewards/margins": 0.171875, "eval_rewards/rejected": -5.0625, "eval_runtime": 7.6199, "eval_samples_per_second": 257.354, "eval_steps_per_second": 0.919, "eval_weight": 1.0, "step": 21 }, { "epoch": 0.8461538461538461, "eval_abs_diff": 0.443359375, "eval_all_logps_1": -657.1679077148438, "eval_all_logps_1_values": -657.1679077148438, "eval_all_logps_2": 434.6329040527344, "eval_all_logps_2_values": 434.6329345703125, "eval_logits/chosen": 5.71875, "eval_logits/rejected": 5.6875, "eval_logps/chosen": -1.953125, "eval_logps/rejected": -2.03125, "eval_loss": 1.7851064205169678, "eval_original_losses": 1.8046875, "eval_rewards/accuracies": 0.4404761791229248, "eval_rewards/chosen": -4.90625, "eval_rewards/margins": 0.18359375, "eval_rewards/rejected": -5.0625, "eval_runtime": 7.6179, "eval_samples_per_second": 257.42, "eval_steps_per_second": 0.919, "eval_weight": 1.0, "step": 22 }, { "epoch": 0.8846153846153846, "eval_abs_diff": 0.4375, "eval_all_logps_1": -658.0194091796875, "eval_all_logps_1_values": -658.0193481445312, "eval_all_logps_2": 434.6329040527344, "eval_all_logps_2_values": 434.6329345703125, "eval_logits/chosen": 5.71875, "eval_logits/rejected": 5.6875, "eval_logps/chosen": -1.9609375, "eval_logps/rejected": -2.03125, "eval_loss": 1.7782222032546997, "eval_original_losses": 1.8046875, "eval_rewards/accuracies": 0.436507910490036, "eval_rewards/chosen": -4.90625, "eval_rewards/margins": 0.1748046875, "eval_rewards/rejected": -5.0625, "eval_runtime": 7.6206, "eval_samples_per_second": 257.33, "eval_steps_per_second": 0.919, "eval_weight": 1.0, "step": 23 }, { "epoch": 0.9230769230769231, "eval_abs_diff": 0.4375, "eval_all_logps_1": -657.4482421875, "eval_all_logps_1_values": -657.4481811523438, "eval_all_logps_2": 434.6329040527344, "eval_all_logps_2_values": 434.6329345703125, "eval_logits/chosen": 5.71875, "eval_logits/rejected": 5.6875, "eval_logps/chosen": -1.9609375, "eval_logps/rejected": -2.03125, "eval_loss": 1.7799512147903442, "eval_original_losses": 1.8046875, "eval_rewards/accuracies": 0.4523809552192688, "eval_rewards/chosen": -4.90625, "eval_rewards/margins": 0.1708984375, "eval_rewards/rejected": -5.0625, "eval_runtime": 7.6139, "eval_samples_per_second": 257.556, "eval_steps_per_second": 0.919, "eval_weight": 1.0, "step": 24 }, { "abs_diff": 0.396484375, "all_logps_1": -644.0272216796875, "all_logps_1_values": -644.0272216796875, "all_logps_2": 413.7660217285156, "all_logps_2_values": 413.7659606933594, "epoch": 0.9615384615384616, "grad_norm": 18.38859727265469, "learning_rate": 4.657026981834622e-09, "logits/chosen": 5.71875, "logits/rejected": 5.6875, "logps/chosen": -1.984375, "logps/rejected": -1.9609375, "loss": 1.8714, "original_losses": 1.90625, "rewards/accuracies": 0.4229166507720947, "rewards/chosen": -4.96875, "rewards/margins": -0.0693359375, "rewards/rejected": -4.90625, "step": 25, "weight": 1.0 }, { "epoch": 0.9615384615384616, "eval_abs_diff": 0.4375, "eval_all_logps_1": -657.451171875, "eval_all_logps_1_values": -657.4511108398438, "eval_all_logps_2": 434.6329040527344, "eval_all_logps_2_values": 434.6329345703125, "eval_logits/chosen": 5.71875, "eval_logits/rejected": 5.6875, "eval_logps/chosen": -1.953125, "eval_logps/rejected": -2.03125, "eval_loss": 1.778795838356018, "eval_original_losses": 1.796875, "eval_rewards/accuracies": 0.4325396716594696, "eval_rewards/chosen": -4.875, "eval_rewards/margins": 0.181640625, "eval_rewards/rejected": -5.0625, "eval_runtime": 7.6217, "eval_samples_per_second": 257.293, "eval_steps_per_second": 0.918, "eval_weight": 1.0, "step": 25 }, { "epoch": 1.0, "eval_abs_diff": 0.4453125, "eval_all_logps_1": -656.8973388671875, "eval_all_logps_1_values": -656.8973388671875, "eval_all_logps_2": 434.6329040527344, "eval_all_logps_2_values": 434.6329345703125, "eval_logits/chosen": 5.71875, "eval_logits/rejected": 5.6875, "eval_logps/chosen": -1.9453125, "eval_logps/rejected": -2.03125, "eval_loss": 1.7800946235656738, "eval_original_losses": 1.796875, "eval_rewards/accuracies": 0.4404762089252472, "eval_rewards/chosen": -4.875, "eval_rewards/margins": 0.2001953125, "eval_rewards/rejected": -5.0625, "eval_runtime": 7.6164, "eval_samples_per_second": 257.472, "eval_steps_per_second": 0.919, "eval_weight": 1.0, "step": 26 }, { "epoch": 1.0, "step": 26, "total_flos": 0.0, "train_loss": 1.8681734525240385, "train_runtime": 997.9993, "train_samples_per_second": 59.996, "train_steps_per_second": 0.026 } ], "logging_steps": 5, "max_steps": 26, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 36, "trial_name": null, "trial_params": null }