{ "best_metric": 1.6283894777297974, "best_model_checkpoint": "./Zephyr/09-03-24-Weni-WeniGPT-2.8.1-Zephyr-7B-zephyr-prompt-DPO-binarized_DPO tests with binarized dataset-2_max_steps-112_batch_16_2024-03-09_ppid_7/checkpoint-100", "epoch": 0.8888888888888888, "eval_steps": 100, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.18, "grad_norm": 65.50724029541016, "learning_rate": 0.000194, "logits/chosen": -2.6164324283599854, "logits/rejected": -2.6339125633239746, "logps/chosen": -366.4737854003906, "logps/rejected": -344.0569763183594, "loss": 1.0162, "rewards/accuracies": 0.3218750059604645, "rewards/chosen": -0.37838560342788696, "rewards/margins": 0.8077453374862671, "rewards/rejected": -1.1861308813095093, "step": 20 }, { "epoch": 0.36, "grad_norm": 302.9382629394531, "learning_rate": 0.000154, "logits/chosen": -2.624668598175049, "logits/rejected": -2.626055955886841, "logps/chosen": -387.1952209472656, "logps/rejected": -358.3865661621094, "loss": 2.0677, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": 8.134626388549805, "rewards/margins": 6.054707050323486, "rewards/rejected": 2.0799195766448975, "step": 40 }, { "epoch": 0.53, "grad_norm": 105.22598266601562, "learning_rate": 0.00011399999999999999, "logits/chosen": -2.7092220783233643, "logits/rejected": -2.7026538848876953, "logps/chosen": -317.4967041015625, "logps/rejected": -311.2018737792969, "loss": 2.2608, "rewards/accuracies": 0.47187501192092896, "rewards/chosen": 0.6841039657592773, "rewards/margins": 5.323451042175293, "rewards/rejected": -4.639346599578857, "step": 60 }, { "epoch": 0.71, "grad_norm": 82.97381591796875, "learning_rate": 7.4e-05, "logits/chosen": -2.659626007080078, "logits/rejected": -2.6671993732452393, "logps/chosen": -358.8479919433594, "logps/rejected": -344.2733154296875, "loss": 2.3074, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.8363685607910156, "rewards/margins": 4.435623645782471, "rewards/rejected": -5.2719926834106445, "step": 80 }, { "epoch": 0.89, "grad_norm": 139.97763061523438, "learning_rate": 3.4000000000000007e-05, "logits/chosen": -2.6330013275146484, "logits/rejected": -2.6500449180603027, "logps/chosen": -378.33270263671875, "logps/rejected": -368.93402099609375, "loss": 2.0157, "rewards/accuracies": 0.5093749761581421, "rewards/chosen": -0.5400040745735168, "rewards/margins": 4.212619304656982, "rewards/rejected": -4.752623558044434, "step": 100 }, { "epoch": 0.89, "eval_logits/chosen": -2.5779407024383545, "eval_logits/rejected": -2.5749547481536865, "eval_logps/chosen": -362.0609130859375, "eval_logps/rejected": -357.7032165527344, "eval_loss": 1.6283894777297974, "eval_rewards/accuracies": 0.5649999976158142, "eval_rewards/chosen": 0.8732965588569641, "eval_rewards/margins": 6.959313869476318, "eval_rewards/rejected": -6.086017608642578, "eval_runtime": 99.816, "eval_samples_per_second": 2.004, "eval_steps_per_second": 0.501, "step": 100 } ], "logging_steps": 20, "max_steps": 112, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }