{ "best_metric": 0.3777986466884613, "best_model_checkpoint": "./zephyr/09-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.13-KTO_Hyperparameter search, altering lora params for KTO task.-2_max_steps-145_batch_16_2024-04-09_ppid_9/checkpoint-100", "epoch": 0.684931506849315, "eval_steps": 50, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "grad_norm": 2.111046075820923, "learning_rate": 0.00018, "loss": 0.4439, "step": 20, "train/kl": 8.516629219055176, "train/logps/chosen": -296.32746550324674, "train/logps/rejected": -277.9095679593373, "train/rewards/chosen": 0.7222159744857194, "train/rewards/margins": 0.9555325568552365, "train/rewards/rejected": -0.23331658236951713 }, { "epoch": 0.27, "grad_norm": 3.2633113861083984, "learning_rate": 0.00015142857142857143, "loss": 0.4194, "step": 40, "train/kl": 1.0890424251556396, "train/logps/chosen": -307.1073526152482, "train/logps/rejected": -314.9674406424581, "train/rewards/chosen": -1.5282932579094637, "train/rewards/margins": 1.4390720282942722, "train/rewards/rejected": -2.967365286203736 }, { "epoch": 0.34, "eval/kl": 15.062309265136719, "eval/logps/chosen": -259.4809639084507, "eval/logps/rejected": -256.0586679193038, "eval/rewards/chosen": 2.41808867118728, "eval/rewards/margins": 1.5870417096564255, "eval/rewards/rejected": 0.8310469615308544, "eval_loss": 0.42071497440338135, "eval_runtime": 140.3611, "eval_samples_per_second": 2.137, "eval_steps_per_second": 0.534, "step": 50 }, { "epoch": 0.41, "grad_norm": 2.257838487625122, "learning_rate": 0.00012285714285714287, "loss": 0.398, "step": 60, "train/kl": 14.152316093444824, "train/logps/chosen": -258.6226310483871, "train/logps/rejected": -281.4683475378788, "train/rewards/chosen": 2.234444304435484, "train/rewards/margins": 1.6334723195722025, "train/rewards/rejected": 0.6009719848632813 }, { "epoch": 0.55, "grad_norm": 2.802126884460449, "learning_rate": 9.428571428571429e-05, "loss": 0.381, "step": 80, "train/kl": 3.5482826232910156, "train/logps/chosen": -274.61489125844594, "train/logps/rejected": -313.1891124636628, "train/rewards/chosen": 0.9537969022183805, "train/rewards/margins": 3.017533396415782, "train/rewards/rejected": -2.0637364941974017 }, { "epoch": 0.68, "grad_norm": 1.998615026473999, "learning_rate": 6.571428571428571e-05, "loss": 0.3603, "step": 100, "train/kl": 6.8296308517456055, "train/logps/chosen": -256.6186432453416, "train/logps/rejected": -291.86861242138366, "train/rewards/chosen": 2.3773075838266693, "train/rewards/margins": 3.0153399332272195, "train/rewards/rejected": -0.6380323494005503 }, { "epoch": 0.68, "eval/kl": 10.186383247375488, "eval/logps/chosen": -251.80529269366198, "eval/logps/rejected": -260.74075356012656, "eval/rewards/chosen": 3.1856545461735255, "eval/rewards/margins": 2.8228144007244302, "eval/rewards/rejected": 0.36284014544909515, "eval_loss": 0.3777986466884613, "eval_runtime": 140.492, "eval_samples_per_second": 2.135, "eval_steps_per_second": 0.534, "step": 100 } ], "logging_steps": 20, "max_steps": 145, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }