{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 1158, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "learning_rate": 1.9137931034482762e-05, "logits/chosen": -2.7039783000946045, "logits/rejected": -2.699718713760376, "logps/chosen": -178.0665283203125, "logps/rejected": -202.87210083007812, "loss": 0.65, "rewards/accuracies": 0.798701286315918, "rewards/chosen": -0.09282779693603516, "rewards/margins": 0.09910193085670471, "rewards/rejected": -0.19192971289157867, "step": 77 }, { "epoch": 0.4, "learning_rate": 2.899232245681382e-05, "logits/chosen": -2.7582526206970215, "logits/rejected": -2.6498587131500244, "logps/chosen": -206.910888671875, "logps/rejected": -244.54962158203125, "loss": 0.3215, "rewards/accuracies": 0.9740259647369385, "rewards/chosen": -2.6819589138031006, "rewards/margins": 1.8207621574401855, "rewards/rejected": -4.502721309661865, "step": 154 }, { "epoch": 0.6, "learning_rate": 2.6833013435700576e-05, "logits/chosen": -3.261249542236328, "logits/rejected": -3.26186203956604, "logps/chosen": -636.440673828125, "logps/rejected": -844.8958740234375, "loss": 0.1384, "rewards/accuracies": 0.9675324559211731, "rewards/chosen": -45.84988021850586, "rewards/margins": 18.366718292236328, "rewards/rejected": -64.21659088134766, "step": 231 }, { "epoch": 0.8, "learning_rate": 2.4644913627639156e-05, "logits/chosen": -3.799668788909912, "logits/rejected": -3.877474308013916, "logps/chosen": -1188.7479248046875, "logps/rejected": -1513.479248046875, "loss": 0.1428, "rewards/accuracies": 0.9675324559211731, "rewards/chosen": -100.75252532958984, "rewards/margins": 30.6811580657959, "rewards/rejected": -131.4336700439453, "step": 308 }, { "epoch": 1.0, "learning_rate": 2.2428023032629558e-05, "logits/chosen": -3.48297119140625, "logits/rejected": -3.503549098968506, "logps/chosen": -1211.92041015625, "logps/rejected": -1585.6292724609375, "loss": 0.3247, "rewards/accuracies": 0.9545454382896423, "rewards/chosen": -102.89164733886719, "rewards/margins": 35.46805953979492, "rewards/rejected": -138.3596954345703, "step": 385 }, { "epoch": 1.2, "learning_rate": 2.0211132437619963e-05, "logits/chosen": -3.9055564403533936, "logits/rejected": -3.94558048248291, "logps/chosen": -1156.05908203125, "logps/rejected": -1524.2059326171875, "loss": 0.2889, "rewards/accuracies": 0.9545454382896423, "rewards/chosen": -97.4389877319336, "rewards/margins": 34.911155700683594, "rewards/rejected": -132.3501434326172, "step": 462 }, { "epoch": 1.4, "learning_rate": 1.7994241842610365e-05, "logits/chosen": -4.063717365264893, "logits/rejected": -4.160610675811768, "logps/chosen": -1216.2169189453125, "logps/rejected": -1579.79931640625, "loss": 0.0666, "rewards/accuracies": 0.9935064911842346, "rewards/chosen": -103.59471130371094, "rewards/margins": 34.364356994628906, "rewards/rejected": -137.95909118652344, "step": 539 }, { "epoch": 1.6, "learning_rate": 1.577735124760077e-05, "logits/chosen": -3.7622299194335938, "logits/rejected": -3.8654086589813232, "logps/chosen": -1256.462158203125, "logps/rejected": -1670.1668701171875, "loss": 0.3068, "rewards/accuracies": 0.9545454382896423, "rewards/chosen": -107.87626647949219, "rewards/margins": 38.972930908203125, "rewards/rejected": -146.8491973876953, "step": 616 }, { "epoch": 1.8, "learning_rate": 1.3589251439539348e-05, "logits/chosen": -3.9780585765838623, "logits/rejected": -4.0323262214660645, "logps/chosen": 
-1205.580322265625, "logps/rejected": -1609.4249267578125, "loss": 0.0732, "rewards/accuracies": 0.9675324559211731, "rewards/chosen": -102.83528137207031, "rewards/margins": 38.15076446533203, "rewards/rejected": -140.9860382080078, "step": 693 }, { "epoch": 1.99, "learning_rate": 1.137236084452975e-05, "logits/chosen": -3.9996836185455322, "logits/rejected": -4.038970947265625, "logps/chosen": -1479.3536376953125, "logps/rejected": -1951.1041259765625, "loss": 0.2627, "rewards/accuracies": 0.9935064911842346, "rewards/chosen": -129.55506896972656, "rewards/margins": 45.21900177001953, "rewards/rejected": -174.77407836914062, "step": 770 }, { "epoch": 2.19, "learning_rate": 9.155470249520153e-06, "logits/chosen": -4.09077262878418, "logits/rejected": -4.1275434494018555, "logps/chosen": -1435.885986328125, "logps/rejected": -1983.1124267578125, "loss": 0.0968, "rewards/accuracies": 0.9870129823684692, "rewards/chosen": -125.91879272460938, "rewards/margins": 52.251136779785156, "rewards/rejected": -178.16995239257812, "step": 847 }, { "epoch": 2.39, "learning_rate": 6.938579654510557e-06, "logits/chosen": -4.158607482910156, "logits/rejected": -4.210606098175049, "logps/chosen": -1408.3914794921875, "logps/rejected": -1875.4822998046875, "loss": 0.0166, "rewards/accuracies": 0.9805194735527039, "rewards/chosen": -122.99951934814453, "rewards/margins": 44.64362335205078, "rewards/rejected": -167.64315795898438, "step": 924 }, { "epoch": 2.59, "learning_rate": 4.72168905950096e-06, "logits/chosen": -4.183628559112549, "logits/rejected": -4.219183444976807, "logps/chosen": -1309.217529296875, "logps/rejected": -1762.13232421875, "loss": 0.0602, "rewards/accuracies": 0.9870129823684692, "rewards/chosen": -113.17806243896484, "rewards/margins": 43.657859802246094, "rewards/rejected": -156.83592224121094, "step": 1001 }, { "epoch": 2.79, "learning_rate": 2.5335892514395392e-06, "logits/chosen": -4.1587114334106445, "logits/rejected": -4.193912029266357, "logps/chosen": -1347.27978515625, "logps/rejected": -1834.71728515625, "loss": 0.0365, "rewards/accuracies": 0.9935064911842346, "rewards/chosen": -116.40560150146484, "rewards/margins": 46.630714416503906, "rewards/rejected": -163.03631591796875, "step": 1078 }, { "epoch": 2.99, "learning_rate": 3.166986564299424e-07, "logits/chosen": -4.090827465057373, "logits/rejected": -4.138672351837158, "logps/chosen": -1456.274169921875, "logps/rejected": -1961.9942626953125, "loss": 0.0055, "rewards/accuracies": 0.9935064911842346, "rewards/chosen": -127.70254516601562, "rewards/margins": 48.34178924560547, "rewards/rejected": -176.04434204101562, "step": 1155 } ], "logging_steps": 77, "max_steps": 1158, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }