{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.968,
  "eval_steps": 100,
  "global_step": 248,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "learning_rate": 2.0000000000000002e-07,
      "logits/chosen": 0.34545159339904785,
      "logits/rejected": 0.2957597076892853,
      "logps/chosen": -217.1103973388672,
      "logps/rejected": -154.90234375,
      "loss": 0.0009,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.16,
      "learning_rate": 2.0000000000000003e-06,
      "logits/chosen": 0.24290476739406586,
      "logits/rejected": 0.1927487701177597,
      "logps/chosen": -189.50128173828125,
      "logps/rejected": -162.37692260742188,
      "loss": 0.0011,
      "rewards/accuracies": 0.3055555522441864,
      "rewards/chosen": -0.0005859931115992367,
      "rewards/margins": -0.001147971022874117,
      "rewards/rejected": 0.0005619778530672193,
      "step": 10
    },
    {
      "epoch": 0.32,
      "learning_rate": 4.000000000000001e-06,
      "logits/chosen": 0.1098150983452797,
      "logits/rejected": 0.06667135655879974,
      "logps/chosen": -170.3683319091797,
      "logps/rejected": -145.548095703125,
      "loss": 0.0011,
      "rewards/accuracies": 0.38749998807907104,
      "rewards/chosen": -2.7951715310337022e-05,
      "rewards/margins": -0.00022801189334131777,
      "rewards/rejected": 0.00020006010890938342,
      "step": 20
    },
    {
      "epoch": 0.48,
      "learning_rate": 4.993800445762451e-06,
      "logits/chosen": 0.15559187531471252,
      "logits/rejected": 0.15184751152992249,
      "logps/chosen": -184.87167358398438,
      "logps/rejected": -146.75131225585938,
      "loss": 0.0011,
      "rewards/accuracies": 0.39375001192092896,
      "rewards/chosen": -0.00036125333281233907,
      "rewards/margins": -0.0002040974359260872,
      "rewards/rejected": -0.00015715583867859095,
      "step": 30
    },
    {
      "epoch": 0.64,
      "learning_rate": 4.944388344834205e-06,
      "logits/chosen": 0.11296383291482925,
      "logits/rejected": 0.20522311329841614,
      "logps/chosen": -182.8056640625,
      "logps/rejected": -148.15756225585938,
      "loss": 0.0009,
      "rewards/accuracies": 0.4625000059604645,
      "rewards/chosen": -0.00020849374413955957,
      "rewards/margins": 0.001540421275421977,
      "rewards/rejected": -0.0017489150632172823,
      "step": 40
    },
    {
      "epoch": 0.8,
      "learning_rate": 4.8465431931347904e-06,
      "logits/chosen": 0.2731459140777588,
      "logits/rejected": 0.17598305642604828,
      "logps/chosen": -196.51651000976562,
      "logps/rejected": -161.89797973632812,
      "loss": 0.001,
      "rewards/accuracies": 0.4437499940395355,
      "rewards/chosen": -0.0016538338968530297,
      "rewards/margins": 0.000834259029943496,
      "rewards/rejected": -0.0024880929850041866,
      "step": 50
    },
    {
      "epoch": 0.96,
      "learning_rate": 4.702203692102539e-06,
      "logits/chosen": 0.19920073449611664,
      "logits/rejected": 0.17924004793167114,
      "logps/chosen": -182.0515594482422,
      "logps/rejected": -150.9364013671875,
      "loss": 0.001,
      "rewards/accuracies": 0.40625,
      "rewards/chosen": 0.00011732898565242067,
      "rewards/margins": 0.0012116450816392899,
      "rewards/rejected": -0.0010943160159513354,
      "step": 60
    },
    {
      "epoch": 1.12,
      "learning_rate": 4.514229781074239e-06,
      "logits/chosen": 0.1093553751707077,
      "logits/rejected": 0.17700831592082977,
      "logps/chosen": -203.3074188232422,
      "logps/rejected": -173.57713317871094,
      "loss": 0.001,
      "rewards/accuracies": 0.4312500059604645,
      "rewards/chosen": -0.00015989062376320362,
      "rewards/margins": 0.0011065483558923006,
      "rewards/rejected": -0.0012664392124861479,
      "step": 70
    },
    {
      "epoch": 1.28,
      "learning_rate": 4.286345970517195e-06,
      "logits/chosen": 0.13181297481060028,
      "logits/rejected": 0.1530202180147171,
      "logps/chosen": -193.3525848388672,
      "logps/rejected": -161.02328491210938,
      "loss": 0.0011,
      "rewards/accuracies": 0.40625,
      "rewards/chosen": 0.00105115189217031,
      "rewards/margins": 0.00026105757569894195,
      "rewards/rejected": 0.0007900940254330635,
      "step": 80
    },
    {
      "epoch": 1.44,
      "learning_rate": 4.023067544670082e-06,
      "logits/chosen": 0.10432066023349762,
      "logits/rejected": 0.11901885271072388,
      "logps/chosen": -184.4915771484375,
      "logps/rejected": -151.45025634765625,
      "loss": 0.0009,
      "rewards/accuracies": 0.46875,
      "rewards/chosen": 0.0010537179186940193,
      "rewards/margins": 0.002086392603814602,
      "rewards/rejected": -0.0010326746851205826,
      "step": 90
    },
    {
      "epoch": 1.6,
      "learning_rate": 3.7296110958116845e-06,
      "logits/chosen": 0.17887040972709656,
      "logits/rejected": 0.13352127373218536,
      "logps/chosen": -176.19094848632812,
      "logps/rejected": -149.05015563964844,
      "loss": 0.001,
      "rewards/accuracies": 0.375,
      "rewards/chosen": 0.0006333684432320297,
      "rewards/margins": 0.00030291633447632194,
      "rewards/rejected": 0.00033045216696336865,
      "step": 100
    },
    {
      "epoch": 1.6,
      "eval_logits/chosen": -0.005087433848530054,
      "eval_logits/rejected": 0.09259650856256485,
      "eval_logps/chosen": -306.27325439453125,
      "eval_logps/rejected": -278.57037353515625,
      "eval_loss": 0.0014748616376891732,
      "eval_rewards/accuracies": 0.5040000081062317,
      "eval_rewards/chosen": 8.555292879464105e-06,
      "eval_rewards/margins": 0.00035174566437490284,
      "eval_rewards/rejected": -0.0003431903896853328,
      "eval_runtime": 421.2425,
      "eval_samples_per_second": 4.748,
      "eval_steps_per_second": 1.187,
      "step": 100
    },
    {
      "epoch": 1.76,
      "learning_rate": 3.4117911628292944e-06,
      "logits/chosen": 0.1393759548664093,
      "logits/rejected": 0.09525509178638458,
      "logps/chosen": -194.57229614257812,
      "logps/rejected": -159.84437561035156,
      "loss": 0.001,
      "rewards/accuracies": 0.40625,
      "rewards/chosen": 0.0006214675377123058,
      "rewards/margins": 0.0007887079264037311,
      "rewards/rejected": -0.00016724050510674715,
      "step": 110
    },
    {
      "epoch": 1.92,
      "learning_rate": 3.075905022087675e-06,
      "logits/chosen": 0.19464930891990662,
      "logits/rejected": 0.26265355944633484,
      "logps/chosen": -182.62338256835938,
      "logps/rejected": -146.72181701660156,
      "loss": 0.001,
      "rewards/accuracies": 0.45625001192092896,
      "rewards/chosen": 0.00028288367320783436,
      "rewards/margins": 0.0012492609675973654,
      "rewards/rejected": -0.0009663773817010224,
      "step": 120
    },
    {
      "epoch": 2.08,
      "learning_rate": 2.728607913349464e-06,
      "logits/chosen": 0.2267303168773651,
      "logits/rejected": 0.21481864154338837,
      "logps/chosen": -175.65589904785156,
      "logps/rejected": -143.40615844726562,
      "loss": 0.0011,
      "rewards/accuracies": 0.40625,
      "rewards/chosen": 0.0007823506603017449,
      "rewards/margins": 0.00022141262888908386,
      "rewards/rejected": 0.000560938089620322,
      "step": 130
    },
    {
      "epoch": 2.24,
      "learning_rate": 2.376781173017589e-06,
      "logits/chosen": 0.18638554215431213,
      "logits/rejected": 0.19504567980766296,
      "logps/chosen": -181.9698486328125,
      "logps/rejected": -152.50296020507812,
      "loss": 0.001,
      "rewards/accuracies": 0.4312500059604645,
      "rewards/chosen": 0.002239116234704852,
      "rewards/margins": 0.001197666977532208,
      "rewards/rejected": 0.0010414490243420005,
      "step": 140
    },
    {
      "epoch": 2.4,
      "learning_rate": 2.0273958875043877e-06,
      "logits/chosen": 0.17610232532024384,
      "logits/rejected": 0.1723514050245285,
      "logps/chosen": -199.57699584960938,
      "logps/rejected": -177.21139526367188,
      "loss": 0.001,
      "rewards/accuracies": 0.4124999940395355,
      "rewards/chosen": 0.0006620158674195409,
      "rewards/margins": 0.0013073014561086893,
      "rewards/rejected": -0.0006452856468968093,
      "step": 150
    },
    {
      "epoch": 2.56,
      "learning_rate": 1.6873747682962393e-06,
      "logits/chosen": 0.07310830056667328,
      "logits/rejected": 0.17652472853660583,
      "logps/chosen": -183.4370880126953,
      "logps/rejected": -141.97218322753906,
      "loss": 0.001,
      "rewards/accuracies": 0.5,
      "rewards/chosen": 0.0025453295093029737,
      "rewards/margins": 0.0023006144911050797,
      "rewards/rejected": 0.00024471539654769003,
      "step": 160
    },
    {
      "epoch": 2.72,
      "learning_rate": 1.363454985517803e-06,
      "logits/chosen": 0.10622234642505646,
      "logits/rejected": 0.20915362238883972,
      "logps/chosen": -185.3426971435547,
      "logps/rejected": -146.079345703125,
      "loss": 0.0009,
      "rewards/accuracies": 0.4625000059604645,
      "rewards/chosen": 0.0036269682459533215,
      "rewards/margins": 0.002667922293767333,
      "rewards/rejected": 0.0009590461850166321,
      "step": 170
    },
    {
      "epoch": 2.88,
      "learning_rate": 1.062054677808238e-06,
      "logits/chosen": 0.19405516982078552,
      "logits/rejected": 0.16787810623645782,
      "logps/chosen": -199.97219848632812,
      "logps/rejected": -163.14398193359375,
      "loss": 0.001,
      "rewards/accuracies": 0.44999998807907104,
      "rewards/chosen": 0.0023652021773159504,
      "rewards/margins": 0.0015264868270605803,
      "rewards/rejected": 0.000838715408463031,
      "step": 180
    },
    {
      "epoch": 3.04,
      "learning_rate": 7.891457834794711e-07,
      "logits/chosen": 0.21832183003425598,
      "logits/rejected": 0.20919294655323029,
      "logps/chosen": -167.15711975097656,
      "logps/rejected": -146.7445526123047,
      "loss": 0.001,
      "rewards/accuracies": 0.4000000059604645,
      "rewards/chosen": 8.343837544089183e-05,
      "rewards/margins": 0.0007264987798407674,
      "rewards/rejected": -0.0006430605426430702,
      "step": 190
    },
    {
      "epoch": 3.2,
      "learning_rate": 5.501357126768117e-07,
      "logits/chosen": 0.07451615482568741,
      "logits/rejected": 0.10825479030609131,
      "logps/chosen": -187.6416015625,
      "logps/rejected": -153.81216430664062,
      "loss": 0.001,
      "rewards/accuracies": 0.42500001192092896,
      "rewards/chosen": 0.0008775632595643401,
      "rewards/margins": 0.0012995953438803554,
      "rewards/rejected": -0.00042203222983516753,
      "step": 200
    },
    {
      "epoch": 3.2,
      "eval_logits/chosen": -0.0019514876184985042,
      "eval_logits/rejected": 0.0977490246295929,
      "eval_logps/chosen": -306.2765197753906,
      "eval_logps/rejected": -278.5775146484375,
      "eval_loss": 0.0015296473866328597,
      "eval_rewards/accuracies": 0.4884999990463257,
      "eval_rewards/chosen": -2.441116339468863e-05,
      "eval_rewards/margins": 0.00038999062962830067,
      "eval_rewards/rejected": -0.000414401845773682,
      "eval_runtime": 420.5773,
      "eval_samples_per_second": 4.755,
      "eval_steps_per_second": 1.189,
      "step": 200
    },
    {
      "epoch": 3.36,
      "learning_rate": 3.4976020508682345e-07,
      "logits/chosen": 0.24440991878509521,
      "logits/rejected": 0.20419850945472717,
      "logps/chosen": -173.31942749023438,
      "logps/rejected": -149.56796264648438,
      "loss": 0.001,
      "rewards/accuracies": 0.4625000059604645,
      "rewards/chosen": 0.0018821163102984428,
      "rewards/margins": 0.0013987896963953972,
      "rewards/rejected": 0.0004833267885260284,
      "step": 210
    },
    {
      "epoch": 3.52,
      "learning_rate": 1.9198949610721273e-07,
      "logits/chosen": 0.195010706782341,
      "logits/rejected": 0.1568932682275772,
      "logps/chosen": -196.238525390625,
      "logps/rejected": -168.1038055419922,
      "loss": 0.0009,
      "rewards/accuracies": 0.4375,
      "rewards/chosen": 0.0010507756378501654,
      "rewards/margins": 0.0023238039575517178,
      "rewards/rejected": -0.001273027970455587,
      "step": 220
    },
    {
      "epoch": 3.68,
      "learning_rate": 7.994965069994143e-08,
      "logits/chosen": 0.1438043862581253,
      "logits/rejected": 0.13570797443389893,
      "logps/chosen": -193.8529815673828,
      "logps/rejected": -155.14317321777344,
      "loss": 0.001,
      "rewards/accuracies": 0.42500001192092896,
      "rewards/chosen": 0.0007188455201685429,
      "rewards/margins": 0.0012139389291405678,
      "rewards/rejected": -0.0004950935835950077,
      "step": 230
    },
    {
      "epoch": 3.84,
      "learning_rate": 1.5860623616664183e-08,
      "logits/chosen": 0.1838085651397705,
      "logits/rejected": 0.06515821814537048,
      "logps/chosen": -176.7322235107422,
      "logps/rejected": -146.44021606445312,
      "loss": 0.0009,
      "rewards/accuracies": 0.45625001192092896,
      "rewards/chosen": 0.004661304876208305,
      "rewards/margins": 0.0025658137165009975,
      "rewards/rejected": 0.0020954906940460205,
      "step": 240
    },
    {
      "epoch": 3.97,
      "step": 248,
      "total_flos": 0.0,
      "train_loss": 0.001002159876318934,
      "train_runtime": 2735.225,
      "train_samples_per_second": 1.462,
      "train_steps_per_second": 0.091
    }
  ],
  "logging_steps": 10,
  "max_steps": 248,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}