{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 83,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.12048192771084337,
      "grad_norm": 2.8577072620391846,
      "learning_rate": 4.8230451807939135e-05,
      "logits/chosen": -0.11954045295715332,
      "logits/rejected": -3.3223273754119873,
      "logps/chosen": -1.4568630456924438,
      "logps/rejected": -3.726320266723633,
      "loss": 1.4779,
      "num_input_tokens_seen": 8864,
      "odds_ratio_loss": 14.732812881469727,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": -0.1456862986087799,
      "rewards/margins": 0.22694571316242218,
      "rewards/rejected": -0.3726319968700409,
      "sft_loss": 0.0046242037788033485,
      "step": 10
    },
    {
      "epoch": 0.24096385542168675,
      "grad_norm": 1.9434715509414673,
      "learning_rate": 4.3172311296078595e-05,
      "logits/chosen": -0.2733025550842285,
      "logits/rejected": -3.2514376640319824,
      "logps/chosen": -1.4726128578186035,
      "logps/rejected": -5.625662803649902,
      "loss": 1.4745,
      "num_input_tokens_seen": 17712,
      "odds_ratio_loss": 14.662857055664062,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.14726129174232483,
      "rewards/margins": 0.4153049886226654,
      "rewards/rejected": -0.562566339969635,
      "sft_loss": 0.008168135769665241,
      "step": 20
    },
    {
      "epoch": 0.3614457831325301,
      "grad_norm": 2.154024124145508,
      "learning_rate": 3.55416283362546e-05,
      "logits/chosen": -0.20305636525154114,
      "logits/rejected": -3.410961627960205,
      "logps/chosen": -1.3130009174346924,
      "logps/rejected": -6.4230146408081055,
      "loss": 1.3135,
      "num_input_tokens_seen": 27520,
      "odds_ratio_loss": 13.098909378051758,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.13130010664463043,
      "rewards/margins": 0.5110014081001282,
      "rewards/rejected": -0.6423014998435974,
      "sft_loss": 0.0035836666356772184,
      "step": 30
    },
    {
      "epoch": 0.4819277108433735,
      "grad_norm": 2.372204303741455,
      "learning_rate": 2.6418631827326857e-05,
      "logits/chosen": -0.2755209803581238,
      "logits/rejected": -3.2902603149414062,
      "logps/chosen": -1.2280548810958862,
      "logps/rejected": -6.585225582122803,
      "loss": 1.2285,
      "num_input_tokens_seen": 36096,
      "odds_ratio_loss": 12.17691707611084,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.12280547618865967,
      "rewards/margins": 0.5357170104980469,
      "rewards/rejected": -0.6585224866867065,
      "sft_loss": 0.010825484991073608,
      "step": 40
    },
    {
      "epoch": 0.6024096385542169,
      "grad_norm": 2.4067399501800537,
      "learning_rate": 1.70948083275794e-05,
      "logits/chosen": -0.1695922613143921,
      "logits/rejected": -3.419903516769409,
      "logps/chosen": -1.0168521404266357,
      "logps/rejected": -6.722726345062256,
      "loss": 1.0171,
      "num_input_tokens_seen": 43456,
      "odds_ratio_loss": 10.138498306274414,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.10168520361185074,
      "rewards/margins": 0.5705875158309937,
      "rewards/rejected": -0.6722726821899414,
      "sft_loss": 0.0032351273111999035,
      "step": 50
    },
    {
      "epoch": 0.7228915662650602,
      "grad_norm": 2.4104135036468506,
      "learning_rate": 8.890074238378074e-06,
      "logits/chosen": -0.2158750295639038,
      "logits/rejected": -3.261337995529175,
      "logps/chosen": -1.4426627159118652,
      "logps/rejected": -6.552022457122803,
      "loss": 1.4433,
      "num_input_tokens_seen": 51600,
      "odds_ratio_loss": 14.290933609008789,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.144266277551651,
      "rewards/margins": 0.5109359622001648,
      "rewards/rejected": -0.6552022695541382,
      "sft_loss": 0.01422051526606083,
      "step": 60
    },
    {
      "epoch": 0.8433734939759037,
      "grad_norm": 2.4223577976226807,
      "learning_rate": 2.9659233496337786e-06,
      "logits/chosen": -0.14738118648529053,
      "logits/rejected": -3.434018611907959,
      "logps/chosen": -1.1621037721633911,
      "logps/rejected": -6.5490217208862305,
      "loss": 1.1624,
      "num_input_tokens_seen": 62624,
      "odds_ratio_loss": 11.598767280578613,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.11621036380529404,
      "rewards/margins": 0.5386918783187866,
      "rewards/rejected": -0.6549022197723389,
      "sft_loss": 0.0025552159640938044,
      "step": 70
    },
    {
      "epoch": 0.963855421686747,
      "grad_norm": 1.6316858530044556,
      "learning_rate": 1.6100130092037703e-07,
      "logits/chosen": -0.22155144810676575,
      "logits/rejected": -3.4224212169647217,
      "logps/chosen": -1.2641386985778809,
      "logps/rejected": -6.619866371154785,
      "loss": 1.2645,
      "num_input_tokens_seen": 73184,
      "odds_ratio_loss": 12.610678672790527,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.12641386687755585,
      "rewards/margins": 0.5355727672576904,
      "rewards/rejected": -0.6619867086410522,
      "sft_loss": 0.003463461296632886,
      "step": 80
    },
    {
      "epoch": 1.0,
      "num_input_tokens_seen": 75616,
      "step": 83,
      "total_flos": 3443013082939392.0,
      "train_loss": 1.2913588443434383,
      "train_runtime": 65.9669,
      "train_samples_per_second": 1.258,
      "train_steps_per_second": 1.258
    }
  ],
  "logging_steps": 10,
  "max_steps": 83,
  "num_input_tokens_seen": 75616,
  "num_train_epochs": 1,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3443013082939392.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}