Luminia-8B-RP / Luminia-8B-RP-DPO / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 83,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.12048192771084337,
"grad_norm": 2.8577072620391846,
"learning_rate": 4.8230451807939135e-05,
"logits/chosen": -0.11954045295715332,
"logits/rejected": -3.3223273754119873,
"logps/chosen": -1.4568630456924438,
"logps/rejected": -3.726320266723633,
"loss": 1.4779,
"num_input_tokens_seen": 8864,
"odds_ratio_loss": 14.732812881469727,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.1456862986087799,
"rewards/margins": 0.22694571316242218,
"rewards/rejected": -0.3726319968700409,
"sft_loss": 0.0046242037788033485,
"step": 10
},
{
"epoch": 0.24096385542168675,
"grad_norm": 1.9434715509414673,
"learning_rate": 4.3172311296078595e-05,
"logits/chosen": -0.2733025550842285,
"logits/rejected": -3.2514376640319824,
"logps/chosen": -1.4726128578186035,
"logps/rejected": -5.625662803649902,
"loss": 1.4745,
"num_input_tokens_seen": 17712,
"odds_ratio_loss": 14.662857055664062,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14726129174232483,
"rewards/margins": 0.4153049886226654,
"rewards/rejected": -0.562566339969635,
"sft_loss": 0.008168135769665241,
"step": 20
},
{
"epoch": 0.3614457831325301,
"grad_norm": 2.154024124145508,
"learning_rate": 3.55416283362546e-05,
"logits/chosen": -0.20305636525154114,
"logits/rejected": -3.410961627960205,
"logps/chosen": -1.3130009174346924,
"logps/rejected": -6.4230146408081055,
"loss": 1.3135,
"num_input_tokens_seen": 27520,
"odds_ratio_loss": 13.098909378051758,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.13130010664463043,
"rewards/margins": 0.5110014081001282,
"rewards/rejected": -0.6423014998435974,
"sft_loss": 0.0035836666356772184,
"step": 30
},
{
"epoch": 0.4819277108433735,
"grad_norm": 2.372204303741455,
"learning_rate": 2.6418631827326857e-05,
"logits/chosen": -0.2755209803581238,
"logits/rejected": -3.2902603149414062,
"logps/chosen": -1.2280548810958862,
"logps/rejected": -6.585225582122803,
"loss": 1.2285,
"num_input_tokens_seen": 36096,
"odds_ratio_loss": 12.17691707611084,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.12280547618865967,
"rewards/margins": 0.5357170104980469,
"rewards/rejected": -0.6585224866867065,
"sft_loss": 0.010825484991073608,
"step": 40
},
{
"epoch": 0.6024096385542169,
"grad_norm": 2.4067399501800537,
"learning_rate": 1.70948083275794e-05,
"logits/chosen": -0.1695922613143921,
"logits/rejected": -3.419903516769409,
"logps/chosen": -1.0168521404266357,
"logps/rejected": -6.722726345062256,
"loss": 1.0171,
"num_input_tokens_seen": 43456,
"odds_ratio_loss": 10.138498306274414,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.10168520361185074,
"rewards/margins": 0.5705875158309937,
"rewards/rejected": -0.6722726821899414,
"sft_loss": 0.0032351273111999035,
"step": 50
},
{
"epoch": 0.7228915662650602,
"grad_norm": 2.4104135036468506,
"learning_rate": 8.890074238378074e-06,
"logits/chosen": -0.2158750295639038,
"logits/rejected": -3.261337995529175,
"logps/chosen": -1.4426627159118652,
"logps/rejected": -6.552022457122803,
"loss": 1.4433,
"num_input_tokens_seen": 51600,
"odds_ratio_loss": 14.290933609008789,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.144266277551651,
"rewards/margins": 0.5109359622001648,
"rewards/rejected": -0.6552022695541382,
"sft_loss": 0.01422051526606083,
"step": 60
},
{
"epoch": 0.8433734939759037,
"grad_norm": 2.4223577976226807,
"learning_rate": 2.9659233496337786e-06,
"logits/chosen": -0.14738118648529053,
"logits/rejected": -3.434018611907959,
"logps/chosen": -1.1621037721633911,
"logps/rejected": -6.5490217208862305,
"loss": 1.1624,
"num_input_tokens_seen": 62624,
"odds_ratio_loss": 11.598767280578613,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.11621036380529404,
"rewards/margins": 0.5386918783187866,
"rewards/rejected": -0.6549022197723389,
"sft_loss": 0.0025552159640938044,
"step": 70
},
{
"epoch": 0.963855421686747,
"grad_norm": 1.6316858530044556,
"learning_rate": 1.6100130092037703e-07,
"logits/chosen": -0.22155144810676575,
"logits/rejected": -3.4224212169647217,
"logps/chosen": -1.2641386985778809,
"logps/rejected": -6.619866371154785,
"loss": 1.2645,
"num_input_tokens_seen": 73184,
"odds_ratio_loss": 12.610678672790527,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.12641386687755585,
"rewards/margins": 0.5355727672576904,
"rewards/rejected": -0.6619867086410522,
"sft_loss": 0.003463461296632886,
"step": 80
},
{
"epoch": 1.0,
"num_input_tokens_seen": 75616,
"step": 83,
"total_flos": 3443013082939392.0,
"train_loss": 1.2913588443434383,
"train_runtime": 65.9669,
"train_samples_per_second": 1.258,
"train_steps_per_second": 1.258
}
],
"logging_steps": 10,
"max_steps": 83,
"num_input_tokens_seen": 75616,
"num_train_epochs": 1,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3443013082939392.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}
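
The JSON above is the trainer_state.json that the Hugging Face transformers Trainer writes alongside a checkpoint; the odds_ratio_loss, sft_loss, and rewards/* keys suggest an ORPO-style preference-optimization run logged every 10 steps. Below is a minimal sketch of how the log could be inspected offline, assuming the file has been downloaded locally as trainer_state.json (the local path and the printed fields are illustrative, not part of the repo):

```python
import json

# Load the trainer state (adjust the path to wherever the checkpoint lives).
with open("trainer_state.json") as f:
    state = json.load(f)

# Per-logging-step entries carry a "loss" key; the final aggregate entry does not.
history = [entry for entry in state["log_history"] if "loss" in entry]

for entry in history:
    print(
        f"step {entry['step']:>3} | "
        f"loss {entry['loss']:.4f} | "
        f"reward margin {entry['rewards/margins']:.4f} | "
        f"lr {entry['learning_rate']:.2e}"
    )

# The last log_history entry holds the run-level summary.
summary = state["log_history"][-1]
print(f"final train_loss: {summary['train_loss']:.4f} over {state['global_step']} steps")
```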