File size: 4,720 Bytes
2f6c378 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 78,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"grad_norm": 5.038770139725663,
"learning_rate": 6.25e-07,
"logits/chosen": -1.3596761226654053,
"logits/rejected": -1.0023326873779297,
"logps/chosen": -450.79583740234375,
"logps/rejected": -781.127197265625,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.13,
"grad_norm": 5.222015988708302,
"learning_rate": 4.989935734988098e-06,
"logits/chosen": -1.0887341499328613,
"logits/rejected": -1.0588740110397339,
"logps/chosen": -564.32080078125,
"logps/rejected": -855.5671997070312,
"loss": 0.6684,
"rewards/accuracies": 0.6944444179534912,
"rewards/chosen": -0.007345028221607208,
"rewards/margins": 0.07519946992397308,
"rewards/rejected": -0.08254450559616089,
"step": 10
},
{
"epoch": 0.26,
"grad_norm": 3.800980531623392,
"learning_rate": 4.646121984004666e-06,
"logits/chosen": -1.4986072778701782,
"logits/rejected": -2.113752603530884,
"logps/chosen": -606.9659423828125,
"logps/rejected": -1018.4754638671875,
"loss": 0.4124,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": -0.3804299235343933,
"rewards/margins": 1.465038537979126,
"rewards/rejected": -1.845468521118164,
"step": 20
},
{
"epoch": 0.38,
"grad_norm": 4.950457492388352,
"learning_rate": 3.8772424536302565e-06,
"logits/chosen": -1.7479159832000732,
"logits/rejected": -2.567039966583252,
"logps/chosen": -614.0457153320312,
"logps/rejected": -1108.7679443359375,
"loss": 0.3199,
"rewards/accuracies": 0.893750011920929,
"rewards/chosen": -0.10914883762598038,
"rewards/margins": 2.6420645713806152,
"rewards/rejected": -2.751213550567627,
"step": 30
},
{
"epoch": 0.51,
"grad_norm": 4.011791311794506,
"learning_rate": 2.835583164544139e-06,
"logits/chosen": -1.7483808994293213,
"logits/rejected": -2.6819939613342285,
"logps/chosen": -644.5540161132812,
"logps/rejected": -1273.226318359375,
"loss": 0.2279,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": 0.04285002499818802,
"rewards/margins": 4.04502010345459,
"rewards/rejected": -4.002170085906982,
"step": 40
},
{
"epoch": 0.64,
"grad_norm": 3.2682180093043582,
"learning_rate": 1.7274575140626318e-06,
"logits/chosen": -1.5594079494476318,
"logits/rejected": -2.4641501903533936,
"logps/chosen": -627.8310546875,
"logps/rejected": -1211.9755859375,
"loss": 0.2181,
"rewards/accuracies": 0.9375,
"rewards/chosen": 0.09700597822666168,
"rewards/margins": 3.8286356925964355,
"rewards/rejected": -3.7316298484802246,
"step": 50
},
{
"epoch": 0.77,
"grad_norm": 2.445366662132522,
"learning_rate": 7.723433775328385e-07,
"logits/chosen": -1.6432807445526123,
"logits/rejected": -2.286984920501709,
"logps/chosen": -544.6373291015625,
"logps/rejected": -1150.681396484375,
"loss": 0.1556,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": 0.27296125888824463,
"rewards/margins": 3.291428327560425,
"rewards/rejected": -3.0184669494628906,
"step": 60
},
{
"epoch": 0.9,
"grad_norm": 4.67000331566183,
"learning_rate": 1.59412823400657e-07,
"logits/chosen": -1.5773608684539795,
"logits/rejected": -2.4722862243652344,
"logps/chosen": -636.8548583984375,
"logps/rejected": -1234.2066650390625,
"loss": 0.1503,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": 0.26559901237487793,
"rewards/margins": 4.093817234039307,
"rewards/rejected": -3.828218460083008,
"step": 70
},
{
"epoch": 1.0,
"step": 78,
"total_flos": 0.0,
"train_loss": 0.2915988182410216,
"train_runtime": 1026.489,
"train_samples_per_second": 4.863,
"train_steps_per_second": 0.076
}
],
"logging_steps": 10,
"max_steps": 78,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}
|