File size: 4,910 Bytes
224dcf4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 80,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 6.25e-08,
      "logits/chosen": -2.902447462081909,
      "logits/rejected": -2.93850040435791,
      "logps/chosen": -331.92425537109375,
      "logps/rejected": -304.7728576660156,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.990486745229364e-07,
      "logits/chosen": -2.8264474868774414,
      "logits/rejected": -2.8004088401794434,
      "logps/chosen": -250.434326171875,
      "logps/rejected": -216.94482421875,
      "loss": 0.6926,
      "rewards/accuracies": 0.4652777910232544,
      "rewards/chosen": 0.000873287848662585,
      "rewards/margins": 0.0005172663950361311,
      "rewards/rejected": 0.0003560214536264539,
      "step": 10
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.6650635094610966e-07,
      "logits/chosen": -2.761777639389038,
      "logits/rejected": -2.7672297954559326,
      "logps/chosen": -256.48187255859375,
      "logps/rejected": -276.37890625,
      "loss": 0.6831,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": 0.015453550033271313,
      "rewards/margins": 0.014861134812235832,
      "rewards/rejected": 0.0005924167344346642,
      "step": 20
    },
    {
      "epoch": 0.38,
      "learning_rate": 3.933941090877615e-07,
      "logits/chosen": -2.8019919395446777,
      "logits/rejected": -2.7836480140686035,
      "logps/chosen": -257.0506286621094,
      "logps/rejected": -244.07858276367188,
      "loss": 0.6633,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": 0.004371149465441704,
      "rewards/margins": 0.06962588429450989,
      "rewards/rejected": -0.06525473296642303,
      "step": 30
    },
    {
      "epoch": 0.5,
      "learning_rate": 2.934120444167326e-07,
      "logits/chosen": -2.7478058338165283,
      "logits/rejected": -2.731013298034668,
      "logps/chosen": -291.93682861328125,
      "logps/rejected": -290.897705078125,
      "loss": 0.6459,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": -0.08649159967899323,
      "rewards/margins": 0.1326875388622284,
      "rewards/rejected": -0.21917912364006042,
      "step": 40
    },
    {
      "epoch": 0.62,
      "learning_rate": 1.8529523872436977e-07,
      "logits/chosen": -2.7630627155303955,
      "logits/rejected": -2.769901752471924,
      "logps/chosen": -275.48516845703125,
      "logps/rejected": -269.76690673828125,
      "loss": 0.6259,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -0.17858798801898956,
      "rewards/margins": 0.11792335659265518,
      "rewards/rejected": -0.29651135206222534,
      "step": 50
    },
    {
      "epoch": 0.75,
      "learning_rate": 8.930309757836516e-08,
      "logits/chosen": -2.7746453285217285,
      "logits/rejected": -2.717956304550171,
      "logps/chosen": -277.69140625,
      "logps/rejected": -282.4396667480469,
      "loss": 0.6146,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": -0.17534565925598145,
      "rewards/margins": 0.22178736329078674,
      "rewards/rejected": -0.3971330225467682,
      "step": 60
    },
    {
      "epoch": 0.88,
      "learning_rate": 2.3423053240837514e-08,
      "logits/chosen": -2.7869958877563477,
      "logits/rejected": -2.776400327682495,
      "logps/chosen": -290.5310974121094,
      "logps/rejected": -295.3239440917969,
      "loss": 0.6132,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": -0.25528621673583984,
      "rewards/margins": 0.1374310851097107,
      "rewards/rejected": -0.39271730184555054,
      "step": 70
    },
    {
      "epoch": 1.0,
      "learning_rate": 0.0,
      "logits/chosen": -2.7256760597229004,
      "logits/rejected": -2.682258367538452,
      "logps/chosen": -292.8467712402344,
      "logps/rejected": -316.6199951171875,
      "loss": 0.6074,
      "rewards/accuracies": 0.581250011920929,
      "rewards/chosen": -0.29217854142189026,
      "rewards/margins": 0.14723847806453705,
      "rewards/rejected": -0.4394169747829437,
      "step": 80
    },
    {
      "epoch": 1.0,
      "step": 80,
      "total_flos": 0.0,
      "train_loss": 0.6432609260082245,
      "train_runtime": 1321.6714,
      "train_samples_per_second": 7.709,
      "train_steps_per_second": 0.061
    }
  ],
  "logging_steps": 10,
  "max_steps": 80,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}