BraylonDash committed on
Commit e6f2538
1 Parent(s): c7483ea

Model save

README.md ADDED
@@ -0,0 +1,61 @@
+ ---
+ library_name: peft
+ tags:
+ - trl
+ - dpo
+ - generated_from_trainer
+ base_model: DUAL-GPO/phi-2-dpo-chatml-lora-40k-60k-i1-merged
+ model-index:
+ - name: phi-2-dpo-chatml-lora-40k-60k-v2-i2
+ results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # phi-2-dpo-chatml-lora-40k-60k-v2-i2
+
+ This model is a fine-tuned version of [DUAL-GPO/phi-2-dpo-chatml-lora-40k-60k-i1-merged](https://huggingface.co/DUAL-GPO/phi-2-dpo-chatml-lora-40k-60k-i1-merged) on an unspecified dataset.
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 5e-06
+ - train_batch_size: 4
+ - eval_batch_size: 4
+ - seed: 42
+ - distributed_type: multi-GPU
+ - num_devices: 4
+ - gradient_accumulation_steps: 4
+ - total_train_batch_size: 64
+ - total_eval_batch_size: 16
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: cosine
+ - lr_scheduler_warmup_ratio: 0.1
+ - num_epochs: 1
+
+ ### Training results
+
+
+
+ ### Framework versions
+
+ - PEFT 0.7.1
+ - Transformers 4.36.2
+ - Pytorch 2.1.2
+ - Datasets 2.14.6
+ - Tokenizers 0.15.2
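For readers reconstructing the run from this card: below is a minimal sketch of how the hyperparameters listed above could map onto a trl `DPOTrainer` setup. Only the values from the card (base model id, learning rate, batch sizes, scheduler, warmup, seed, epochs, logging/save steps) are taken from this commit; the dataset, DPO `beta`, and LoRA configuration are placeholders, and argument names may differ across trl versions.

```python
# Hypothetical sketch of the training setup implied by the card above.
# Base model id and optimizer/scheduler values come from the card; the
# dataset, beta, and LoRA config are placeholders, not from this commit.
from datasets import load_dataset
from peft import LoraConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from trl import DPOTrainer

base_id = "DUAL-GPO/phi-2-dpo-chatml-lora-40k-60k-i1-merged"
tokenizer = AutoTokenizer.from_pretrained(base_id)
model = AutoModelForCausalLM.from_pretrained(base_id)

args = TrainingArguments(
    output_dir="phi-2-dpo-chatml-lora-40k-60k-v2-i2",
    learning_rate=5e-6,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=4,  # 4 devices x 4 per device x 4 accumulation = 64 effective
    num_train_epochs=1,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    seed=42,
    logging_steps=10,
    save_steps=100,
)

trainer = DPOTrainer(
    model,
    ref_model=None,  # with a PEFT adapter, the frozen base can serve as the reference
    beta=0.1,        # assumed value; the actual beta is not recorded in this commit
    args=args,
    train_dataset=load_dataset("json", data_files="preferences.jsonl")["train"],  # placeholder
    tokenizer=tokenizer,
    peft_config=LoraConfig(task_type="CAUSAL_LM"),  # LoRA rank/targets not recorded here
)
trainer.train()
```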
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:869439b1ee9b9540aa300228a6750426ea7502a52c42528ec4b11a0216ff572a
+ oid sha256:e2e93cf34072fac22f8b4dfadb65dbe0d1dc04e584f1e110bc65709794b3774e
  size 335579632
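Only the LoRA adapter weights change in this commit; to use them, the adapter is applied on top of the merged base model named in the card. A minimal sketch with peft follows; the adapter path is a placeholder for wherever this repository is downloaded, and the ChatML-style prompt is assumed from the model name.

```python
# Sketch: load the base model and apply the updated adapter_model.safetensors.
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base_id = "DUAL-GPO/phi-2-dpo-chatml-lora-40k-60k-i1-merged"
adapter_path = "path/to/phi-2-dpo-chatml-lora-40k-60k-v2-i2"  # placeholder for this repo

tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=torch.bfloat16)
model = PeftModel.from_pretrained(base, adapter_path)  # reads adapter_model.safetensors

# ChatML-style prompt, assumed from the "chatml" tag in the model name.
prompt = "<|im_start|>user\nWhat is DPO?<|im_end|>\n<|im_start|>assistant\n"
inputs = tokenizer(prompt, return_tensors="pt")
output = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```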
all_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+ "epoch": 1.0,
+ "train_loss": 0.666122229435505,
+ "train_runtime": 4198.8516,
+ "train_samples": 20000,
+ "train_samples_per_second": 4.763,
+ "train_steps_per_second": 0.074
+ }
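These aggregates are consistent with the hyperparameters in the README, as the quick check below shows (all values come from this commit):

```python
# Cross-check the reported aggregates against the configured batch size.
train_samples = 20000
total_train_batch_size = 64   # 4 devices x train_batch_size 4 x gradient_accumulation 4
train_runtime = 4198.8516     # seconds

print(train_samples // total_train_batch_size)    # 312 optimizer steps (the run's global_step)
print(round(train_samples / train_runtime, 3))    # 4.763 samples per second
print(round(312 / train_runtime, 3))              # 0.074 steps per second
```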
emissions.csv ADDED
@@ -0,0 +1,2 @@
+ timestamp,project_name,run_id,duration,emissions,emissions_rate,cpu_power,gpu_power,ram_power,cpu_energy,gpu_energy,ram_energy,energy_consumed,country_name,country_iso_code,region,cloud_provider,cloud_region,os,python_version,codecarbon_version,cpu_count,cpu_model,gpu_count,gpu_model,longitude,latitude,ram_total_size,tracking_mode,on_cloud,pue
+ 2024-09-12T15:09:01,codecarbon,fb687a3f-aab0-4b7e-bd1e-9b0b7cb3d8e0,4198.856112241745,0.0038916446920244244,9.268344968236356e-07,42.5,1098.976,188.74309015274048,0.04956925347877874,1.3680259234841827,0.21946277793713023,1.6370579549000932,Canada,CAN,quebec,,,Linux-5.15.0-84-generic-x86_64-with-glibc2.35,3.10.14,2.2.3,32,Intel(R) Xeon(R) W-3335 CPU @ 3.40GHz,4,4 x NVIDIA GeForce RTX 4090,-71.2,46.8,503.3149070739746,machine,N,1.0
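The emissions log is the standard codecarbon CSV; a small sketch (pandas assumed to be available) for pulling out the headline figures:

```python
# Read the codecarbon output committed above and report the headline figures.
import pandas as pd

row = pd.read_csv("emissions.csv").iloc[0]
print(f"duration:        {row['duration']:.0f} s")                 # ~4199 s, matches train_runtime
print(f"energy consumed: {row['energy_consumed']:.3f} kWh")        # ~1.637 kWh
print(f"emissions:       {row['emissions'] * 1000:.2f} g CO2eq")   # ~3.89 g
print(f"hardware:        {row['gpu_model']}")                      # 4 x NVIDIA GeForce RTX 4090
```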
runs/Sep12_13-58-15_gpu4-119-5/events.out.tfevents.1726113542.gpu4-119-5.2187599.0 CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:47cef0cedbcbdf74b37d4e696299f0aab4e4b900ffdab12f47b95349618db576
- size 24473
+ oid sha256:9cd3849f224fee7c84d4474d67eac12d8e4e27e0d52ec3427b11da55e55f993f
+ size 25461
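The updated TensorBoard event file can also be read programmatically; a sketch using TensorBoard's EventAccumulator (scalar tag names depend on what the Trainer logged):

```python
# Sketch: list the scalar tags recorded in the committed event file's run directory.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

ea = EventAccumulator("runs/Sep12_13-58-15_gpu4-119-5")
ea.Reload()
print(ea.Tags()["scalars"])        # e.g. loss and reward curves; exact names depend on the Trainer
# for event in ea.Scalars("train/loss"):  # hypothetical tag; pick one printed above
#     print(event.step, event.value)
```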
train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+ "epoch": 1.0,
+ "train_loss": 0.666122229435505,
+ "train_runtime": 4198.8516,
+ "train_samples": 20000,
+ "train_samples_per_second": 4.763,
+ "train_steps_per_second": 0.074
+ }
trainer_state.json ADDED
@@ -0,0 +1,478 @@
+ {
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 0.9984,
+ "eval_steps": 500,
+ "global_step": 312,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.0,
+ "learning_rate": 1.5625e-07,
+ "logits/chosen": 0.37485986948013306,
+ "logits/rejected": 0.6487500071525574,
+ "logps/chosen": -1078.384765625,
+ "logps/rejected": -1101.77490234375,
+ "loss": 0.6931,
+ "rewards/accuracies": 0.0,
+ "rewards/chosen": 0.0,
+ "rewards/margins": 0.0,
+ "rewards/rejected": 0.0,
+ "step": 1
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 1.5625e-06,
+ "logits/chosen": 0.47351381182670593,
+ "logits/rejected": 0.5273572206497192,
+ "logps/chosen": -1056.42822265625,
+ "logps/rejected": -1169.265625,
+ "loss": 0.6932,
+ "rewards/accuracies": 0.3958333432674408,
+ "rewards/chosen": -0.00091694132424891,
+ "rewards/margins": -0.00018613642896525562,
+ "rewards/rejected": -0.0007308049243874848,
+ "step": 10
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 3.125e-06,
+ "logits/chosen": 0.42252635955810547,
+ "logits/rejected": 0.49473732709884644,
+ "logps/chosen": -1147.17236328125,
+ "logps/rejected": -1265.768798828125,
+ "loss": 0.6914,
+ "rewards/accuracies": 0.48124998807907104,
+ "rewards/chosen": -0.022750383242964745,
+ "rewards/margins": 0.0030057504773139954,
+ "rewards/rejected": -0.02575613185763359,
+ "step": 20
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 4.6875000000000004e-06,
+ "logits/chosen": 0.5097017884254456,
+ "logits/rejected": 0.5685985088348389,
+ "logps/chosen": -1142.3890380859375,
+ "logps/rejected": -1274.76171875,
+ "loss": 0.6878,
+ "rewards/accuracies": 0.5,
+ "rewards/chosen": -0.09474565088748932,
+ "rewards/margins": 0.013504189439117908,
+ "rewards/rejected": -0.10824984312057495,
+ "step": 30
+ },
+ {
+ "epoch": 0.13,
+ "learning_rate": 4.989935734988098e-06,
+ "logits/chosen": 0.6262896656990051,
+ "logits/rejected": 0.5369861125946045,
+ "logps/chosen": -1379.107177734375,
+ "logps/rejected": -1572.9727783203125,
+ "loss": 0.679,
+ "rewards/accuracies": 0.5562499761581421,
+ "rewards/chosen": -0.25016888976097107,
+ "rewards/margins": 0.05141867324709892,
+ "rewards/rejected": -0.3015875816345215,
+ "step": 40
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 4.949188496058089e-06,
+ "logits/chosen": 0.595242977142334,
+ "logits/rejected": 0.6554633378982544,
+ "logps/chosen": -1302.7606201171875,
+ "logps/rejected": -1508.60107421875,
+ "loss": 0.6637,
+ "rewards/accuracies": 0.48750001192092896,
+ "rewards/chosen": -0.35188037157058716,
+ "rewards/margins": 0.07706869393587112,
+ "rewards/rejected": -0.4289490282535553,
+ "step": 50
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 4.8776412907378845e-06,
+ "logits/chosen": 0.6328147053718567,
+ "logits/rejected": 0.695043683052063,
+ "logps/chosen": -1562.052978515625,
+ "logps/rejected": -1646.878662109375,
+ "loss": 0.6789,
+ "rewards/accuracies": 0.4749999940395355,
+ "rewards/chosen": -0.4670296609401703,
+ "rewards/margins": 0.025544878095388412,
+ "rewards/rejected": -0.492574542760849,
+ "step": 60
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 4.7761938666470405e-06,
+ "logits/chosen": 0.5762341618537903,
+ "logits/rejected": 0.6952670812606812,
+ "logps/chosen": -1321.309814453125,
+ "logps/rejected": -1596.348876953125,
+ "loss": 0.6667,
+ "rewards/accuracies": 0.606249988079071,
+ "rewards/chosen": -0.27966535091400146,
+ "rewards/margins": 0.1273583322763443,
+ "rewards/rejected": -0.40702366828918457,
+ "step": 70
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 4.646121984004666e-06,
+ "logits/chosen": 0.5692261457443237,
+ "logits/rejected": 0.8441296815872192,
+ "logps/chosen": -1434.124755859375,
+ "logps/rejected": -1714.9644775390625,
+ "loss": 0.6653,
+ "rewards/accuracies": 0.512499988079071,
+ "rewards/chosen": -0.38832995295524597,
+ "rewards/margins": 0.12836144864559174,
+ "rewards/rejected": -0.5166913866996765,
+ "step": 80
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 4.4890613722044526e-06,
+ "logits/chosen": 0.5933000445365906,
+ "logits/rejected": 0.7363389730453491,
+ "logps/chosen": -1363.330810546875,
+ "logps/rejected": -1636.5830078125,
+ "loss": 0.6614,
+ "rewards/accuracies": 0.581250011920929,
+ "rewards/chosen": -0.36261868476867676,
+ "rewards/margins": 0.11317511647939682,
+ "rewards/rejected": -0.47579383850097656,
+ "step": 90
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 4.3069871595684795e-06,
+ "logits/chosen": 0.6000683903694153,
+ "logits/rejected": 0.7250877618789673,
+ "logps/chosen": -1501.1116943359375,
+ "logps/rejected": -1688.3638916015625,
+ "loss": 0.6729,
+ "rewards/accuracies": 0.512499988079071,
+ "rewards/chosen": -0.42007994651794434,
+ "rewards/margins": 0.07729745656251907,
+ "rewards/rejected": -0.49737733602523804,
+ "step": 100
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 4.102189034962561e-06,
+ "logits/chosen": 0.6664993166923523,
+ "logits/rejected": 0.8522599935531616,
+ "logps/chosen": -1442.220703125,
+ "logps/rejected": -1732.263671875,
+ "loss": 0.6579,
+ "rewards/accuracies": 0.581250011920929,
+ "rewards/chosen": -0.3821481168270111,
+ "rewards/margins": 0.13758106529712677,
+ "rewards/rejected": -0.5197292566299438,
+ "step": 110
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 3.8772424536302565e-06,
+ "logits/chosen": 0.7149074077606201,
+ "logits/rejected": 0.7848154902458191,
+ "logps/chosen": -1421.830322265625,
+ "logps/rejected": -1694.971923828125,
+ "loss": 0.6525,
+ "rewards/accuracies": 0.543749988079071,
+ "rewards/chosen": -0.38273271918296814,
+ "rewards/margins": 0.12498722970485687,
+ "rewards/rejected": -0.5077199935913086,
+ "step": 120
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 3.634976249348867e-06,
+ "logits/chosen": 0.709136962890625,
+ "logits/rejected": 0.7118825912475586,
+ "logps/chosen": -1674.842041015625,
+ "logps/rejected": -1917.5904541015625,
+ "loss": 0.6612,
+ "rewards/accuracies": 0.550000011920929,
+ "rewards/chosen": -0.5186460614204407,
+ "rewards/margins": 0.10890078544616699,
+ "rewards/rejected": -0.6275469064712524,
+ "step": 130
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 3.3784370602033572e-06,
+ "logits/chosen": 0.6097074747085571,
+ "logits/rejected": 0.8623794317245483,
+ "logps/chosen": -1559.768798828125,
+ "logps/rejected": -1758.3902587890625,
+ "loss": 0.6673,
+ "rewards/accuracies": 0.5874999761581421,
+ "rewards/chosen": -0.446970134973526,
+ "rewards/margins": 0.08759806305170059,
+ "rewards/rejected": -0.534568190574646,
+ "step": 140
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 3.1108510153447352e-06,
+ "logits/chosen": 0.567806601524353,
+ "logits/rejected": 0.8320453763008118,
+ "logps/chosen": -1542.0999755859375,
+ "logps/rejected": -1677.0787353515625,
+ "loss": 0.6708,
+ "rewards/accuracies": 0.46875,
+ "rewards/chosen": -0.41464248299598694,
+ "rewards/margins": 0.05952323600649834,
+ "rewards/rejected": -0.47416573762893677,
+ "step": 150
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 2.835583164544139e-06,
+ "logits/chosen": 0.7906177639961243,
+ "logits/rejected": 0.7841562628746033,
+ "logps/chosen": -1429.5552978515625,
+ "logps/rejected": -1619.2171630859375,
+ "loss": 0.6578,
+ "rewards/accuracies": 0.543749988079071,
+ "rewards/chosen": -0.3786751627922058,
+ "rewards/margins": 0.09149602800607681,
+ "rewards/rejected": -0.47017115354537964,
+ "step": 160
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 2.556095160739513e-06,
+ "logits/chosen": 0.7081605195999146,
+ "logits/rejected": 0.6871576905250549,
+ "logps/chosen": -1445.623779296875,
+ "logps/rejected": -1650.127197265625,
+ "loss": 0.6577,
+ "rewards/accuracies": 0.5687500238418579,
+ "rewards/chosen": -0.41454702615737915,
+ "rewards/margins": 0.10776009410619736,
+ "rewards/rejected": -0.5223071575164795,
+ "step": 170
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 2.2759017277414165e-06,
+ "logits/chosen": 0.7904581427574158,
+ "logits/rejected": 0.7038453817367554,
+ "logps/chosen": -1671.363525390625,
+ "logps/rejected": -1868.4300537109375,
+ "loss": 0.6579,
+ "rewards/accuracies": 0.5874999761581421,
+ "rewards/chosen": -0.5421901941299438,
+ "rewards/margins": 0.10026909410953522,
+ "rewards/rejected": -0.6424592733383179,
+ "step": 180
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 1.9985264605418185e-06,
+ "logits/chosen": 0.6540366411209106,
+ "logits/rejected": 0.7285584807395935,
+ "logps/chosen": -1518.58935546875,
+ "logps/rejected": -1726.029052734375,
+ "loss": 0.652,
+ "rewards/accuracies": 0.512499988079071,
+ "rewards/chosen": -0.4512515962123871,
+ "rewards/margins": 0.09267839789390564,
+ "rewards/rejected": -0.5439299941062927,
+ "step": 190
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 1.7274575140626318e-06,
+ "logits/chosen": 0.879226565361023,
+ "logits/rejected": 0.8555147051811218,
+ "logps/chosen": -1627.2894287109375,
+ "logps/rejected": -1928.3841552734375,
+ "loss": 0.6602,
+ "rewards/accuracies": 0.5375000238418579,
+ "rewards/chosen": -0.5301727056503296,
+ "rewards/margins": 0.161948561668396,
+ "rewards/rejected": -0.6921212673187256,
+ "step": 200
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 1.466103737583699e-06,
+ "logits/chosen": 0.6861797571182251,
+ "logits/rejected": 0.9016023874282837,
+ "logps/chosen": -1539.524658203125,
+ "logps/rejected": -1874.627685546875,
+ "loss": 0.659,
+ "rewards/accuracies": 0.6187499761581421,
+ "rewards/chosen": -0.4866175651550293,
+ "rewards/margins": 0.17361479997634888,
+ "rewards/rejected": -0.6602323651313782,
+ "step": 210
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 1.217751806485235e-06,
+ "logits/chosen": 0.7002454996109009,
+ "logits/rejected": 0.8032233119010925,
+ "logps/chosen": -1692.8636474609375,
+ "logps/rejected": -1974.6669921875,
+ "loss": 0.6663,
+ "rewards/accuracies": 0.5562499761581421,
+ "rewards/chosen": -0.5267156958580017,
+ "rewards/margins": 0.14205826818943024,
+ "rewards/rejected": -0.6687740087509155,
+ "step": 220
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 9.855248903979505e-07,
+ "logits/chosen": 0.7965744733810425,
+ "logits/rejected": 0.7885487079620361,
+ "logps/chosen": -1604.53466796875,
+ "logps/rejected": -1794.8245849609375,
+ "loss": 0.6672,
+ "rewards/accuracies": 0.5625,
+ "rewards/chosen": -0.48480549454689026,
+ "rewards/margins": 0.0899248868227005,
+ "rewards/rejected": -0.574730396270752,
+ "step": 230
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 7.723433775328385e-07,
+ "logits/chosen": 0.7397829294204712,
+ "logits/rejected": 0.8248909711837769,
+ "logps/chosen": -1530.4615478515625,
+ "logps/rejected": -1826.6988525390625,
+ "loss": 0.654,
+ "rewards/accuracies": 0.6187499761581421,
+ "rewards/chosen": -0.41139334440231323,
+ "rewards/margins": 0.1389993578195572,
+ "rewards/rejected": -0.550392746925354,
+ "step": 240
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 5.808881491049723e-07,
+ "logits/chosen": 0.6129003763198853,
+ "logits/rejected": 0.8366864919662476,
+ "logps/chosen": -1451.37890625,
+ "logps/rejected": -1729.635986328125,
+ "loss": 0.6728,
+ "rewards/accuracies": 0.5687500238418579,
+ "rewards/chosen": -0.40951260924339294,
+ "rewards/margins": 0.1274668127298355,
+ "rewards/rejected": -0.5369793772697449,
+ "step": 250
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 4.1356686569674344e-07,
+ "logits/chosen": 0.7178203463554382,
+ "logits/rejected": 0.8371836543083191,
+ "logps/chosen": -1524.3497314453125,
+ "logps/rejected": -1778.346923828125,
+ "loss": 0.6495,
+ "rewards/accuracies": 0.5687500238418579,
+ "rewards/chosen": -0.4218766689300537,
+ "rewards/margins": 0.12465916574001312,
+ "rewards/rejected": -0.546535849571228,
+ "step": 260
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 2.7248368952908055e-07,
+ "logits/chosen": 0.6586390733718872,
+ "logits/rejected": 0.8500372767448425,
+ "logps/chosen": -1523.057373046875,
+ "logps/rejected": -1753.3544921875,
+ "loss": 0.6641,
+ "rewards/accuracies": 0.574999988079071,
+ "rewards/chosen": -0.43091854453086853,
+ "rewards/margins": 0.11140650510787964,
+ "rewards/rejected": -0.5423250198364258,
+ "step": 270
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 1.59412823400657e-07,
+ "logits/chosen": 0.7076243162155151,
+ "logits/rejected": 0.7846710681915283,
+ "logps/chosen": -1433.0615234375,
+ "logps/rejected": -1673.491455078125,
+ "loss": 0.6713,
+ "rewards/accuracies": 0.4937500059604645,
+ "rewards/chosen": -0.39409512281417847,
+ "rewards/margins": 0.11671394109725952,
+ "rewards/rejected": -0.510809063911438,
+ "step": 280
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 7.577619905828281e-08,
+ "logits/chosen": 0.6640155911445618,
+ "logits/rejected": 0.7537108659744263,
+ "logps/chosen": -1413.724853515625,
+ "logps/rejected": -1650.0406494140625,
+ "loss": 0.6651,
+ "rewards/accuracies": 0.543749988079071,
+ "rewards/chosen": -0.38546374440193176,
+ "rewards/margins": 0.11493394523859024,
+ "rewards/rejected": -0.5003976821899414,
+ "step": 290
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 2.262559558016325e-08,
+ "logits/chosen": 0.635712742805481,
+ "logits/rejected": 0.8228713274002075,
+ "logps/chosen": -1463.5419921875,
+ "logps/rejected": -1637.050048828125,
+ "loss": 0.6578,
+ "rewards/accuracies": 0.59375,
+ "rewards/chosen": -0.41111892461776733,
+ "rewards/margins": 0.08340780436992645,
+ "rewards/rejected": -0.4945267140865326,
+ "step": 300
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 6.294126437336734e-10,
+ "logits/chosen": 0.6655277013778687,
+ "logits/rejected": 0.759280800819397,
+ "logps/chosen": -1619.28857421875,
+ "logps/rejected": -1727.4703369140625,
+ "loss": 0.6663,
+ "rewards/accuracies": 0.512499988079071,
+ "rewards/chosen": -0.4628829061985016,
+ "rewards/margins": 0.05492577701807022,
+ "rewards/rejected": -0.517808735370636,
+ "step": 310
+ },
+ {
+ "epoch": 1.0,
+ "step": 312,
+ "total_flos": 0.0,
+ "train_loss": 0.666122229435505,
+ "train_runtime": 4198.8516,
+ "train_samples_per_second": 4.763,
+ "train_steps_per_second": 0.074
+ }
+ ],
+ "logging_steps": 10,
+ "max_steps": 312,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 1,
+ "save_steps": 100,
+ "total_flos": 0.0,
+ "train_batch_size": 4,
+ "trial_name": null,
+ "trial_params": null
+ }
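The log_history above records the DPO metrics (loss, rewards/chosen, rewards/rejected, rewards/margins) every 10 steps; a minimal sketch for reading them back out of trainer_state.json and plotting the trend (matplotlib assumed to be available):

```python
# Plot training loss and reward margin over steps from the committed trainer_state.json.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only the per-step log entries; the final summary entry has no reward metrics.
logs = [e for e in state["log_history"] if "rewards/margins" in e]
steps = [e["step"] for e in logs]

fig, (ax_loss, ax_margin) = plt.subplots(2, 1, sharex=True)
ax_loss.plot(steps, [e["loss"] for e in logs])
ax_loss.set_ylabel("DPO loss")
ax_margin.plot(steps, [e["rewards/margins"] for e in logs])
ax_margin.set_ylabel("rewards/margins")
ax_margin.set_xlabel("step")
plt.show()
```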