tanliboy committed on
Commit
6693dd8
1 Parent(s): 8f8c885

Model save

Browse files
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  tags:
3
  - trl
4
  - dpo
@@ -13,17 +15,17 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  # zephyr-7b-gemma-dpo
15
 
16
- This model was trained from scratch on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 0.4812
19
- - Rewards/chosen: -0.1423
20
- - Rewards/rejected: -1.2147
21
- - Rewards/accuracies: 0.7083
22
- - Rewards/margins: 1.0724
23
- - Logps/rejected: -719.2219
24
- - Logps/chosen: -699.1321
25
- - Logits/rejected: 152.4098
26
- - Logits/chosen: 152.7205
27
 
28
  ## Model description
29
 
@@ -60,7 +62,7 @@ The following hyperparameters were used during training:
60
 
61
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
62
  |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
63
- | 0.1441 | 1.8957 | 100 | 0.4812 | -0.1423 | -1.2147 | 0.7083 | 1.0724 | -719.2219 | -699.1321 | 152.4098 | 152.7205 |
64
 
65
 
66
  ### Framework versions
 
1
  ---
2
+ license: gemma
3
+ base_model: tanliboy/zephyr-7b-gemma-sft
4
  tags:
5
  - trl
6
  - dpo
 
15
 
16
  # zephyr-7b-gemma-dpo
17
 
18
+ This model is a fine-tuned version of [tanliboy/zephyr-7b-gemma-sft](https://huggingface.co/tanliboy/zephyr-7b-gemma-sft) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 0.4722
21
+ - Rewards/chosen: -0.0658
22
+ - Rewards/rejected: -1.2673
23
+ - Rewards/accuracies: 0.7396
24
+ - Rewards/margins: 1.2015
25
+ - Logps/rejected: -720.2745
26
+ - Logps/chosen: -697.6023
27
+ - Logits/rejected: 152.9660
28
+ - Logits/chosen: 153.1356
29
 
30
  ## Model description
31
 
 
62
 
63
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
64
  |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
65
+ | 0.1424 | 1.8957 | 100 | 0.4722 | -0.0658 | -1.2673 | 0.7396 | 1.2015 | -720.2745 | -697.6023 | 152.9660 | 153.1356 |
66
 
67
 
68
  ### Framework versions
all_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 1.971563981042654,
3
  "total_flos": 0.0,
4
- "train_loss": 0.38755885110451627,
5
- "train_runtime": 2329.0461,
6
  "train_samples": 6750,
7
- "train_samples_per_second": 5.796,
8
  "train_steps_per_second": 0.045
9
  }
 
1
  {
2
  "epoch": 1.971563981042654,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.3875045489806395,
5
+ "train_runtime": 2331.3598,
6
  "train_samples": 6750,
7
+ "train_samples_per_second": 5.791,
8
  "train_steps_per_second": 0.045
9
  }
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "/home/litan/alignment-handbook/models/zephyr-7b-gemma-sft",
3
  "architectures": [
4
  "GemmaForCausalLM"
5
  ],
 
1
  {
2
+ "_name_or_path": "tanliboy/zephyr-7b-gemma-sft",
3
  "architectures": [
4
  "GemmaForCausalLM"
5
  ],
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e294f891ceb65882f57743942bc3dc24c4b2b502bbb076096ec25e92661e741
3
  size 4995496656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:563e4e67cd479b49935cacef2e942c6012884ff407662bf19bc5df331e539025
3
  size 4995496656
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4204106f6e2d2ff00e095e8568ce50ddc74989dbc5991ea07cb8629ad54e031b
3
  size 4982953168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60905c763b2b5422bd986dfd9cd9ca91c5cf5cf91f20cec4c90a53de04de867f
3
  size 4982953168
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c318a4f435751720bc413d8ee12e30b1bd2e08cc85b889991c2328190361a7da
3
  size 4982953200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9b965885821657872f7222d640a45d5812b610e3df4fd23dc6d0b95f41e688c
3
  size 4982953200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c840fbdf9fdb4f8b1d76fcba6b20d0bd75ad4f7bbc5eac9198663dbf2a6c958c
3
  size 2113988336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb7604d8de752d9cab47da8539302cac4f95c5754577653ecc748f57930bea65
3
  size 2113988336
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 1.971563981042654,
3
  "total_flos": 0.0,
4
- "train_loss": 0.38755885110451627,
5
- "train_runtime": 2329.0461,
6
  "train_samples": 6750,
7
- "train_samples_per_second": 5.796,
8
  "train_steps_per_second": 0.045
9
  }
 
1
  {
2
  "epoch": 1.971563981042654,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.3875045489806395,
5
+ "train_runtime": 2331.3598,
6
  "train_samples": 6750,
7
+ "train_samples_per_second": 5.791,
8
  "train_steps_per_second": 0.045
9
  }
trainer_state.json CHANGED
@@ -10,7 +10,7 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.018957345971563982,
13
- "grad_norm": 99.03845755243859,
14
  "learning_rate": 4.545454545454545e-08,
15
  "logits/chosen": 163.72256469726562,
16
  "logits/rejected": 157.14466857910156,
@@ -25,177 +25,177 @@
25
  },
26
  {
27
  "epoch": 0.1895734597156398,
28
- "grad_norm": 109.21636585235684,
29
  "learning_rate": 4.545454545454545e-07,
30
- "logits/chosen": 171.64620971679688,
31
- "logits/rejected": 172.84539794921875,
32
- "logps/chosen": -742.3294067382812,
33
- "logps/rejected": -781.19384765625,
34
- "loss": 0.7155,
35
- "rewards/accuracies": 0.4583333432674408,
36
- "rewards/chosen": 0.017745885998010635,
37
- "rewards/margins": 0.0211933646351099,
38
- "rewards/rejected": -0.0034474804997444153,
39
  "step": 10
40
  },
41
  {
42
  "epoch": 0.3791469194312796,
43
- "grad_norm": 81.70841250586862,
44
  "learning_rate": 4.885348141000122e-07,
45
- "logits/chosen": 162.9903106689453,
46
- "logits/rejected": 165.7174835205078,
47
- "logps/chosen": -710.62255859375,
48
- "logps/rejected": -750.2293701171875,
49
- "loss": 0.6502,
50
- "rewards/accuracies": 0.5874999761581421,
51
- "rewards/chosen": 0.47601765394210815,
52
- "rewards/margins": 0.17595532536506653,
53
- "rewards/rejected": 0.300062358379364,
54
  "step": 20
55
  },
56
  {
57
  "epoch": 0.5687203791469194,
58
- "grad_norm": 70.21647250732705,
59
  "learning_rate": 4.5025027361734613e-07,
60
- "logits/chosen": 183.96597290039062,
61
- "logits/rejected": 176.80657958984375,
62
- "logps/chosen": -712.5685424804688,
63
- "logps/rejected": -738.5382080078125,
64
- "loss": 0.582,
65
- "rewards/accuracies": 0.668749988079071,
66
- "rewards/chosen": 0.9081576466560364,
67
- "rewards/margins": 0.5097990036010742,
68
- "rewards/rejected": 0.3983585834503174,
69
  "step": 30
70
  },
71
  {
72
  "epoch": 0.7582938388625592,
73
- "grad_norm": 70.35328347282919,
74
  "learning_rate": 3.893311157806091e-07,
75
- "logits/chosen": 167.0669708251953,
76
- "logits/rejected": 155.26910400390625,
77
- "logps/chosen": -696.257080078125,
78
- "logps/rejected": -698.9947509765625,
79
- "loss": 0.571,
80
- "rewards/accuracies": 0.6937500238418579,
81
- "rewards/chosen": 0.4749051034450531,
82
- "rewards/margins": 0.8314126133918762,
83
- "rewards/rejected": -0.35650748014450073,
84
  "step": 40
85
  },
86
  {
87
  "epoch": 0.9478672985781991,
88
- "grad_norm": 89.79206485749032,
89
  "learning_rate": 3.126631330646801e-07,
90
- "logits/chosen": 181.7749481201172,
91
- "logits/rejected": 182.71652221679688,
92
- "logps/chosen": -772.6339721679688,
93
- "logps/rejected": -824.01171875,
94
- "loss": 0.4953,
95
- "rewards/accuracies": 0.7437499761581421,
96
- "rewards/chosen": 0.038030486553907394,
97
- "rewards/margins": 0.9203092455863953,
98
- "rewards/rejected": -0.8822787404060364,
99
  "step": 50
100
  },
101
  {
102
  "epoch": 1.1374407582938388,
103
- "grad_norm": 29.72925097875123,
104
  "learning_rate": 2.2891223348923882e-07,
105
- "logits/chosen": 174.12176513671875,
106
- "logits/rejected": 179.2202606201172,
107
- "logps/chosen": -724.5867309570312,
108
- "logps/rejected": -795.8361206054688,
109
- "loss": 0.2765,
110
- "rewards/accuracies": 0.918749988079071,
111
- "rewards/chosen": 0.7417961955070496,
112
- "rewards/margins": 2.4216690063476562,
113
- "rewards/rejected": -1.6798728704452515,
114
  "step": 60
115
  },
116
  {
117
  "epoch": 1.3270142180094786,
118
- "grad_norm": 29.451582781492476,
119
  "learning_rate": 1.4754491880085317e-07,
120
- "logits/chosen": 169.59214782714844,
121
- "logits/rejected": 167.35098266601562,
122
- "logps/chosen": -683.1103515625,
123
- "logps/rejected": -780.7704467773438,
124
- "loss": 0.177,
125
- "rewards/accuracies": 0.949999988079071,
126
- "rewards/chosen": 0.8972679972648621,
127
- "rewards/margins": 2.7340922355651855,
128
- "rewards/rejected": -1.836824655532837,
129
  "step": 70
130
  },
131
  {
132
  "epoch": 1.5165876777251186,
133
- "grad_norm": 21.98036303376898,
134
  "learning_rate": 7.775827023107834e-08,
135
- "logits/chosen": 159.87899780273438,
136
- "logits/rejected": 172.0943603515625,
137
- "logps/chosen": -684.193359375,
138
- "logps/rejected": -807.0607299804688,
139
- "loss": 0.1583,
140
- "rewards/accuracies": 0.9624999761581421,
141
- "rewards/chosen": 0.6033011674880981,
142
- "rewards/margins": 3.1811580657958984,
143
- "rewards/rejected": -2.5778567790985107,
144
  "step": 80
145
  },
146
  {
147
  "epoch": 1.7061611374407581,
148
- "grad_norm": 30.02402881967663,
149
  "learning_rate": 2.7440387297912122e-08,
150
- "logits/chosen": 158.72433471679688,
151
- "logits/rejected": 170.1360626220703,
152
- "logps/chosen": -715.9267578125,
153
- "logps/rejected": -813.0823974609375,
154
- "loss": 0.1439,
155
- "rewards/accuracies": 0.981249988079071,
156
- "rewards/chosen": 0.6878162622451782,
157
- "rewards/margins": 3.3359310626983643,
158
- "rewards/rejected": -2.6481146812438965,
159
  "step": 90
160
  },
161
  {
162
  "epoch": 1.8957345971563981,
163
- "grad_norm": 24.732786108992524,
164
  "learning_rate": 2.27878296044029e-09,
165
- "logits/chosen": 162.75802612304688,
166
- "logits/rejected": 165.20050048828125,
167
- "logps/chosen": -721.2442016601562,
168
- "logps/rejected": -793.7107543945312,
169
- "loss": 0.1441,
170
- "rewards/accuracies": 0.9437500238418579,
171
- "rewards/chosen": 0.6896085143089294,
172
- "rewards/margins": 2.8235270977020264,
173
- "rewards/rejected": -2.1339187622070312,
174
  "step": 100
175
  },
176
  {
177
  "epoch": 1.8957345971563981,
178
- "eval_logits/chosen": 152.7205047607422,
179
- "eval_logits/rejected": 152.40980529785156,
180
- "eval_logps/chosen": -699.132080078125,
181
- "eval_logps/rejected": -719.221923828125,
182
- "eval_loss": 0.48118945956230164,
183
- "eval_rewards/accuracies": 0.7083333134651184,
184
- "eval_rewards/chosen": -0.1422799676656723,
185
- "eval_rewards/margins": 1.0724023580551147,
186
- "eval_rewards/rejected": -1.2146824598312378,
187
- "eval_runtime": 105.6518,
188
- "eval_samples_per_second": 7.099,
189
- "eval_steps_per_second": 0.227,
190
  "step": 100
191
  },
192
  {
193
  "epoch": 1.971563981042654,
194
  "step": 104,
195
  "total_flos": 0.0,
196
- "train_loss": 0.38755885110451627,
197
- "train_runtime": 2329.0461,
198
- "train_samples_per_second": 5.796,
199
  "train_steps_per_second": 0.045
200
  }
201
  ],
 
10
  "log_history": [
11
  {
12
  "epoch": 0.018957345971563982,
13
+ "grad_norm": 99.03995946284127,
14
  "learning_rate": 4.545454545454545e-08,
15
  "logits/chosen": 163.72256469726562,
16
  "logits/rejected": 157.14466857910156,
 
25
  },
26
  {
27
  "epoch": 0.1895734597156398,
28
+ "grad_norm": 111.11388003021844,
29
  "learning_rate": 4.545454545454545e-07,
30
+ "logits/chosen": 171.66250610351562,
31
+ "logits/rejected": 172.8583221435547,
32
+ "logps/chosen": -742.216064453125,
33
+ "logps/rejected": -781.2522583007812,
34
+ "loss": 0.7117,
35
+ "rewards/accuracies": 0.4444444477558136,
36
+ "rewards/chosen": 0.023412303999066353,
37
+ "rewards/margins": 0.02978678233921528,
38
+ "rewards/rejected": -0.006374475546181202,
39
  "step": 10
40
  },
41
  {
42
  "epoch": 0.3791469194312796,
43
+ "grad_norm": 82.52463810141795,
44
  "learning_rate": 4.885348141000122e-07,
45
+ "logits/chosen": 163.12501525878906,
46
+ "logits/rejected": 165.84164428710938,
47
+ "logps/chosen": -709.082275390625,
48
+ "logps/rejected": -749.2286376953125,
49
+ "loss": 0.6534,
50
+ "rewards/accuracies": 0.606249988079071,
51
+ "rewards/chosen": 0.553031325340271,
52
+ "rewards/margins": 0.20293028652668,
53
+ "rewards/rejected": 0.35010096430778503,
54
  "step": 20
55
  },
56
  {
57
  "epoch": 0.5687203791469194,
58
+ "grad_norm": 69.67853625499806,
59
  "learning_rate": 4.5025027361734613e-07,
60
+ "logits/chosen": 184.1649932861328,
61
+ "logits/rejected": 176.99354553222656,
62
+ "logps/chosen": -710.6904907226562,
63
+ "logps/rejected": -736.7250366210938,
64
+ "loss": 0.5795,
65
+ "rewards/accuracies": 0.6937500238418579,
66
+ "rewards/chosen": 1.0020567178726196,
67
+ "rewards/margins": 0.513039767742157,
68
+ "rewards/rejected": 0.4890168309211731,
69
  "step": 30
70
  },
71
  {
72
  "epoch": 0.7582938388625592,
73
+ "grad_norm": 72.75906612350751,
74
  "learning_rate": 3.893311157806091e-07,
75
+ "logits/chosen": 167.30398559570312,
76
+ "logits/rejected": 155.48980712890625,
77
+ "logps/chosen": -697.3994140625,
78
+ "logps/rejected": -700.686767578125,
79
+ "loss": 0.5672,
80
+ "rewards/accuracies": 0.737500011920929,
81
+ "rewards/chosen": 0.41778701543807983,
82
+ "rewards/margins": 0.858893871307373,
83
+ "rewards/rejected": -0.44110679626464844,
84
  "step": 40
85
  },
86
  {
87
  "epoch": 0.9478672985781991,
88
+ "grad_norm": 91.77276628667218,
89
  "learning_rate": 3.126631330646801e-07,
90
+ "logits/chosen": 182.02633666992188,
91
+ "logits/rejected": 182.9945068359375,
92
+ "logps/chosen": -771.2996826171875,
93
+ "logps/rejected": -821.66943359375,
94
+ "loss": 0.4967,
95
+ "rewards/accuracies": 0.706250011920929,
96
+ "rewards/chosen": 0.10474413633346558,
97
+ "rewards/margins": 0.8699092864990234,
98
+ "rewards/rejected": -0.7651651501655579,
99
  "step": 50
100
  },
101
  {
102
  "epoch": 1.1374407582938388,
103
+ "grad_norm": 29.047916941624063,
104
  "learning_rate": 2.2891223348923882e-07,
105
+ "logits/chosen": 174.08924865722656,
106
+ "logits/rejected": 179.21060180664062,
107
+ "logps/chosen": -725.1399536132812,
108
+ "logps/rejected": -794.7996215820312,
109
+ "loss": 0.2763,
110
+ "rewards/accuracies": 0.8999999761581421,
111
+ "rewards/chosen": 0.714134693145752,
112
+ "rewards/margins": 2.342179298400879,
113
+ "rewards/rejected": -1.6280447244644165,
114
  "step": 60
115
  },
116
  {
117
  "epoch": 1.3270142180094786,
118
+ "grad_norm": 28.169902323181958,
119
  "learning_rate": 1.4754491880085317e-07,
120
+ "logits/chosen": 169.7666778564453,
121
+ "logits/rejected": 167.54342651367188,
122
+ "logps/chosen": -683.6437377929688,
123
+ "logps/rejected": -783.1070556640625,
124
+ "loss": 0.1734,
125
+ "rewards/accuracies": 0.9624999761581421,
126
+ "rewards/chosen": 0.8705979585647583,
127
+ "rewards/margins": 2.8242533206939697,
128
+ "rewards/rejected": -1.9536556005477905,
129
  "step": 70
130
  },
131
  {
132
  "epoch": 1.5165876777251186,
133
+ "grad_norm": 24.1551722040215,
134
  "learning_rate": 7.775827023107834e-08,
135
+ "logits/chosen": 160.0974578857422,
136
+ "logits/rejected": 172.2356414794922,
137
+ "logps/chosen": -684.6734619140625,
138
+ "logps/rejected": -806.4854125976562,
139
+ "loss": 0.1605,
140
+ "rewards/accuracies": 0.956250011920929,
141
+ "rewards/chosen": 0.5792978405952454,
142
+ "rewards/margins": 3.1283910274505615,
143
+ "rewards/rejected": -2.549093246459961,
144
  "step": 80
145
  },
146
  {
147
  "epoch": 1.7061611374407581,
148
+ "grad_norm": 25.406031573666652,
149
  "learning_rate": 2.7440387297912122e-08,
150
+ "logits/chosen": 158.98464965820312,
151
+ "logits/rejected": 170.3443145751953,
152
+ "logps/chosen": -717.3196411132812,
153
+ "logps/rejected": -813.6456298828125,
154
+ "loss": 0.1493,
155
+ "rewards/accuracies": 0.96875,
156
+ "rewards/chosen": 0.6181727647781372,
157
+ "rewards/margins": 3.2944443225860596,
158
+ "rewards/rejected": -2.6762712001800537,
159
  "step": 90
160
  },
161
  {
162
  "epoch": 1.8957345971563981,
163
+ "grad_norm": 22.259191659621163,
164
  "learning_rate": 2.27878296044029e-09,
165
+ "logits/chosen": 163.0000762939453,
166
+ "logits/rejected": 165.51370239257812,
167
+ "logps/chosen": -720.561767578125,
168
+ "logps/rejected": -794.7210693359375,
169
+ "loss": 0.1424,
170
+ "rewards/accuracies": 0.9312499761581421,
171
+ "rewards/chosen": 0.7237287759780884,
172
+ "rewards/margins": 2.908165454864502,
173
+ "rewards/rejected": -2.184436559677124,
174
  "step": 100
175
  },
176
  {
177
  "epoch": 1.8957345971563981,
178
+ "eval_logits/chosen": 153.1355743408203,
179
+ "eval_logits/rejected": 152.9660186767578,
180
+ "eval_logps/chosen": -697.602294921875,
181
+ "eval_logps/rejected": -720.2744750976562,
182
+ "eval_loss": 0.47219032049179077,
183
+ "eval_rewards/accuracies": 0.7395833134651184,
184
+ "eval_rewards/chosen": -0.06579157710075378,
185
+ "eval_rewards/margins": 1.2015198469161987,
186
+ "eval_rewards/rejected": -1.267311453819275,
187
+ "eval_runtime": 116.4984,
188
+ "eval_samples_per_second": 6.438,
189
+ "eval_steps_per_second": 0.206,
190
  "step": 100
191
  },
192
  {
193
  "epoch": 1.971563981042654,
194
  "step": 104,
195
  "total_flos": 0.0,
196
+ "train_loss": 0.3875045489806395,
197
+ "train_runtime": 2331.3598,
198
+ "train_samples_per_second": 5.791,
199
  "train_steps_per_second": 0.045
200
  }
201
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c24086242ff952a27d9b8b7a937bb99318bdbf5629af339f0a6ac342e25fb1ad
3
  size 6264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62f57065711105b6cf3f7022479eff20f007bd391bba46b9f0c9d6d7fcd8a2ae
3
  size 6264