chansung committed
Commit
7d12a2e
1 Parent(s): 94339a2

Model save

README.md CHANGED
@@ -20,7 +20,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [google/gemma-7b](https://huggingface.co/google/gemma-7b) on the generator dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.6581
+- Loss: 1.6532
 
 ## Model description
 
@@ -57,7 +57,7 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 1.6561 | 1.0 | 140 | 1.6581 |
+| 1.6467 | 1.0 | 140 | 1.6532 |
 
 
 ### Framework versions
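
The README above describes a fine-tune of google/gemma-7b, and the run directory name (`main-lora-gemma7b-closedqa-0-0`) indicates a LoRA adapter. A minimal sketch, not part of this commit, of loading such an adapter with PEFT; the adapter repo id below is a hypothetical placeholder:

```python
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

# Hypothetical adapter repository id; substitute the actual repo name.
ADAPTER_ID = "your-username/gemma7b-closedqa-lora"

# Loads the base model recorded in the adapter config (google/gemma-7b)
# and attaches the LoRA weights on top of it.
model = AutoPeftModelForCausalLM.from_pretrained(ADAPTER_ID, torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(ADAPTER_ID)

inputs = tokenizer("Question: What is the capital of France?\nAnswer:", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```
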
all_results.json CHANGED
@@ -1,14 +1,9 @@
 {
     "epoch": 1.0,
-    "eval_loss": 1.6581236124038696,
-    "eval_runtime": 3.846,
-    "eval_samples": 518,
-    "eval_samples_per_second": 46.282,
-    "eval_steps_per_second": 0.78,
     "total_flos": 4.268849030789857e+17,
-    "train_loss": 5.955788305827549,
-    "train_runtime": 1733.7968,
+    "train_loss": 5.926357351030622,
+    "train_runtime": 363.5109,
     "train_samples": 51241,
-    "train_samples_per_second": 10.326,
-    "train_steps_per_second": 0.081
+    "train_samples_per_second": 49.253,
+    "train_steps_per_second": 0.385
 }
runs/Nov17_22-39-49_main-lora-gemma7b-closedqa-0-0/events.out.tfevents.1731901785.main-lora-gemma7b-closedqa-0-0.457.0 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d09fcf6e9494fffe057c363976c86a106756c39545e2ddfc8819d9251fb7eae3
-size 11716
+oid sha256:8dd40eb05c9b5e3735b26cf56af1cd7ecbaa13529310ad40b014624d5b17d151
+size 12341
train_results.json CHANGED
@@ -1,9 +1,9 @@
 {
     "epoch": 1.0,
     "total_flos": 4.268849030789857e+17,
-    "train_loss": 5.955788305827549,
-    "train_runtime": 1733.7968,
+    "train_loss": 5.926357351030622,
+    "train_runtime": 363.5109,
     "train_samples": 51241,
-    "train_samples_per_second": 10.326,
-    "train_steps_per_second": 0.081
+    "train_samples_per_second": 49.253,
+    "train_steps_per_second": 0.385
 }
trainer_state.json CHANGED
@@ -10,223 +10,223 @@
   "log_history": [
     {
       "epoch": 0.007142857142857143,
-      "grad_norm": 106.10701751708984,
+      "grad_norm": 104.33383178710938,
       "learning_rate": 1.4285714285714285e-05,
-      "loss": 27.4831,
+      "loss": 26.8049,
       "step": 1
     },
     {
       "epoch": 0.03571428571428571,
-      "grad_norm": 46.149723052978516,
+      "grad_norm": 53.856834411621094,
       "learning_rate": 7.142857142857143e-05,
-      "loss": 26.2992,
+      "loss": 26.2527,
       "step": 5
     },
     {
       "epoch": 0.07142857142857142,
-      "grad_norm": 16.458189010620117,
+      "grad_norm": 16.46755599975586,
       "learning_rate": 0.00014285714285714287,
-      "loss": 20.4929,
+      "loss": 20.5587,
       "step": 10
     },
     {
       "epoch": 0.10714285714285714,
-      "grad_norm": 9.764805793762207,
+      "grad_norm": 9.823058128356934,
       "learning_rate": 0.00019996891820008164,
-      "loss": 16.0513,
+      "loss": 16.1016,
       "step": 15
     },
     {
       "epoch": 0.14285714285714285,
-      "grad_norm": 3.3296561241149902,
+      "grad_norm": 3.4154016971588135,
       "learning_rate": 0.00019888308262251285,
-      "loss": 13.4526,
+      "loss": 13.4739,
       "step": 20
     },
     {
       "epoch": 0.17857142857142858,
-      "grad_norm": 3.235478162765503,
+      "grad_norm": 3.293179988861084,
       "learning_rate": 0.0001962624246950012,
-      "loss": 12.6172,
+      "loss": 12.6248,
       "step": 25
     },
     {
       "epoch": 0.21428571428571427,
-      "grad_norm": 5.432589530944824,
+      "grad_norm": 5.406515121459961,
       "learning_rate": 0.00019214762118704076,
-      "loss": 11.9808,
+      "loss": 11.9858,
       "step": 30
     },
     {
       "epoch": 0.25,
-      "grad_norm": 10.644349098205566,
+      "grad_norm": 10.7940034866333,
       "learning_rate": 0.00018660254037844388,
-      "loss": 11.0065,
+      "loss": 11.0004,
       "step": 35
     },
     {
       "epoch": 0.2857142857142857,
-      "grad_norm": 14.282907485961914,
+      "grad_norm": 14.75281047821045,
       "learning_rate": 0.00017971325072229226,
-      "loss": 9.3131,
+      "loss": 9.2523,
       "step": 40
     },
     {
       "epoch": 0.32142857142857145,
-      "grad_norm": 21.60712432861328,
+      "grad_norm": 22.644073486328125,
       "learning_rate": 0.00017158668492597186,
-      "loss": 7.1723,
+      "loss": 7.0092,
       "step": 45
     },
     {
       "epoch": 0.35714285714285715,
-      "grad_norm": 15.418853759765625,
+      "grad_norm": 14.965047836303711,
       "learning_rate": 0.00016234898018587337,
-      "loss": 4.6599,
+      "loss": 4.4455,
       "step": 50
     },
     {
       "epoch": 0.39285714285714285,
-      "grad_norm": 5.335650444030762,
+      "grad_norm": 5.274498462677002,
       "learning_rate": 0.0001521435203379498,
-      "loss": 2.855,
+      "loss": 2.7643,
       "step": 55
     },
     {
       "epoch": 0.42857142857142855,
-      "grad_norm": 4.913125038146973,
+      "grad_norm": 4.607834339141846,
       "learning_rate": 0.00014112871031306119,
-      "loss": 2.3938,
+      "loss": 2.3451,
       "step": 60
     },
     {
       "epoch": 0.4642857142857143,
-      "grad_norm": 3.951244592666626,
+      "grad_norm": 3.502431631088257,
       "learning_rate": 0.00012947551744109043,
-      "loss": 2.1646,
+      "loss": 2.1217,
       "step": 65
     },
     {
       "epoch": 0.5,
-      "grad_norm": 1.6933783292770386,
+      "grad_norm": 1.5408211946487427,
       "learning_rate": 0.00011736481776669306,
-      "loss": 2.0061,
+      "loss": 1.979,
       "step": 70
     },
     {
       "epoch": 0.5357142857142857,
-      "grad_norm": 0.9344208836555481,
+      "grad_norm": 1.2710477113723755,
       "learning_rate": 0.00010498458856606972,
-      "loss": 1.9092,
+      "loss": 1.8913,
       "step": 75
     },
     {
       "epoch": 0.5714285714285714,
-      "grad_norm": 0.9437263011932373,
+      "grad_norm": 1.2541048526763916,
       "learning_rate": 9.252699064135758e-05,
-      "loss": 1.8086,
+      "loss": 1.7956,
       "step": 80
     },
     {
       "epoch": 0.6071428571428571,
-      "grad_norm": 1.5205684900283813,
+      "grad_norm": 1.3496887683868408,
       "learning_rate": 8.018538568006027e-05,
-      "loss": 1.7838,
+      "loss": 1.7699,
       "step": 85
     },
     {
       "epoch": 0.6428571428571429,
-      "grad_norm": 0.8009554743766785,
+      "grad_norm": 1.127426028251648,
       "learning_rate": 6.815133497483157e-05,
-      "loss": 1.7433,
+      "loss": 1.7305,
       "step": 90
     },
     {
       "epoch": 0.6785714285714286,
-      "grad_norm": 0.9393540620803833,
+      "grad_norm": 1.5040632486343384,
       "learning_rate": 5.6611626088244194e-05,
-      "loss": 1.736,
+      "loss": 1.728,
       "step": 95
     },
     {
       "epoch": 0.7142857142857143,
-      "grad_norm": 1.6479978561401367,
+      "grad_norm": 1.3170948028564453,
       "learning_rate": 4.574537361342407e-05,
-      "loss": 1.6976,
+      "loss": 1.6858,
       "step": 100
     },
     {
       "epoch": 0.75,
-      "grad_norm": 0.7829992771148682,
+      "grad_norm": 0.6884583830833435,
       "learning_rate": 3.5721239031346066e-05,
-      "loss": 1.6793,
+      "loss": 1.6739,
       "step": 105
     },
     {
       "epoch": 0.7857142857142857,
-      "grad_norm": 0.7236832976341248,
+      "grad_norm": 0.6821992993354797,
       "learning_rate": 2.669481281701739e-05,
-      "loss": 1.7,
+      "loss": 1.6896,
       "step": 110
     },
     {
       "epoch": 0.8214285714285714,
-      "grad_norm": 0.7881686687469482,
+      "grad_norm": 0.8719048500061035,
       "learning_rate": 1.880619942841435e-05,
-      "loss": 1.6778,
+      "loss": 1.6684,
       "step": 115
     },
     {
       "epoch": 0.8571428571428571,
-      "grad_norm": 1.7755261659622192,
+      "grad_norm": 1.8157048225402832,
       "learning_rate": 1.2177842662977135e-05,
-      "loss": 1.6757,
+      "loss": 1.6667,
       "step": 120
     },
     {
       "epoch": 0.8928571428571429,
-      "grad_norm": 1.0512515306472778,
+      "grad_norm": 1.1482408046722412,
       "learning_rate": 6.9126251355795864e-06,
-      "loss": 1.6766,
+      "loss": 1.6691,
       "step": 125
     },
     {
       "epoch": 0.9285714285714286,
-      "grad_norm": 0.7070327997207642,
+      "grad_norm": 0.786839485168457,
       "learning_rate": 3.092271377092215e-06,
-      "loss": 1.67,
+      "loss": 1.6592,
       "step": 130
     },
     {
       "epoch": 0.9642857142857143,
-      "grad_norm": 0.7091158628463745,
+      "grad_norm": 0.6941242814064026,
       "learning_rate": 7.760793399827937e-07,
-      "loss": 1.6459,
+      "loss": 1.6378,
       "step": 135
     },
     {
       "epoch": 1.0,
-      "grad_norm": 0.7374704480171204,
+      "grad_norm": 0.7584343552589417,
       "learning_rate": 0.0,
-      "loss": 1.6561,
+      "loss": 1.6467,
       "step": 140
     },
     {
       "epoch": 1.0,
-      "eval_loss": 1.6581236124038696,
-      "eval_runtime": 3.8242,
-      "eval_samples_per_second": 46.545,
-      "eval_steps_per_second": 0.784,
+      "eval_loss": 1.6531758308410645,
+      "eval_runtime": 1.3594,
+      "eval_samples_per_second": 130.936,
+      "eval_steps_per_second": 2.207,
       "step": 140
     },
     {
       "epoch": 1.0,
       "step": 140,
       "total_flos": 4.268849030789857e+17,
-      "train_loss": 5.955788305827549,
-      "train_runtime": 1733.7968,
-      "train_samples_per_second": 10.326,
-      "train_steps_per_second": 0.081
+      "train_loss": 5.926357351030622,
+      "train_runtime": 363.5109,
+      "train_samples_per_second": 49.253,
+      "train_steps_per_second": 0.385
     }
   ],
   "logging_steps": 5,
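
The trainer_state.json diff above shows the per-step `log_history` records (epoch, grad_norm, learning_rate, loss, step) plus the final evaluation and aggregate training entries. A minimal sketch, assuming a local copy of the updated file, of how those records can be inspected:

```python
import json

# Read the Trainer state saved alongside the checkpoint.
with open("trainer_state.json") as f:
    state = json.load(f)

# Per-step training records carry "loss"; print the logged training curve.
for record in state["log_history"]:
    if "loss" in record:
        print(f"step {record['step']:>3}  loss {record['loss']:.4f}")

# The evaluation record is the entry that carries "eval_loss".
eval_record = next(r for r in state["log_history"] if "eval_loss" in r)
print("eval_loss:", eval_record["eval_loss"])
```
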