fats-fme committed
Commit 1b47552 · verified · parent: 2afa3c3

Training in progress, step 32, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0f905c1cff345416341d46b46f91271dbe323fa59c6c4da2192495f4403e5c0f
+oid sha256:30ded4bce8f3aeb46976eaf0b5329b44ad473a2a4305ab8d2f19270798882d91
 size 30322120
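
This entry, like the checkpoint files below, is a Git LFS pointer rather than the binary itself: the tracked text records only the `version`, the `oid sha256:...` of the stored object, and its `size` in bytes, so the diff is simply the hash changing as the checkpoint is overwritten. A minimal sketch (file paths are hypothetical) of checking a resolved file against its pointer:

```python
import hashlib
import os


def read_lfs_pointer(pointer_path):
    """Parse a Git LFS pointer file into a dict of its 'version', 'oid', 'size' fields."""
    fields = {}
    with open(pointer_path, "r", encoding="utf-8") as fh:
        for line in fh:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields


def verify_lfs_object(pointer_path, object_path):
    """Check a resolved file against the sha256 oid and byte size in its pointer."""
    pointer = read_lfs_pointer(pointer_path)
    expected_oid = pointer["oid"].split(":", 1)[1]  # strip the "sha256:" prefix
    expected_size = int(pointer["size"])

    digest = hashlib.sha256()
    with open(object_path, "rb") as fh:
        for chunk in iter(lambda: fh.read(1 << 20), b""):
            digest.update(chunk)

    return (digest.hexdigest() == expected_oid
            and os.path.getsize(object_path) == expected_size)


# Hypothetical usage, with the pointer text saved alongside the resolved binary:
# verify_lfs_object("adapter_model.safetensors.pointer",
#                   "last-checkpoint/adapter_model.safetensors")
```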
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9e931c2a97cd0f7da55dc51d4070cd4be734a7fd9a9dd370fc0288b463769e2e
+oid sha256:a65835a5d88bd1313380b80afe6d10fa69949be4f94bd0e3624d1a7b2e009716
 size 60837186
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:489aec5b0e96507c051eebbfc7b3dbb164d74248f9e9c5f2325ef812ff316a23
+oid sha256:e3d62c754a4618e6e2045bed18fcd110ce3e033eadacd83db11e3e65d6fdb46b
 size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ac6141f065eba1ae0b8d7809735d4d4fbb7fbfda9eb5f9c7b1690edd5a6c2c5b
+oid sha256:ed4d68a8cd0d7c336eae5ec9326f4344f8484e56ac5abb9db3c35753732b9e85
 size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3a7f7b5cf9c6458b2c233922c4300feef134fea79461a679e60cba05607af01c
+oid sha256:814cb1a0cec135cba6a03f6f7a188e85ada15f60bf1b79a649c42e89652b64a2
 size 1064
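
Together, `adapter_model.safetensors`, `optimizer.pt`, `scheduler.pt`, the per-process `rng_state_*.pth` files, and `trainer_state.json` are what the Hugging Face `Trainer` writes to a checkpoint directory and reads back when resuming. A minimal sketch of resuming from this directory; the model and dataset objects, output path, and remaining arguments are assumptions, since the repo's training script is not part of this commit:

```python
# Minimal sketch of resuming from this checkpoint with the Hugging Face Trainer.
# The model/dataset objects, output path, and other TrainingArguments are
# assumptions -- the actual training script is not shown in this commit.
from transformers import Trainer, TrainingArguments


def resume_from_last_checkpoint(model, train_dataset, eval_dataset):
    args = TrainingArguments(
        output_dir="outputs",            # assumed parent dir of last-checkpoint/
        per_device_train_batch_size=2,   # matches "train_batch_size": 2 below
        logging_steps=1,                 # matches "logging_steps": 1 below
        # ...remaining arguments should match the original run
    )
    trainer = Trainer(
        model=model,                     # the PEFT-wrapped model whose adapter is saved here
        args=args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
    )
    # Restores the adapter weights, optimizer.pt, scheduler.pt, rng_state_*.pth
    # and trainer_state.json, then continues training from global_step 32.
    trainer.train(resume_from_checkpoint="outputs/last-checkpoint")
    return trainer
```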
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.25296442687747034,
+  "epoch": 0.5059288537549407,
   "eval_steps": 16,
-  "global_step": 16,
+  "global_step": 32,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -135,6 +135,126 @@
       "eval_samples_per_second": 13.305,
       "eval_steps_per_second": 3.357,
       "step": 16
+    },
+    {
+      "epoch": 0.26877470355731226,
+      "grad_norm": 0.4768655002117157,
+      "learning_rate": 3.4000000000000007e-05,
+      "loss": 0.2191,
+      "step": 17
+    },
+    {
+      "epoch": 0.2845849802371542,
+      "grad_norm": 0.9923710823059082,
+      "learning_rate": 3.6e-05,
+      "loss": 0.384,
+      "step": 18
+    },
+    {
+      "epoch": 0.30039525691699603,
+      "grad_norm": 0.5063422322273254,
+      "learning_rate": 3.8e-05,
+      "loss": 0.1493,
+      "step": 19
+    },
+    {
+      "epoch": 0.31620553359683795,
+      "grad_norm": 0.5010712742805481,
+      "learning_rate": 4e-05,
+      "loss": 0.0675,
+      "step": 20
+    },
+    {
+      "epoch": 0.33201581027667987,
+      "grad_norm": 0.48852574825286865,
+      "learning_rate": 4.2e-05,
+      "loss": 0.0739,
+      "step": 21
+    },
+    {
+      "epoch": 0.34782608695652173,
+      "grad_norm": 0.4387320578098297,
+      "learning_rate": 4.4000000000000006e-05,
+      "loss": 0.0751,
+      "step": 22
+    },
+    {
+      "epoch": 0.36363636363636365,
+      "grad_norm": 0.3455885648727417,
+      "learning_rate": 4.600000000000001e-05,
+      "loss": 0.0516,
+      "step": 23
+    },
+    {
+      "epoch": 0.3794466403162055,
+      "grad_norm": 0.29778942465782166,
+      "learning_rate": 4.8e-05,
+      "loss": 0.0421,
+      "step": 24
+    },
+    {
+      "epoch": 0.3952569169960474,
+      "grad_norm": 0.3956562578678131,
+      "learning_rate": 5e-05,
+      "loss": 0.057,
+      "step": 25
+    },
+    {
+      "epoch": 0.41106719367588934,
+      "grad_norm": 0.8842503428459167,
+      "learning_rate": 5.2000000000000004e-05,
+      "loss": 0.1188,
+      "step": 26
+    },
+    {
+      "epoch": 0.4268774703557312,
+      "grad_norm": 0.9197725653648376,
+      "learning_rate": 5.4000000000000005e-05,
+      "loss": 0.1128,
+      "step": 27
+    },
+    {
+      "epoch": 0.4426877470355731,
+      "grad_norm": 0.9175456762313843,
+      "learning_rate": 5.6000000000000006e-05,
+      "loss": 0.0628,
+      "step": 28
+    },
+    {
+      "epoch": 0.45849802371541504,
+      "grad_norm": 0.5987579822540283,
+      "learning_rate": 5.8e-05,
+      "loss": 0.0499,
+      "step": 29
+    },
+    {
+      "epoch": 0.4743083003952569,
+      "grad_norm": 0.8026472330093384,
+      "learning_rate": 6e-05,
+      "loss": 0.0619,
+      "step": 30
+    },
+    {
+      "epoch": 0.4901185770750988,
+      "grad_norm": 0.5789671540260315,
+      "learning_rate": 6.2e-05,
+      "loss": 0.0394,
+      "step": 31
+    },
+    {
+      "epoch": 0.5059288537549407,
+      "grad_norm": 0.7335872054100037,
+      "learning_rate": 6.400000000000001e-05,
+      "loss": 0.2007,
+      "step": 32
+    },
+    {
+      "epoch": 0.5059288537549407,
+      "eval_loss": 0.050875596702098846,
+      "eval_runtime": 8.2485,
+      "eval_samples_per_second": 12.972,
+      "eval_steps_per_second": 3.273,
+      "step": 32
     }
   ],
   "logging_steps": 1,
@@ -154,7 +274,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2383975687389184.0,
+  "total_flos": 4791247562604544.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null