Nexspear commited on
Commit
8f41503
·
verified ·
1 Parent(s): 896ccb3

Training in progress, step 84, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5d678ddcd815c64cb34bb5e65c89513607ba053e4a6c9e16f79264ea3c082326
3
  size 100966336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b6fb9b08c2e10e053f3d81ad8d2e0e24099462e2b5a2472f9154ee3481f0255
3
  size 100966336
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:905a59ce198902d98f99592337983373607e49d00ff43b167d36922497a116a4
3
  size 51613348
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:760dba386ee4560fcbe21cb431d3d8086e73cb0d13b223b37fde2ea0eec443fa
3
  size 51613348
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e576a0cf5d4d2361e5146f1dceac71aa1a3d4db54e3485b22fbce47c7516e942
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d87bc4656c379289d41186281c9cbad1876e328c641d66e6bd6c9c084caea09
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3101e5b327a48ff01ba2c03545970ed09eef14c8b179cc52c21d8ec3e72950f1
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:200db5d515f7fe0d1c4e0bac456fe6b9042173a9f2b890836ad8c5bd6dfb4d2e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.006291899179806,
5
  "eval_steps": 42,
6
- "global_step": 42,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -121,6 +121,112 @@
121
  "eval_samples_per_second": 35.888,
122
  "eval_steps_per_second": 4.488,
123
  "step": 42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  }
125
  ],
126
  "logging_steps": 3,
@@ -140,7 +246,7 @@
140
  "attributes": {}
141
  }
142
  },
143
- "total_flos": 1.3438839322312704e+16,
144
  "train_batch_size": 8,
145
  "trial_name": null,
146
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.012583798359612,
5
  "eval_steps": 42,
6
+ "global_step": 84,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
121
  "eval_samples_per_second": 35.888,
122
  "eval_steps_per_second": 4.488,
123
  "step": 42
124
+ },
125
+ {
126
+ "epoch": 0.006741320549792143,
127
+ "grad_norm": 0.13759616017341614,
128
+ "learning_rate": 4.937319780454559e-05,
129
+ "loss": 1.1133,
130
+ "step": 45
131
+ },
132
+ {
133
+ "epoch": 0.007190741919778285,
134
+ "grad_norm": 0.14887671172618866,
135
+ "learning_rate": 4.926169550509787e-05,
136
+ "loss": 1.1339,
137
+ "step": 48
138
+ },
139
+ {
140
+ "epoch": 0.007640163289764428,
141
+ "grad_norm": 0.13235358893871307,
142
+ "learning_rate": 4.914121772213898e-05,
143
+ "loss": 1.0057,
144
+ "step": 51
145
+ },
146
+ {
147
+ "epoch": 0.00808958465975057,
148
+ "grad_norm": 0.14951473474502563,
149
+ "learning_rate": 4.9011809025775486e-05,
150
+ "loss": 1.0919,
151
+ "step": 54
152
+ },
153
+ {
154
+ "epoch": 0.008539006029736713,
155
+ "grad_norm": 0.1409119963645935,
156
+ "learning_rate": 4.887351729005726e-05,
157
+ "loss": 1.143,
158
+ "step": 57
159
+ },
160
+ {
161
+ "epoch": 0.008988427399722857,
162
+ "grad_norm": 0.13198496401309967,
163
+ "learning_rate": 4.8726393675266716e-05,
164
+ "loss": 1.0853,
165
+ "step": 60
166
+ },
167
+ {
168
+ "epoch": 0.009437848769709,
169
+ "grad_norm": 0.15319645404815674,
170
+ "learning_rate": 4.8570492608992325e-05,
171
+ "loss": 1.0534,
172
+ "step": 63
173
+ },
174
+ {
175
+ "epoch": 0.009887270139695143,
176
+ "grad_norm": 0.1532219797372818,
177
+ "learning_rate": 4.8405871765993433e-05,
178
+ "loss": 1.1016,
179
+ "step": 66
180
+ },
181
+ {
182
+ "epoch": 0.010336691509681285,
183
+ "grad_norm": 0.14198264479637146,
184
+ "learning_rate": 4.82325920468638e-05,
185
+ "loss": 1.029,
186
+ "step": 69
187
+ },
188
+ {
189
+ "epoch": 0.010786112879667428,
190
+ "grad_norm": 0.12415461987257004,
191
+ "learning_rate": 4.805071755550177e-05,
192
+ "loss": 1.1565,
193
+ "step": 72
194
+ },
195
+ {
196
+ "epoch": 0.01123553424965357,
197
+ "grad_norm": 0.1350441873073578,
198
+ "learning_rate": 4.7860315575395316e-05,
199
+ "loss": 1.1276,
200
+ "step": 75
201
+ },
202
+ {
203
+ "epoch": 0.011684955619639713,
204
+ "grad_norm": 0.12075574696063995,
205
+ "learning_rate": 4.766145654473095e-05,
206
+ "loss": 1.0753,
207
+ "step": 78
208
+ },
209
+ {
210
+ "epoch": 0.012134376989625857,
211
+ "grad_norm": 0.11550460755825043,
212
+ "learning_rate": 4.745421403033548e-05,
213
+ "loss": 1.0078,
214
+ "step": 81
215
+ },
216
+ {
217
+ "epoch": 0.012583798359612,
218
+ "grad_norm": 0.12455905228853226,
219
+ "learning_rate": 4.72386647004603e-05,
220
+ "loss": 1.0373,
221
+ "step": 84
222
+ },
223
+ {
224
+ "epoch": 0.012583798359612,
225
+ "eval_loss": 1.1157194375991821,
226
+ "eval_runtime": 312.8234,
227
+ "eval_samples_per_second": 35.94,
228
+ "eval_steps_per_second": 4.495,
229
+ "step": 84
230
  }
231
  ],
232
  "logging_steps": 3,
 
246
  "attributes": {}
247
  }
248
  },
249
+ "total_flos": 2.765900651220173e+16,
250
  "train_batch_size": 8,
251
  "trial_name": null,
252
  "trial_params": null