Holmeister committed on
Commit
08e5526
1 Parent(s): a4bb588

End of training

Browse files
Files changed (5) hide show
  1. README.md +3 -2
  2. all_results.json +12 -0
  3. eval_results.json +7 -0
  4. train_results.json +8 -0
  5. trainer_state.json +516 -0
README.md CHANGED
@@ -3,6 +3,7 @@ license: bigscience-bloom-rail-1.0
3
  library_name: peft
4
  tags:
5
  - llama-factory
 
6
  - generated_from_trainer
7
  base_model: bigscience/bloom-7b1
8
  model-index:
@@ -15,9 +16,9 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # BLOOM_AAID_new_mixed_train_final
17
 
18
- This model is a fine-tuned version of [bigscience/bloom-7b1](https://huggingface.co/bigscience/bloom-7b1) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 0.8549
21
 
22
  ## Model description
23
 
 
3
  library_name: peft
4
  tags:
5
  - llama-factory
6
+ - lora
7
  - generated_from_trainer
8
  base_model: bigscience/bloom-7b1
9
  model-index:
 
16
 
17
  # BLOOM_AAID_new_mixed_train_final
18
 
19
+ This model is a fine-tuned version of [bigscience/bloom-7b1](https://huggingface.co/bigscience/bloom-7b1) on the AAID_new_mixed dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.8218
22
 
23
  ## Model description
24
 
all_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.3390757451462948,
3
+ "eval_loss": 0.8218151926994324,
4
+ "eval_runtime": 209.1208,
5
+ "eval_samples_per_second": 22.532,
6
+ "eval_steps_per_second": 0.708,
7
+ "total_flos": 4.228376200709407e+17,
8
+ "train_loss": 0.4842093152384604,
9
+ "train_runtime": 19786.6932,
10
+ "train_samples_per_second": 35.484,
11
+ "train_steps_per_second": 0.139
12
+ }
eval_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.3390757451462948,
3
+ "eval_loss": 0.8218151926994324,
4
+ "eval_runtime": 209.1208,
5
+ "eval_samples_per_second": 22.532,
6
+ "eval_steps_per_second": 0.708
7
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.3390757451462948,
3
+ "total_flos": 4.228376200709407e+17,
4
+ "train_loss": 0.4842093152384604,
5
+ "train_runtime": 19786.6932,
6
+ "train_samples_per_second": 35.484,
7
+ "train_steps_per_second": 0.139
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,516 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8218151926994324,
3
+ "best_model_checkpoint": "saves/BLOOM-7B/lora/train_1/checkpoint-210",
4
+ "epoch": 0.3390757451462948,
5
+ "eval_steps": 10,
6
+ "global_step": 310,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.010937927262783703,
13
+ "grad_norm": 1.1790575981140137,
14
+ "learning_rate": 0.00029999015487222375,
15
+ "loss": 1.6811,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.010937927262783703,
20
+ "eval_loss": 0.9062243103981018,
21
+ "eval_runtime": 210.246,
22
+ "eval_samples_per_second": 22.412,
23
+ "eval_steps_per_second": 0.704,
24
+ "step": 10
25
+ },
26
+ {
27
+ "epoch": 0.021875854525567406,
28
+ "grad_norm": 0.5708920359611511,
29
+ "learning_rate": 0.00029996062078124905,
30
+ "loss": 0.6248,
31
+ "step": 20
32
+ },
33
+ {
34
+ "epoch": 0.021875854525567406,
35
+ "eval_loss": 0.8811991810798645,
36
+ "eval_runtime": 210.0954,
37
+ "eval_samples_per_second": 22.428,
38
+ "eval_steps_per_second": 0.704,
39
+ "step": 20
40
+ },
41
+ {
42
+ "epoch": 0.03281378178835111,
43
+ "grad_norm": 0.5245503783226013,
44
+ "learning_rate": 0.0002999114016039678,
45
+ "loss": 0.5466,
46
+ "step": 30
47
+ },
48
+ {
49
+ "epoch": 0.03281378178835111,
50
+ "eval_loss": 0.8906806707382202,
51
+ "eval_runtime": 210.0232,
52
+ "eval_samples_per_second": 22.436,
53
+ "eval_steps_per_second": 0.705,
54
+ "step": 30
55
+ },
56
+ {
57
+ "epoch": 0.04375170905113481,
58
+ "grad_norm": 0.31997227668762207,
59
+ "learning_rate": 0.00029984250380130117,
60
+ "loss": 0.5591,
61
+ "step": 40
62
+ },
63
+ {
64
+ "epoch": 0.04375170905113481,
65
+ "eval_loss": 0.8906031250953674,
66
+ "eval_runtime": 209.6861,
67
+ "eval_samples_per_second": 22.472,
68
+ "eval_steps_per_second": 0.706,
69
+ "step": 40
70
+ },
71
+ {
72
+ "epoch": 0.05468963631391851,
73
+ "grad_norm": 0.2958827614784241,
74
+ "learning_rate": 0.0002997539364173515,
75
+ "loss": 0.5318,
76
+ "step": 50
77
+ },
78
+ {
79
+ "epoch": 0.05468963631391851,
80
+ "eval_loss": 0.8625577688217163,
81
+ "eval_runtime": 210.182,
82
+ "eval_samples_per_second": 22.419,
83
+ "eval_steps_per_second": 0.704,
84
+ "step": 50
85
+ },
86
+ {
87
+ "epoch": 0.06562756357670221,
88
+ "grad_norm": 0.396158367395401,
89
+ "learning_rate": 0.00029964571107821494,
90
+ "loss": 0.496,
91
+ "step": 60
92
+ },
93
+ {
94
+ "epoch": 0.06562756357670221,
95
+ "eval_loss": 0.8501101136207581,
96
+ "eval_runtime": 210.0313,
97
+ "eval_samples_per_second": 22.435,
98
+ "eval_steps_per_second": 0.705,
99
+ "step": 60
100
+ },
101
+ {
102
+ "epoch": 0.07656549083948591,
103
+ "grad_norm": 0.4420228600502014,
104
+ "learning_rate": 0.00029951784199045534,
105
+ "loss": 0.4855,
106
+ "step": 70
107
+ },
108
+ {
109
+ "epoch": 0.07656549083948591,
110
+ "eval_loss": 0.8276830911636353,
111
+ "eval_runtime": 210.1084,
112
+ "eval_samples_per_second": 22.427,
113
+ "eval_steps_per_second": 0.704,
114
+ "step": 70
115
+ },
116
+ {
117
+ "epoch": 0.08750341810226962,
118
+ "grad_norm": 0.4005095958709717,
119
+ "learning_rate": 0.0002993703459392396,
120
+ "loss": 0.4746,
121
+ "step": 80
122
+ },
123
+ {
124
+ "epoch": 0.08750341810226962,
125
+ "eval_loss": 0.8740801215171814,
126
+ "eval_runtime": 210.097,
127
+ "eval_samples_per_second": 22.428,
128
+ "eval_steps_per_second": 0.704,
129
+ "step": 80
130
+ },
131
+ {
132
+ "epoch": 0.09844134536505332,
133
+ "grad_norm": 0.4457601308822632,
134
+ "learning_rate": 0.00029920324228613376,
135
+ "loss": 0.4846,
136
+ "step": 90
137
+ },
138
+ {
139
+ "epoch": 0.09844134536505332,
140
+ "eval_loss": 0.8443948030471802,
141
+ "eval_runtime": 209.5315,
142
+ "eval_samples_per_second": 22.488,
143
+ "eval_steps_per_second": 0.706,
144
+ "step": 90
145
+ },
146
+ {
147
+ "epoch": 0.10937927262783702,
148
+ "grad_norm": 0.3862694799900055,
149
+ "learning_rate": 0.0002990165529665622,
150
+ "loss": 0.4424,
151
+ "step": 100
152
+ },
153
+ {
154
+ "epoch": 0.10937927262783702,
155
+ "eval_loss": 0.843928873538971,
156
+ "eval_runtime": 210.1339,
157
+ "eval_samples_per_second": 22.424,
158
+ "eval_steps_per_second": 0.704,
159
+ "step": 100
160
+ },
161
+ {
162
+ "epoch": 0.12031719989062073,
163
+ "grad_norm": 0.32919445633888245,
164
+ "learning_rate": 0.0002988103024869277,
165
+ "loss": 0.4514,
166
+ "step": 110
167
+ },
168
+ {
169
+ "epoch": 0.12031719989062073,
170
+ "eval_loss": 0.8323902487754822,
171
+ "eval_runtime": 210.1443,
172
+ "eval_samples_per_second": 22.423,
173
+ "eval_steps_per_second": 0.704,
174
+ "step": 110
175
+ },
176
+ {
177
+ "epoch": 0.13125512715340443,
178
+ "grad_norm": 0.3766002953052521,
179
+ "learning_rate": 0.00029858451792139453,
180
+ "loss": 0.4485,
181
+ "step": 120
182
+ },
183
+ {
184
+ "epoch": 0.13125512715340443,
185
+ "eval_loss": 0.8639554381370544,
186
+ "eval_runtime": 210.1293,
187
+ "eval_samples_per_second": 22.424,
188
+ "eval_steps_per_second": 0.704,
189
+ "step": 120
190
+ },
191
+ {
192
+ "epoch": 0.14219305441618812,
193
+ "grad_norm": 0.3994982838630676,
194
+ "learning_rate": 0.0002983392289083346,
195
+ "loss": 0.443,
196
+ "step": 130
197
+ },
198
+ {
199
+ "epoch": 0.14219305441618812,
200
+ "eval_loss": 0.853752076625824,
201
+ "eval_runtime": 210.1289,
202
+ "eval_samples_per_second": 22.424,
203
+ "eval_steps_per_second": 0.704,
204
+ "step": 130
205
+ },
206
+ {
207
+ "epoch": 0.15313098167897182,
208
+ "grad_norm": 0.2923962473869324,
209
+ "learning_rate": 0.0002980744676464371,
210
+ "loss": 0.4316,
211
+ "step": 140
212
+ },
213
+ {
214
+ "epoch": 0.15313098167897182,
215
+ "eval_loss": 0.826151430606842,
216
+ "eval_runtime": 209.9359,
217
+ "eval_samples_per_second": 22.445,
218
+ "eval_steps_per_second": 0.705,
219
+ "step": 140
220
+ },
221
+ {
222
+ "epoch": 0.16406890894175555,
223
+ "grad_norm": 0.33790677785873413,
224
+ "learning_rate": 0.0002977902688904813,
225
+ "loss": 0.4435,
226
+ "step": 150
227
+ },
228
+ {
229
+ "epoch": 0.16406890894175555,
230
+ "eval_loss": 0.855368971824646,
231
+ "eval_runtime": 210.0755,
232
+ "eval_samples_per_second": 22.43,
233
+ "eval_steps_per_second": 0.705,
234
+ "step": 150
235
+ },
236
+ {
237
+ "epoch": 0.17500683620453925,
238
+ "grad_norm": 0.3131038248538971,
239
+ "learning_rate": 0.00029748666994677467,
240
+ "loss": 0.4269,
241
+ "step": 160
242
+ },
243
+ {
244
+ "epoch": 0.17500683620453925,
245
+ "eval_loss": 0.8361687660217285,
246
+ "eval_runtime": 209.5838,
247
+ "eval_samples_per_second": 22.483,
248
+ "eval_steps_per_second": 0.706,
249
+ "step": 160
250
+ },
251
+ {
252
+ "epoch": 0.18594476346732294,
253
+ "grad_norm": 0.35536178946495056,
254
+ "learning_rate": 0.00029716371066825593,
255
+ "loss": 0.4322,
256
+ "step": 170
257
+ },
258
+ {
259
+ "epoch": 0.18594476346732294,
260
+ "eval_loss": 0.8331068158149719,
261
+ "eval_runtime": 210.059,
262
+ "eval_samples_per_second": 22.432,
263
+ "eval_steps_per_second": 0.705,
264
+ "step": 170
265
+ },
266
+ {
267
+ "epoch": 0.19688269073010664,
268
+ "grad_norm": 0.3734683692455292,
269
+ "learning_rate": 0.0002968214334492632,
270
+ "loss": 0.4136,
271
+ "step": 180
272
+ },
273
+ {
274
+ "epoch": 0.19688269073010664,
275
+ "eval_loss": 0.8500174283981323,
276
+ "eval_runtime": 209.7577,
277
+ "eval_samples_per_second": 22.464,
278
+ "eval_steps_per_second": 0.706,
279
+ "step": 180
280
+ },
281
+ {
282
+ "epoch": 0.20782061799289034,
283
+ "grad_norm": 0.3501605987548828,
284
+ "learning_rate": 0.00029645988321996917,
285
+ "loss": 0.4262,
286
+ "step": 190
287
+ },
288
+ {
289
+ "epoch": 0.20782061799289034,
290
+ "eval_loss": 0.8315255641937256,
291
+ "eval_runtime": 209.8869,
292
+ "eval_samples_per_second": 22.45,
293
+ "eval_steps_per_second": 0.705,
294
+ "step": 190
295
+ },
296
+ {
297
+ "epoch": 0.21875854525567404,
298
+ "grad_norm": 0.2926044464111328,
299
+ "learning_rate": 0.00029607910744048336,
300
+ "loss": 0.4283,
301
+ "step": 200
302
+ },
303
+ {
304
+ "epoch": 0.21875854525567404,
305
+ "eval_loss": 0.8357769846916199,
306
+ "eval_runtime": 210.1683,
307
+ "eval_samples_per_second": 22.42,
308
+ "eval_steps_per_second": 0.704,
309
+ "step": 200
310
+ },
311
+ {
312
+ "epoch": 0.22969647251845776,
313
+ "grad_norm": 0.4317471385002136,
314
+ "learning_rate": 0.00029567915609462174,
315
+ "loss": 0.3983,
316
+ "step": 210
317
+ },
318
+ {
319
+ "epoch": 0.22969647251845776,
320
+ "eval_loss": 0.8218151926994324,
321
+ "eval_runtime": 210.2221,
322
+ "eval_samples_per_second": 22.414,
323
+ "eval_steps_per_second": 0.704,
324
+ "step": 210
325
+ },
326
+ {
327
+ "epoch": 0.24063439978124146,
328
+ "grad_norm": 0.4090133309364319,
329
+ "learning_rate": 0.00029526008168334573,
330
+ "loss": 0.409,
331
+ "step": 220
332
+ },
333
+ {
334
+ "epoch": 0.24063439978124146,
335
+ "eval_loss": 0.8271812200546265,
336
+ "eval_runtime": 209.5484,
337
+ "eval_samples_per_second": 22.486,
338
+ "eval_steps_per_second": 0.706,
339
+ "step": 220
340
+ },
341
+ {
342
+ "epoch": 0.25157232704402516,
343
+ "grad_norm": 0.46969935297966003,
344
+ "learning_rate": 0.0002948219392178703,
345
+ "loss": 0.4173,
346
+ "step": 230
347
+ },
348
+ {
349
+ "epoch": 0.25157232704402516,
350
+ "eval_loss": 0.8557965159416199,
351
+ "eval_runtime": 210.1229,
352
+ "eval_samples_per_second": 22.425,
353
+ "eval_steps_per_second": 0.704,
354
+ "step": 230
355
+ },
356
+ {
357
+ "epoch": 0.26251025430680885,
358
+ "grad_norm": 0.3677867650985718,
359
+ "learning_rate": 0.0002943647862124429,
360
+ "loss": 0.3847,
361
+ "step": 240
362
+ },
363
+ {
364
+ "epoch": 0.26251025430680885,
365
+ "eval_loss": 0.8389946818351746,
366
+ "eval_runtime": 210.1338,
367
+ "eval_samples_per_second": 22.424,
368
+ "eval_steps_per_second": 0.704,
369
+ "step": 240
370
+ },
371
+ {
372
+ "epoch": 0.27344818156959255,
373
+ "grad_norm": 0.43112123012542725,
374
+ "learning_rate": 0.0002938886826767936,
375
+ "loss": 0.3923,
376
+ "step": 250
377
+ },
378
+ {
379
+ "epoch": 0.27344818156959255,
380
+ "eval_loss": 0.8553555011749268,
381
+ "eval_runtime": 209.3302,
382
+ "eval_samples_per_second": 22.51,
383
+ "eval_steps_per_second": 0.707,
384
+ "step": 250
385
+ },
386
+ {
387
+ "epoch": 0.28438610883237625,
388
+ "grad_norm": 0.4160371422767639,
389
+ "learning_rate": 0.00029339369110825756,
390
+ "loss": 0.4014,
391
+ "step": 260
392
+ },
393
+ {
394
+ "epoch": 0.28438610883237625,
395
+ "eval_loss": 0.8533877730369568,
396
+ "eval_runtime": 209.8359,
397
+ "eval_samples_per_second": 22.456,
398
+ "eval_steps_per_second": 0.705,
399
+ "step": 260
400
+ },
401
+ {
402
+ "epoch": 0.29532403609515995,
403
+ "grad_norm": 0.426897794008255,
404
+ "learning_rate": 0.00029287987648357134,
405
+ "loss": 0.3967,
406
+ "step": 270
407
+ },
408
+ {
409
+ "epoch": 0.29532403609515995,
410
+ "eval_loss": 0.8450255990028381,
411
+ "eval_runtime": 209.7931,
412
+ "eval_samples_per_second": 22.46,
413
+ "eval_steps_per_second": 0.705,
414
+ "step": 270
415
+ },
416
+ {
417
+ "epoch": 0.30626196335794365,
418
+ "grad_norm": 0.42360490560531616,
419
+ "learning_rate": 0.00029234730625034343,
420
+ "loss": 0.3936,
421
+ "step": 280
422
+ },
423
+ {
424
+ "epoch": 0.30626196335794365,
425
+ "eval_loss": 0.8424352407455444,
426
+ "eval_runtime": 209.6462,
427
+ "eval_samples_per_second": 22.476,
428
+ "eval_steps_per_second": 0.706,
429
+ "step": 280
430
+ },
431
+ {
432
+ "epoch": 0.3171998906207274,
433
+ "grad_norm": 0.3254799246788025,
434
+ "learning_rate": 0.00029179605031820044,
435
+ "loss": 0.3816,
436
+ "step": 290
437
+ },
438
+ {
439
+ "epoch": 0.3171998906207274,
440
+ "eval_loss": 0.8733253479003906,
441
+ "eval_runtime": 210.002,
442
+ "eval_samples_per_second": 22.438,
443
+ "eval_steps_per_second": 0.705,
444
+ "step": 290
445
+ },
446
+ {
447
+ "epoch": 0.3281378178835111,
448
+ "grad_norm": 0.44433578848838806,
449
+ "learning_rate": 0.0002912261810496102,
450
+ "loss": 0.3852,
451
+ "step": 300
452
+ },
453
+ {
454
+ "epoch": 0.3281378178835111,
455
+ "eval_loss": 0.8514276146888733,
456
+ "eval_runtime": 209.6715,
457
+ "eval_samples_per_second": 22.473,
458
+ "eval_steps_per_second": 0.706,
459
+ "step": 300
460
+ },
461
+ {
462
+ "epoch": 0.3390757451462948,
463
+ "grad_norm": 0.3501994013786316,
464
+ "learning_rate": 0.0002906377732503829,
465
+ "loss": 0.379,
466
+ "step": 310
467
+ },
468
+ {
469
+ "epoch": 0.3390757451462948,
470
+ "eval_loss": 0.854888379573822,
471
+ "eval_runtime": 210.2495,
472
+ "eval_samples_per_second": 22.411,
473
+ "eval_steps_per_second": 0.704,
474
+ "step": 310
475
+ },
476
+ {
477
+ "epoch": 0.3390757451462948,
478
+ "step": 310,
479
+ "total_flos": 4.228376200709407e+17,
480
+ "train_loss": 0.4842093152384604,
481
+ "train_runtime": 19786.6932,
482
+ "train_samples_per_second": 35.484,
483
+ "train_steps_per_second": 0.139
484
+ }
485
+ ],
486
+ "logging_steps": 10,
487
+ "max_steps": 2742,
488
+ "num_input_tokens_seen": 0,
489
+ "num_train_epochs": 3,
490
+ "save_steps": 10,
491
+ "stateful_callbacks": {
492
+ "EarlyStoppingCallback": {
493
+ "args": {
494
+ "early_stopping_patience": 10,
495
+ "early_stopping_threshold": 0.0
496
+ },
497
+ "attributes": {
498
+ "early_stopping_patience_counter": 0
499
+ }
500
+ },
501
+ "TrainerControl": {
502
+ "args": {
503
+ "should_epoch_stop": false,
504
+ "should_evaluate": false,
505
+ "should_log": false,
506
+ "should_save": true,
507
+ "should_training_stop": true
508
+ },
509
+ "attributes": {}
510
+ }
511
+ },
512
+ "total_flos": 4.228376200709407e+17,
513
+ "train_batch_size": 32,
514
+ "trial_name": null,
515
+ "trial_params": null
516
+ }