mp-02 committed on
Commit
f4b43a7
·
1 Parent(s): d34a02d

End of training

Browse files
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 10.53,
3
- "eval_accuracy": 0.833371612310519,
4
- "eval_f1": 0.8794946550048591,
5
- "eval_loss": 0.5784164071083069,
6
- "eval_precision": 0.8553875236294896,
7
- "eval_recall": 0.905,
8
- "eval_runtime": 3.53,
9
  "eval_samples": 54,
10
- "eval_samples_per_second": 15.297,
11
- "eval_steps_per_second": 1.133,
12
  "predict_accuracy": 0.8218373936014088,
13
  "predict_f1": 0.9038133181559477,
14
  "predict_loss": 0.6888472437858582,
@@ -17,9 +17,9 @@
17
  "predict_runtime": 2.0596,
18
  "predict_samples_per_second": 8.74,
19
  "predict_steps_per_second": 0.971,
20
- "train_loss": 0.50620361328125,
21
- "train_runtime": 250.4274,
22
  "train_samples": 150,
23
- "train_samples_per_second": 6.389,
24
- "train_steps_per_second": 1.597
25
  }
 
1
  {
2
+ "epoch": 16.0,
3
+ "eval_accuracy": 0.8368167202572347,
4
+ "eval_f1": 0.8891074502089993,
5
+ "eval_loss": 0.6541090607643127,
6
+ "eval_precision": 0.8746976294146106,
7
+ "eval_recall": 0.904,
8
+ "eval_runtime": 3.562,
9
  "eval_samples": 54,
10
+ "eval_samples_per_second": 15.16,
11
+ "eval_steps_per_second": 1.123,
12
  "predict_accuracy": 0.8218373936014088,
13
  "predict_f1": 0.9038133181559477,
14
  "predict_loss": 0.6888472437858582,
 
17
  "predict_runtime": 2.0596,
18
  "predict_samples_per_second": 8.74,
19
  "predict_steps_per_second": 0.971,
20
+ "train_loss": 0.43604583740234376,
21
+ "train_runtime": 307.9087,
22
  "train_samples": 150,
23
+ "train_samples_per_second": 7.795,
24
+ "train_steps_per_second": 1.299
25
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 10.53,
3
- "eval_accuracy": 0.833371612310519,
4
- "eval_f1": 0.8794946550048591,
5
- "eval_loss": 0.5784164071083069,
6
- "eval_precision": 0.8553875236294896,
7
- "eval_recall": 0.905,
8
- "eval_runtime": 3.53,
9
  "eval_samples": 54,
10
- "eval_samples_per_second": 15.297,
11
- "eval_steps_per_second": 1.133
12
  }
 
1
  {
2
+ "epoch": 16.0,
3
+ "eval_accuracy": 0.8368167202572347,
4
+ "eval_f1": 0.8891074502089993,
5
+ "eval_loss": 0.6541090607643127,
6
+ "eval_precision": 0.8746976294146106,
7
+ "eval_recall": 0.904,
8
+ "eval_runtime": 3.562,
9
  "eval_samples": 54,
10
+ "eval_samples_per_second": 15.16,
11
+ "eval_steps_per_second": 1.123
12
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09c539b37ae0e9a660c60e3ec980a33949184bb678cbc4ce1034cc7f63cfd9c6
3
  size 501420883
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c62f2603023811174e7f8e06b32bf0110d49f234eba1b9e7230533d7919edd56
3
  size 501420883
runs/Aug24_19-24-22_bernini/1724520274.220852/events.out.tfevents.1724520274.bernini.13887.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:949a243e5b00cd5a646ba3ec282056929391036a03d8eac637597678e58c2956
3
+ size 4665
runs/Aug24_19-24-22_bernini/events.out.tfevents.1724520274.bernini.13887.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0208a85de259cae48b3c66b9e998b2714993dfc255c4888c4664ffca6d913b95
3
+ size 11945
runs/Aug24_19-24-22_bernini/events.out.tfevents.1724520591.bernini.13887.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04382d93bf526390c5db203e5f32b221f1f8bb40d1696422279b9ee810c47f77
3
+ size 512
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 10.53,
3
- "train_loss": 0.50620361328125,
4
- "train_runtime": 250.4274,
5
  "train_samples": 150,
6
- "train_samples_per_second": 6.389,
7
- "train_steps_per_second": 1.597
8
  }
 
1
  {
2
+ "epoch": 16.0,
3
+ "train_loss": 0.43604583740234376,
4
+ "train_runtime": 307.9087,
5
  "train_samples": 150,
6
+ "train_samples_per_second": 7.795,
7
+ "train_steps_per_second": 1.299
8
  }
trainer_state.json CHANGED
@@ -1,217 +1,217 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.526315789473685,
5
  "global_step": 400,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.66,
12
- "eval_accuracy": 0.5721175930179145,
13
- "eval_f1": 0.34372003835091086,
14
- "eval_loss": 1.351142406463623,
15
- "eval_precision": 0.3301104972375691,
16
- "eval_recall": 0.3585,
17
- "eval_runtime": 3.6101,
18
- "eval_samples_per_second": 14.958,
19
- "eval_steps_per_second": 1.108,
20
  "step": 25
21
  },
22
  {
23
- "epoch": 1.32,
24
- "eval_accuracy": 0.7614836931557188,
25
- "eval_f1": 0.7229437229437228,
26
- "eval_loss": 0.905920684337616,
27
- "eval_precision": 0.6964782205746061,
28
- "eval_recall": 0.7515,
29
- "eval_runtime": 3.5063,
30
- "eval_samples_per_second": 15.401,
31
- "eval_steps_per_second": 1.141,
32
  "step": 50
33
  },
34
  {
35
- "epoch": 1.97,
36
- "eval_accuracy": 0.7796279283417548,
37
- "eval_f1": 0.7946449916327994,
38
- "eval_loss": 0.7163704633712769,
39
- "eval_precision": 0.7613376087952359,
40
- "eval_recall": 0.831,
41
- "eval_runtime": 3.5828,
42
- "eval_samples_per_second": 15.072,
43
- "eval_steps_per_second": 1.116,
44
  "step": 75
45
  },
46
  {
47
- "epoch": 2.63,
48
- "eval_accuracy": 0.799265043638034,
49
- "eval_f1": 0.8249158249158249,
50
- "eval_loss": 0.6392571926116943,
51
- "eval_precision": 0.7947173308619092,
52
- "eval_recall": 0.8575,
53
- "eval_runtime": 3.5968,
54
- "eval_samples_per_second": 15.013,
55
- "eval_steps_per_second": 1.112,
56
  "step": 100
57
  },
58
  {
59
- "epoch": 3.29,
60
- "eval_accuracy": 0.8104042259990814,
61
- "eval_f1": 0.8409859835669407,
62
- "eval_loss": 0.5755508542060852,
63
- "eval_precision": 0.813844714686623,
64
- "eval_recall": 0.87,
65
- "eval_runtime": 3.5975,
66
- "eval_samples_per_second": 15.011,
67
- "eval_steps_per_second": 1.112,
68
  "step": 125
69
  },
70
  {
71
- "epoch": 3.95,
72
- "eval_accuracy": 0.8323380799265043,
73
- "eval_f1": 0.8506134231416886,
74
- "eval_loss": 0.5508233308792114,
75
- "eval_precision": 0.8196569309225776,
76
- "eval_recall": 0.884,
77
- "eval_runtime": 3.6045,
78
- "eval_samples_per_second": 14.981,
79
- "eval_steps_per_second": 1.11,
80
  "step": 150
81
  },
82
  {
83
- "epoch": 4.61,
84
- "eval_accuracy": 0.8327974276527331,
85
- "eval_f1": 0.8600435097897026,
86
- "eval_loss": 0.5458412170410156,
87
- "eval_precision": 0.8324754328497894,
88
- "eval_recall": 0.8895,
89
- "eval_runtime": 3.5309,
90
- "eval_samples_per_second": 15.294,
91
- "eval_steps_per_second": 1.133,
92
  "step": 175
93
  },
94
  {
95
- "epoch": 5.26,
96
- "eval_accuracy": 0.826596233348645,
97
- "eval_f1": 0.8491160087188181,
98
- "eval_loss": 0.5740342736244202,
99
- "eval_precision": 0.8233912635039925,
100
- "eval_recall": 0.8765,
101
- "eval_runtime": 3.622,
102
- "eval_samples_per_second": 14.909,
103
- "eval_steps_per_second": 1.104,
104
  "step": 200
105
  },
106
  {
107
- "epoch": 5.92,
108
- "eval_accuracy": 0.8361276986678916,
109
- "eval_f1": 0.8709914320685433,
110
- "eval_loss": 0.5719187259674072,
111
- "eval_precision": 0.8532374100719424,
112
- "eval_recall": 0.8895,
113
- "eval_runtime": 3.5659,
114
- "eval_samples_per_second": 15.143,
115
- "eval_steps_per_second": 1.122,
116
  "step": 225
117
  },
118
  {
119
- "epoch": 6.58,
120
- "eval_accuracy": 0.8263665594855305,
121
- "eval_f1": 0.8736131210805596,
122
- "eval_loss": 0.5435599684715271,
123
- "eval_precision": 0.8438956197576887,
124
- "eval_recall": 0.9055,
125
- "eval_runtime": 3.5266,
126
- "eval_samples_per_second": 15.312,
127
- "eval_steps_per_second": 1.134,
128
  "step": 250
129
  },
130
  {
131
- "epoch": 7.24,
132
- "eval_accuracy": 0.8290078089113458,
133
- "eval_f1": 0.8783914728682171,
134
- "eval_loss": 0.5714461207389832,
135
- "eval_precision": 0.8519736842105263,
136
- "eval_recall": 0.9065,
137
- "eval_runtime": 3.6124,
138
- "eval_samples_per_second": 14.948,
139
- "eval_steps_per_second": 1.107,
140
  "step": 275
141
  },
142
  {
143
- "epoch": 7.89,
144
- "eval_accuracy": 0.8280891134588884,
145
- "eval_f1": 0.8791048406713695,
146
- "eval_loss": 0.5853330492973328,
147
- "eval_precision": 0.8559924206537186,
148
- "eval_recall": 0.9035,
149
- "eval_runtime": 3.6008,
150
- "eval_samples_per_second": 14.997,
151
- "eval_steps_per_second": 1.111,
152
  "step": 300
153
  },
154
  {
155
- "epoch": 8.55,
156
- "eval_accuracy": 0.8389986219568213,
157
- "eval_f1": 0.8807785888077859,
158
- "eval_loss": 0.570177435874939,
159
- "eval_precision": 0.8578199052132701,
160
- "eval_recall": 0.905,
161
- "eval_runtime": 3.5637,
162
- "eval_samples_per_second": 15.153,
163
- "eval_steps_per_second": 1.122,
164
  "step": 325
165
  },
166
  {
167
- "epoch": 9.21,
168
- "eval_accuracy": 0.8418695452457511,
169
- "eval_f1": 0.8775261748234722,
170
- "eval_loss": 0.5666728019714355,
171
- "eval_precision": 0.8552444233507357,
172
- "eval_recall": 0.901,
173
- "eval_runtime": 3.5809,
174
- "eval_samples_per_second": 15.08,
175
- "eval_steps_per_second": 1.117,
176
  "step": 350
177
  },
178
  {
179
- "epoch": 9.87,
180
- "eval_accuracy": 0.8338309600367478,
181
- "eval_f1": 0.8786773644541697,
182
- "eval_loss": 0.5793057084083557,
183
- "eval_precision": 0.8551822053951728,
184
- "eval_recall": 0.9035,
185
- "eval_runtime": 3.5472,
186
- "eval_samples_per_second": 15.223,
187
- "eval_steps_per_second": 1.128,
188
  "step": 375
189
  },
190
  {
191
- "epoch": 10.53,
192
- "eval_accuracy": 0.833371612310519,
193
- "eval_f1": 0.8794946550048591,
194
- "eval_loss": 0.5784164071083069,
195
- "eval_precision": 0.8553875236294896,
196
- "eval_recall": 0.905,
197
- "eval_runtime": 3.5919,
198
- "eval_samples_per_second": 15.034,
199
- "eval_steps_per_second": 1.114,
200
  "step": 400
201
  },
202
  {
203
- "epoch": 10.53,
204
  "step": 400,
205
- "total_flos": 416496442306560.0,
206
- "train_loss": 0.50620361328125,
207
- "train_runtime": 250.4274,
208
- "train_samples_per_second": 6.389,
209
- "train_steps_per_second": 1.597
210
  }
211
  ],
212
  "max_steps": 400,
213
- "num_train_epochs": 11,
214
- "total_flos": 416496442306560.0,
215
  "trial_name": null,
216
  "trial_params": null
217
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 16.0,
5
  "global_step": 400,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 1.0,
12
+ "eval_accuracy": 0.6092099219108865,
13
+ "eval_f1": 0.43809958885335776,
14
+ "eval_loss": 1.2830967903137207,
15
+ "eval_precision": 0.40328006728343146,
16
+ "eval_recall": 0.4795,
17
+ "eval_runtime": 3.5046,
18
+ "eval_samples_per_second": 15.408,
19
+ "eval_steps_per_second": 1.141,
20
  "step": 25
21
  },
22
  {
23
+ "epoch": 2.0,
24
+ "eval_accuracy": 0.7748047772163528,
25
+ "eval_f1": 0.7586042065009561,
26
+ "eval_loss": 0.8177912831306458,
27
+ "eval_precision": 0.7266483516483516,
28
+ "eval_recall": 0.7935,
29
+ "eval_runtime": 3.5776,
30
+ "eval_samples_per_second": 15.094,
31
+ "eval_steps_per_second": 1.118,
32
  "step": 50
33
  },
34
  {
35
+ "epoch": 3.0,
36
+ "eval_accuracy": 0.7990353697749196,
37
+ "eval_f1": 0.814344962185899,
38
+ "eval_loss": 0.6842699646949768,
39
+ "eval_precision": 0.7951405431157694,
40
+ "eval_recall": 0.8345,
41
+ "eval_runtime": 3.5751,
42
+ "eval_samples_per_second": 15.104,
43
+ "eval_steps_per_second": 1.119,
44
  "step": 75
45
  },
46
  {
47
+ "epoch": 4.0,
48
+ "eval_accuracy": 0.8161460725769407,
49
+ "eval_f1": 0.8306801736613604,
50
+ "eval_loss": 0.6316555142402649,
51
+ "eval_precision": 0.8024231127679403,
52
+ "eval_recall": 0.861,
53
+ "eval_runtime": 3.6093,
54
+ "eval_samples_per_second": 14.962,
55
+ "eval_steps_per_second": 1.108,
56
  "step": 100
57
  },
58
  {
59
+ "epoch": 5.0,
60
+ "eval_accuracy": 0.8233807992650436,
61
+ "eval_f1": 0.8600191754554171,
62
+ "eval_loss": 0.5964206457138062,
63
+ "eval_precision": 0.8259668508287292,
64
+ "eval_recall": 0.897,
65
+ "eval_runtime": 3.5003,
66
+ "eval_samples_per_second": 15.427,
67
+ "eval_steps_per_second": 1.143,
68
  "step": 125
69
  },
70
  {
71
+ "epoch": 6.0,
72
+ "eval_accuracy": 0.8207395498392283,
73
+ "eval_f1": 0.8444552293132735,
74
+ "eval_loss": 0.6049793362617493,
75
+ "eval_precision": 0.8203677510608204,
76
+ "eval_recall": 0.87,
77
+ "eval_runtime": 3.5639,
78
+ "eval_samples_per_second": 15.152,
79
+ "eval_steps_per_second": 1.122,
80
  "step": 150
81
  },
82
  {
83
+ "epoch": 7.0,
84
+ "eval_accuracy": 0.8168350941662839,
85
+ "eval_f1": 0.8474740149867052,
86
+ "eval_loss": 0.628131091594696,
87
+ "eval_precision": 0.8203088441740758,
88
+ "eval_recall": 0.8765,
89
+ "eval_runtime": 3.5404,
90
+ "eval_samples_per_second": 15.252,
91
+ "eval_steps_per_second": 1.13,
92
  "step": 175
93
  },
94
  {
95
+ "epoch": 8.0,
96
+ "eval_accuracy": 0.8234956361966008,
97
+ "eval_f1": 0.8708504967288586,
98
+ "eval_loss": 0.6227801442146301,
99
+ "eval_precision": 0.844851904090268,
100
+ "eval_recall": 0.8985,
101
+ "eval_runtime": 3.5716,
102
+ "eval_samples_per_second": 15.119,
103
+ "eval_steps_per_second": 1.12,
104
  "step": 200
105
  },
106
  {
107
+ "epoch": 9.0,
108
+ "eval_accuracy": 0.826596233348645,
109
+ "eval_f1": 0.8566561207106351,
110
+ "eval_loss": 0.621345579624176,
111
+ "eval_precision": 0.8345187292555714,
112
+ "eval_recall": 0.88,
113
+ "eval_runtime": 3.5239,
114
+ "eval_samples_per_second": 15.324,
115
+ "eval_steps_per_second": 1.135,
116
  "step": 225
117
  },
118
  {
119
+ "epoch": 10.0,
120
+ "eval_accuracy": 0.8356683509416628,
121
+ "eval_f1": 0.8702401164200824,
122
+ "eval_loss": 0.6172508597373962,
123
+ "eval_precision": 0.8450306170513424,
124
+ "eval_recall": 0.897,
125
+ "eval_runtime": 3.508,
126
+ "eval_samples_per_second": 15.394,
127
+ "eval_steps_per_second": 1.14,
128
  "step": 250
129
  },
130
  {
131
+ "epoch": 11.0,
132
+ "eval_accuracy": 0.8299265043638034,
133
+ "eval_f1": 0.8633826741082262,
134
+ "eval_loss": 0.6476383209228516,
135
+ "eval_precision": 0.8387553041018387,
136
+ "eval_recall": 0.8895,
137
+ "eval_runtime": 3.4928,
138
+ "eval_samples_per_second": 15.46,
139
+ "eval_steps_per_second": 1.145,
140
  "step": 275
141
  },
142
  {
143
+ "epoch": 12.0,
144
+ "eval_accuracy": 0.838194763435921,
145
+ "eval_f1": 0.8761018609206659,
146
+ "eval_loss": 0.6358577609062195,
147
+ "eval_precision": 0.8584452975047985,
148
+ "eval_recall": 0.8945,
149
+ "eval_runtime": 3.4999,
150
+ "eval_samples_per_second": 15.429,
151
+ "eval_steps_per_second": 1.143,
152
  "step": 300
153
  },
154
  {
155
+ "epoch": 13.0,
156
+ "eval_accuracy": 0.8394579696830501,
157
+ "eval_f1": 0.8911815278801277,
158
+ "eval_loss": 0.6469025611877441,
159
+ "eval_precision": 0.8759053597295993,
160
+ "eval_recall": 0.907,
161
+ "eval_runtime": 3.5529,
162
+ "eval_samples_per_second": 15.199,
163
+ "eval_steps_per_second": 1.126,
164
  "step": 325
165
  },
166
  {
167
+ "epoch": 14.0,
168
+ "eval_accuracy": 0.8372760679834634,
169
+ "eval_f1": 0.8879606879606879,
170
+ "eval_loss": 0.651043176651001,
171
+ "eval_precision": 0.8729468599033816,
172
+ "eval_recall": 0.9035,
173
+ "eval_runtime": 3.5289,
174
+ "eval_samples_per_second": 15.302,
175
+ "eval_steps_per_second": 1.133,
176
  "step": 350
177
  },
178
  {
179
+ "epoch": 15.0,
180
+ "eval_accuracy": 0.8354386770785485,
181
+ "eval_f1": 0.8834476003917728,
182
+ "eval_loss": 0.6554981470108032,
183
+ "eval_precision": 0.8656429942418427,
184
+ "eval_recall": 0.902,
185
+ "eval_runtime": 3.5307,
186
+ "eval_samples_per_second": 15.294,
187
+ "eval_steps_per_second": 1.133,
188
  "step": 375
189
  },
190
  {
191
+ "epoch": 16.0,
192
+ "eval_accuracy": 0.8368167202572347,
193
+ "eval_f1": 0.8891074502089993,
194
+ "eval_loss": 0.6541090607643127,
195
+ "eval_precision": 0.8746976294146106,
196
+ "eval_recall": 0.904,
197
+ "eval_runtime": 3.5967,
198
+ "eval_samples_per_second": 15.014,
199
+ "eval_steps_per_second": 1.112,
200
  "step": 400
201
  },
202
  {
203
+ "epoch": 16.0,
204
  "step": 400,
205
+ "total_flos": 632652823756800.0,
206
+ "train_loss": 0.43604583740234376,
207
+ "train_runtime": 307.9087,
208
+ "train_samples_per_second": 7.795,
209
+ "train_steps_per_second": 1.299
210
  }
211
  ],
212
  "max_steps": 400,
213
+ "num_train_epochs": 16,
214
+ "total_flos": 632652823756800.0,
215
  "trial_name": null,
216
  "trial_params": null
217
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b9738bcfd98ccbf71720f6b0ac66e4c20f1ded32caa28f7663edc931468381e
3
  size 2927
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37424644f6eb08c9f71a2b7231711a0ffb4a5590473227e020a76cca07225420
3
  size 2927