josesantorcuato committed on
Commit
b2440f1
1 Parent(s): c6f59e8

End of training

Browse files
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 4.180952380952381,
3
- "eval_accuracy": 0.8395061728395061,
4
- "eval_loss": 0.38502195477485657,
5
- "eval_runtime": 14.6842,
6
- "eval_samples_per_second": 11.032,
7
- "eval_steps_per_second": 1.43
8
  }
 
1
  {
2
+ "epoch": 4.184905660377359,
3
+ "eval_accuracy": 0.7978142076502732,
4
+ "eval_loss": 0.5372045040130615,
5
+ "eval_runtime": 17.466,
6
+ "eval_samples_per_second": 10.478,
7
+ "eval_steps_per_second": 1.317
8
  }
runs/Oct29_18-26-07_7c56bb07786b/events.out.tfevents.1730227090.7c56bb07786b.1505.1 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6690d3d3d8135b37dc4a7d03b3914f43d99d258eb1e258b70f5a5832c983c991
3
- size 411
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edabf379647a0aee3e379ea584cc632455901091857770579f0e324bc7853741
3
+ size 734
test_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 4.180952380952381,
3
- "eval_accuracy": 0.8395061728395061,
4
- "eval_loss": 0.38502195477485657,
5
- "eval_runtime": 14.6842,
6
- "eval_samples_per_second": 11.032,
7
- "eval_steps_per_second": 1.43
8
  }
 
1
  {
2
+ "epoch": 4.184905660377359,
3
+ "eval_accuracy": 0.7978142076502732,
4
+ "eval_loss": 0.5372045040130615,
5
+ "eval_runtime": 17.466,
6
+ "eval_samples_per_second": 10.478,
7
+ "eval_steps_per_second": 1.317
8
  }
trainer_state.json CHANGED
@@ -1,235 +1,270 @@
1
  {
2
- "best_metric": 0.9066666666666666,
3
- "best_model_checkpoint": "videomae-base-finetuned-ucf101-subset/checkpoint-210",
4
- "epoch": 4.180952380952381,
5
  "eval_steps": 500,
6
- "global_step": 210,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.047619047619047616,
13
- "grad_norm": 7.849704265594482,
14
- "learning_rate": 2.380952380952381e-05,
15
- "loss": 2.3976,
16
  "step": 10
17
  },
18
  {
19
- "epoch": 0.09523809523809523,
20
- "grad_norm": 7.76956033706665,
21
- "learning_rate": 4.761904761904762e-05,
22
- "loss": 2.3357,
23
  "step": 20
24
  },
25
  {
26
- "epoch": 0.14285714285714285,
27
- "grad_norm": 15.013307571411133,
28
- "learning_rate": 4.761904761904762e-05,
29
- "loss": 2.282,
30
  "step": 30
31
  },
32
  {
33
- "epoch": 0.19047619047619047,
34
- "grad_norm": 7.262270450592041,
35
- "learning_rate": 4.4973544973544974e-05,
36
- "loss": 2.0368,
37
  "step": 40
38
  },
39
  {
40
- "epoch": 0.20476190476190476,
41
- "eval_accuracy": 0.29333333333333333,
42
- "eval_loss": 2.0273361206054688,
43
- "eval_runtime": 7.264,
44
- "eval_samples_per_second": 10.325,
45
- "eval_steps_per_second": 1.377,
46
- "step": 43
47
  },
48
  {
49
- "epoch": 1.0333333333333334,
50
- "grad_norm": 8.3132963180542,
51
- "learning_rate": 4.232804232804233e-05,
52
- "loss": 1.947,
53
- "step": 50
 
 
54
  },
55
  {
56
- "epoch": 1.0809523809523809,
57
- "grad_norm": 14.42203426361084,
58
- "learning_rate": 3.968253968253968e-05,
59
- "loss": 1.594,
60
  "step": 60
61
  },
62
  {
63
- "epoch": 1.1285714285714286,
64
- "grad_norm": 11.382922172546387,
65
- "learning_rate": 3.7037037037037037e-05,
66
- "loss": 1.2509,
67
  "step": 70
68
  },
69
  {
70
- "epoch": 1.1761904761904762,
71
- "grad_norm": 9.775425910949707,
72
- "learning_rate": 3.439153439153439e-05,
73
- "loss": 1.1679,
74
  "step": 80
75
  },
76
  {
77
- "epoch": 1.2047619047619047,
78
- "eval_accuracy": 0.56,
79
- "eval_loss": 1.0918534994125366,
80
- "eval_runtime": 7.1505,
81
- "eval_samples_per_second": 10.489,
82
- "eval_steps_per_second": 1.398,
83
- "step": 86
84
- },
85
- {
86
- "epoch": 2.019047619047619,
87
- "grad_norm": 16.352764129638672,
88
- "learning_rate": 3.1746031746031745e-05,
89
- "loss": 1.0361,
90
  "step": 90
91
  },
92
  {
93
- "epoch": 2.066666666666667,
94
- "grad_norm": 5.250826358795166,
95
- "learning_rate": 2.91005291005291e-05,
96
- "loss": 0.7379,
97
  "step": 100
98
  },
99
  {
100
- "epoch": 2.1142857142857143,
101
- "grad_norm": 11.715250968933105,
102
- "learning_rate": 2.6455026455026456e-05,
103
- "loss": 0.7289,
104
- "step": 110
 
 
105
  },
106
  {
107
- "epoch": 2.1619047619047618,
108
- "grad_norm": 16.230215072631836,
109
- "learning_rate": 2.380952380952381e-05,
110
- "loss": 0.5097,
111
- "step": 120
112
  },
113
  {
114
- "epoch": 2.204761904761905,
115
- "eval_accuracy": 0.8266666666666667,
116
- "eval_loss": 0.5804122090339661,
117
- "eval_runtime": 6.6096,
118
- "eval_samples_per_second": 11.347,
119
- "eval_steps_per_second": 1.513,
120
- "step": 129
121
  },
122
  {
123
- "epoch": 3.0047619047619047,
124
- "grad_norm": 18.1605224609375,
125
- "learning_rate": 2.1164021164021164e-05,
126
- "loss": 0.4609,
127
  "step": 130
128
  },
129
  {
130
- "epoch": 3.052380952380952,
131
- "grad_norm": 7.160711288452148,
132
- "learning_rate": 1.8518518518518518e-05,
133
- "loss": 0.3043,
134
  "step": 140
135
  },
136
  {
137
- "epoch": 3.1,
138
- "grad_norm": 14.182634353637695,
139
- "learning_rate": 1.5873015873015872e-05,
140
- "loss": 0.336,
141
  "step": 150
142
  },
143
  {
144
- "epoch": 3.1476190476190475,
145
- "grad_norm": 2.916877031326294,
146
- "learning_rate": 1.3227513227513228e-05,
147
- "loss": 0.2992,
148
  "step": 160
149
  },
150
  {
151
- "epoch": 3.1952380952380954,
152
- "grad_norm": 8.268482208251953,
153
- "learning_rate": 1.0582010582010582e-05,
154
- "loss": 0.2293,
155
- "step": 170
 
 
156
  },
157
  {
158
- "epoch": 3.204761904761905,
159
- "eval_accuracy": 0.8266666666666667,
160
- "eval_loss": 0.4854774475097656,
161
- "eval_runtime": 6.5331,
162
- "eval_samples_per_second": 11.48,
163
- "eval_steps_per_second": 1.531,
164
- "step": 172
165
  },
166
  {
167
- "epoch": 4.038095238095238,
168
- "grad_norm": 7.389148712158203,
169
- "learning_rate": 7.936507936507936e-06,
170
- "loss": 0.3101,
171
  "step": 180
172
  },
173
  {
174
- "epoch": 4.085714285714285,
175
- "grad_norm": 4.5858259201049805,
176
- "learning_rate": 5.291005291005291e-06,
177
- "loss": 0.1873,
178
  "step": 190
179
  },
180
  {
181
- "epoch": 4.133333333333334,
182
- "grad_norm": 2.300278425216675,
183
- "learning_rate": 2.6455026455026455e-06,
184
- "loss": 0.2008,
185
  "step": 200
186
  },
187
  {
188
- "epoch": 4.180952380952381,
189
- "grad_norm": 2.982243537902832,
190
- "learning_rate": 0.0,
191
- "loss": 0.1882,
192
  "step": 210
193
  },
194
  {
195
- "epoch": 4.180952380952381,
196
- "eval_accuracy": 0.9066666666666666,
197
- "eval_loss": 0.3435481786727905,
198
- "eval_runtime": 6.9489,
199
- "eval_samples_per_second": 10.793,
200
- "eval_steps_per_second": 1.439,
201
- "step": 210
202
  },
203
  {
204
- "epoch": 4.180952380952381,
205
- "step": 210,
206
- "total_flos": 2.0736178988651643e+18,
207
- "train_loss": 0.9781298257055737,
208
- "train_runtime": 354.3465,
209
- "train_samples_per_second": 4.741,
210
- "train_steps_per_second": 0.593
211
  },
212
  {
213
- "epoch": 4.180952380952381,
214
- "eval_accuracy": 0.8395061728395061,
215
- "eval_loss": 0.3850219249725342,
216
- "eval_runtime": 60.1785,
217
- "eval_samples_per_second": 2.692,
218
- "eval_steps_per_second": 0.349,
219
- "step": 210
220
  },
221
  {
222
- "epoch": 4.180952380952381,
223
- "eval_accuracy": 0.8395061728395061,
224
- "eval_loss": 0.38502195477485657,
225
- "eval_runtime": 14.6842,
226
- "eval_samples_per_second": 11.032,
227
- "eval_steps_per_second": 1.43,
228
- "step": 210
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
229
  }
230
  ],
231
  "logging_steps": 10,
232
- "max_steps": 210,
233
  "num_input_tokens_seen": 0,
234
  "num_train_epochs": 9223372036854775807,
235
  "save_steps": 500,
@@ -245,7 +280,7 @@
245
  "attributes": {}
246
  }
247
  },
248
- "total_flos": 2.0736178988651643e+18,
249
  "train_batch_size": 8,
250
  "trial_name": null,
251
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.8452380952380952,
3
+ "best_model_checkpoint": "videomae-base-finetuned-ucf101-subset/checkpoint-265",
4
+ "epoch": 4.184905660377359,
5
  "eval_steps": 500,
6
+ "global_step": 265,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.03773584905660377,
13
+ "grad_norm": 8.033324241638184,
14
+ "learning_rate": 1.8518518518518518e-05,
15
+ "loss": 2.7609,
16
  "step": 10
17
  },
18
  {
19
+ "epoch": 0.07547169811320754,
20
+ "grad_norm": 7.882028102874756,
21
+ "learning_rate": 3.7037037037037037e-05,
22
+ "loss": 2.6513,
23
  "step": 20
24
  },
25
  {
26
+ "epoch": 0.11320754716981132,
27
+ "grad_norm": 8.582988739013672,
28
+ "learning_rate": 4.936974789915967e-05,
29
+ "loss": 2.5533,
30
  "step": 30
31
  },
32
  {
33
+ "epoch": 0.1509433962264151,
34
+ "grad_norm": 9.038335800170898,
35
+ "learning_rate": 4.726890756302521e-05,
36
+ "loss": 2.3765,
37
  "step": 40
38
  },
39
  {
40
+ "epoch": 0.18867924528301888,
41
+ "grad_norm": 10.162276268005371,
42
+ "learning_rate": 4.516806722689076e-05,
43
+ "loss": 2.2682,
44
+ "step": 50
 
 
45
  },
46
  {
47
+ "epoch": 0.2037735849056604,
48
+ "eval_accuracy": 0.32142857142857145,
49
+ "eval_loss": 2.0374372005462646,
50
+ "eval_runtime": 21.9106,
51
+ "eval_samples_per_second": 3.834,
52
+ "eval_steps_per_second": 0.502,
53
+ "step": 54
54
  },
55
  {
56
+ "epoch": 1.0226415094339623,
57
+ "grad_norm": 8.538351058959961,
58
+ "learning_rate": 4.3067226890756305e-05,
59
+ "loss": 1.9532,
60
  "step": 60
61
  },
62
  {
63
+ "epoch": 1.060377358490566,
64
+ "grad_norm": 8.961767196655273,
65
+ "learning_rate": 4.096638655462185e-05,
66
+ "loss": 1.5809,
67
  "step": 70
68
  },
69
  {
70
+ "epoch": 1.0981132075471698,
71
+ "grad_norm": 8.71930980682373,
72
+ "learning_rate": 3.88655462184874e-05,
73
+ "loss": 1.4653,
74
  "step": 80
75
  },
76
  {
77
+ "epoch": 1.1358490566037736,
78
+ "grad_norm": 11.393306732177734,
79
+ "learning_rate": 3.6764705882352945e-05,
80
+ "loss": 1.3126,
 
 
 
 
 
 
 
 
 
81
  "step": 90
82
  },
83
  {
84
+ "epoch": 1.1735849056603773,
85
+ "grad_norm": 8.893975257873535,
86
+ "learning_rate": 3.466386554621849e-05,
87
+ "loss": 1.0997,
88
  "step": 100
89
  },
90
  {
91
+ "epoch": 1.2037735849056603,
92
+ "eval_accuracy": 0.6666666666666666,
93
+ "eval_loss": 1.0029432773590088,
94
+ "eval_runtime": 7.8357,
95
+ "eval_samples_per_second": 10.72,
96
+ "eval_steps_per_second": 1.404,
97
+ "step": 108
98
  },
99
  {
100
+ "epoch": 2.0075471698113208,
101
+ "grad_norm": 8.756331443786621,
102
+ "learning_rate": 3.2563025210084034e-05,
103
+ "loss": 1.0144,
104
+ "step": 110
105
  },
106
  {
107
+ "epoch": 2.0452830188679245,
108
+ "grad_norm": 15.8713960647583,
109
+ "learning_rate": 3.0462184873949578e-05,
110
+ "loss": 0.9754,
111
+ "step": 120
 
 
112
  },
113
  {
114
+ "epoch": 2.0830188679245283,
115
+ "grad_norm": 25.470157623291016,
116
+ "learning_rate": 2.8361344537815126e-05,
117
+ "loss": 1.1946,
118
  "step": 130
119
  },
120
  {
121
+ "epoch": 2.120754716981132,
122
+ "grad_norm": 6.735860347747803,
123
+ "learning_rate": 2.6260504201680674e-05,
124
+ "loss": 0.782,
125
  "step": 140
126
  },
127
  {
128
+ "epoch": 2.158490566037736,
129
+ "grad_norm": 8.800786018371582,
130
+ "learning_rate": 2.415966386554622e-05,
131
+ "loss": 0.8976,
132
  "step": 150
133
  },
134
  {
135
+ "epoch": 2.1962264150943396,
136
+ "grad_norm": 8.588472366333008,
137
+ "learning_rate": 2.2058823529411766e-05,
138
+ "loss": 0.6996,
139
  "step": 160
140
  },
141
  {
142
+ "epoch": 2.2037735849056603,
143
+ "eval_accuracy": 0.7857142857142857,
144
+ "eval_loss": 0.7632536292076111,
145
+ "eval_runtime": 8.3664,
146
+ "eval_samples_per_second": 10.04,
147
+ "eval_steps_per_second": 1.315,
148
+ "step": 162
149
  },
150
  {
151
+ "epoch": 3.030188679245283,
152
+ "grad_norm": 7.108087539672852,
153
+ "learning_rate": 1.9957983193277314e-05,
154
+ "loss": 0.8053,
155
+ "step": 170
 
 
156
  },
157
  {
158
+ "epoch": 3.0679245283018868,
159
+ "grad_norm": 8.610198974609375,
160
+ "learning_rate": 1.785714285714286e-05,
161
+ "loss": 0.6667,
162
  "step": 180
163
  },
164
  {
165
+ "epoch": 3.1056603773584905,
166
+ "grad_norm": 11.434289932250977,
167
+ "learning_rate": 1.5756302521008403e-05,
168
+ "loss": 0.6006,
169
  "step": 190
170
  },
171
  {
172
+ "epoch": 3.1433962264150943,
173
+ "grad_norm": 4.931293487548828,
174
+ "learning_rate": 1.3655462184873949e-05,
175
+ "loss": 0.635,
176
  "step": 200
177
  },
178
  {
179
+ "epoch": 3.181132075471698,
180
+ "grad_norm": 6.236601829528809,
181
+ "learning_rate": 1.1554621848739497e-05,
182
+ "loss": 0.7031,
183
  "step": 210
184
  },
185
  {
186
+ "epoch": 3.2037735849056603,
187
+ "eval_accuracy": 0.7857142857142857,
188
+ "eval_loss": 0.5939908027648926,
189
+ "eval_runtime": 7.8516,
190
+ "eval_samples_per_second": 10.698,
191
+ "eval_steps_per_second": 1.401,
192
+ "step": 216
193
  },
194
  {
195
+ "epoch": 4.0150943396226415,
196
+ "grad_norm": 5.458017349243164,
197
+ "learning_rate": 9.453781512605041e-06,
198
+ "loss": 0.5998,
199
+ "step": 220
 
 
200
  },
201
  {
202
+ "epoch": 4.052830188679246,
203
+ "grad_norm": 4.542972087860107,
204
+ "learning_rate": 7.3529411764705884e-06,
205
+ "loss": 0.5022,
206
+ "step": 230
 
 
207
  },
208
  {
209
+ "epoch": 4.090566037735849,
210
+ "grad_norm": 17.187719345092773,
211
+ "learning_rate": 5.252100840336135e-06,
212
+ "loss": 0.4509,
213
+ "step": 240
214
+ },
215
+ {
216
+ "epoch": 4.128301886792453,
217
+ "grad_norm": 18.115018844604492,
218
+ "learning_rate": 3.1512605042016808e-06,
219
+ "loss": 0.5752,
220
+ "step": 250
221
+ },
222
+ {
223
+ "epoch": 4.166037735849057,
224
+ "grad_norm": 6.622856616973877,
225
+ "learning_rate": 1.0504201680672271e-06,
226
+ "loss": 0.4078,
227
+ "step": 260
228
+ },
229
+ {
230
+ "epoch": 4.184905660377359,
231
+ "eval_accuracy": 0.8452380952380952,
232
+ "eval_loss": 0.5297083854675293,
233
+ "eval_runtime": 8.7988,
234
+ "eval_samples_per_second": 9.547,
235
+ "eval_steps_per_second": 1.25,
236
+ "step": 265
237
+ },
238
+ {
239
+ "epoch": 4.184905660377359,
240
+ "step": 265,
241
+ "total_flos": 2.631970050168324e+18,
242
+ "train_loss": 1.195451885799192,
243
+ "train_runtime": 648.8571,
244
+ "train_samples_per_second": 3.267,
245
+ "train_steps_per_second": 0.408
246
+ },
247
+ {
248
+ "epoch": 4.184905660377359,
249
+ "eval_accuracy": 0.7978142076502732,
250
+ "eval_loss": 0.5372046828269958,
251
+ "eval_runtime": 49.961,
252
+ "eval_samples_per_second": 3.663,
253
+ "eval_steps_per_second": 0.46,
254
+ "step": 265
255
+ },
256
+ {
257
+ "epoch": 4.184905660377359,
258
+ "eval_accuracy": 0.7978142076502732,
259
+ "eval_loss": 0.5372045040130615,
260
+ "eval_runtime": 17.466,
261
+ "eval_samples_per_second": 10.478,
262
+ "eval_steps_per_second": 1.317,
263
+ "step": 265
264
  }
265
  ],
266
  "logging_steps": 10,
267
+ "max_steps": 265,
268
  "num_input_tokens_seen": 0,
269
  "num_train_epochs": 9223372036854775807,
270
  "save_steps": 500,
 
280
  "attributes": {}
281
  }
282
  },
283
+ "total_flos": 2.631970050168324e+18,
284
  "train_batch_size": 8,
285
  "trial_name": null,
286
  "trial_params": null