JoshuaKelleyDs commited on
Commit
b43ccf0
1 Parent(s): 98b0f03
README.md ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: other
3
+ base_model: google/mobilenet_v2_1.0_224
4
+ tags:
5
+ - generated_from_trainer
6
+ metrics:
7
+ - accuracy
8
+ model-index:
9
+ - name: doodle_mobilenet
10
+ results: []
11
+ ---
12
+
13
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
+ should probably proofread and complete it, then remove this comment. -->
15
+
16
+ # doodle_mobilenet
17
+
18
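+ This model is a fine-tuned version of [google/mobilenet_v2_1.0_224](https://huggingface.co/google/mobilenet_v2_1.0_224) on an unknown dataset (the Trainer did not record a dataset name; the 345 class labels in `config.json` appear to correspond to the Quick, Draw! doodle categories — to be confirmed by the author).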
19
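+ It achieves the following results on the evaluation set (final evaluation at epoch 10, as recorded in `all_results.json`; note that these figures are much worse than the validation metrics logged during training in the "Training results" table below, where accuracy reached about 0.71, so the final evaluation was presumably run on different data or with different preprocessing and should be verified):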
20
+ - Loss: 4.4124
21
+ - Accuracy: 0.3565
22
+
23
+ ## Model description
24
+
25
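+ A MobileNetV2 image classifier (`MobileNetV2ForImageClassification`) with 345 output classes, configured for 28x28 single-channel input (see `config.json` and `preprocessor_config.json`). More information needed.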
26
+
27
+ ## Intended uses & limitations
28
+
29
+ More information needed
30
+
31
+ ## Training and evaluation data
32
+
33
+ More information needed
34
+
35
+ ## Training procedure
36
+
37
+ ### Training hyperparameters
38
+
39
+ The following hyperparameters were used during training:
40
+ - learning_rate: 0.0008
41
+ - train_batch_size: 512
42
+ - eval_batch_size: 512
43
+ - seed: 42
44
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
+ - lr_scheduler_type: linear
46
+ - num_epochs: 10
47
+ - mixed_precision_training: Native AMP
48
+
49
+ ### Training results
50
+
51
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy |
52
+ |:-------------:|:------:|:-----:|:---------------:|:--------:|
53
+ | 1.4546 | 0.5688 | 5000 | 1.4383 | 0.6474 |
54
+ | 1.3759 | 1.1377 | 10000 | 1.3850 | 0.6610 |
55
+ | 1.3508 | 1.7065 | 15000 | 1.3163 | 0.6737 |
56
+ | 1.2940 | 2.2753 | 20000 | 1.2832 | 0.6829 |
57
+ | 1.2811 | 2.8441 | 25000 | 1.2581 | 0.6881 |
58
+ | 1.2331 | 3.4130 | 30000 | 1.2387 | 0.6926 |
59
+ | 1.2276 | 3.9818 | 35000 | 1.2227 | 0.6978 |
60
+ | 1.1964 | 4.5506 | 40000 | 1.2196 | 0.6990 |
61
+ | 1.1498 | 5.1195 | 45000 | 1.1994 | 0.7036 |
62
+ | 1.1548 | 5.6883 | 50000 | 1.1900 | 0.7052 |
63
+ | 1.1232 | 6.2571 | 55000 | 1.1831 | 0.7075 |
64
+ | 1.1264 | 6.8259 | 60000 | 1.1695 | 0.7100 |
65
+ | 1.0896 | 7.3948 | 65000 | 1.1584 | 0.7128 |
66
+ | 1.0917 | 7.9636 | 70000 | 1.1535 | 0.7155 |
67
+ | 1.0654 | 8.5324 | 75000 | 1.1545 | 0.7144 |
68
+ | 1.0395 | 9.1013 | 80000 | 1.1471 | 0.7169 |
69
+ | 1.0383 | 9.6701 | 85000 | 1.1722 | 0.7136 |
70
+
71
+
72
+ ### Framework versions
73
+
74
+ - Transformers 4.40.0
75
+ - PyTorch 2.2.1
76
+ - Datasets 2.19.0
77
+ - Tokenizers 0.19.1
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.35654,
4
+ "eval_loss": 4.412439346313477,
5
+ "eval_runtime": 16.0962,
6
+ "eval_samples_per_second": 15531.592,
7
+ "eval_steps_per_second": 30.38,
8
+ "total_flos": 5.6417821488e+17,
9
+ "train_loss": 1.2023330011465443,
10
+ "train_runtime": 3087.8654,
11
+ "train_samples_per_second": 14573.174,
12
+ "train_steps_per_second": 28.466
13
+ }
config.json ADDED
@@ -0,0 +1,719 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/mobilenet_v2_1.0_224",
3
+ "architectures": [
4
+ "MobileNetV2ForImageClassification"
5
+ ],
6
+ "classifier_dropout_prob": 0.2,
7
+ "depth_divisible_by": 8,
8
+ "depth_multiplier": 1.0,
9
+ "expand_ratio": 6,
10
+ "finegrained_output": true,
11
+ "first_layer_is_expansion": true,
12
+ "hidden_act": "relu6",
13
+ "id2label": {
14
+ "0": "aircraft carrier",
15
+ "1": "airplane",
16
+ "10": "asparagus",
17
+ "100": "dumbbell",
18
+ "101": "ear",
19
+ "102": "elbow",
20
+ "103": "elephant",
21
+ "104": "envelope",
22
+ "105": "eraser",
23
+ "106": "eye",
24
+ "107": "eyeglasses",
25
+ "108": "face",
26
+ "109": "fan",
27
+ "11": "axe",
28
+ "110": "feather",
29
+ "111": "fence",
30
+ "112": "finger",
31
+ "113": "fire hydrant",
32
+ "114": "fireplace",
33
+ "115": "firetruck",
34
+ "116": "fish",
35
+ "117": "flamingo",
36
+ "118": "flashlight",
37
+ "119": "flip flops",
38
+ "12": "backpack",
39
+ "120": "floor lamp",
40
+ "121": "flower",
41
+ "122": "flying saucer",
42
+ "123": "foot",
43
+ "124": "fork",
44
+ "125": "frog",
45
+ "126": "frying pan",
46
+ "127": "garden hose",
47
+ "128": "garden",
48
+ "129": "giraffe",
49
+ "13": "banana",
50
+ "130": "goatee",
51
+ "131": "golf club",
52
+ "132": "grapes",
53
+ "133": "grass",
54
+ "134": "guitar",
55
+ "135": "hamburger",
56
+ "136": "hammer",
57
+ "137": "hand",
58
+ "138": "harp",
59
+ "139": "hat",
60
+ "14": "bandage",
61
+ "140": "headphones",
62
+ "141": "hedgehog",
63
+ "142": "helicopter",
64
+ "143": "helmet",
65
+ "144": "hexagon",
66
+ "145": "hockey puck",
67
+ "146": "hockey stick",
68
+ "147": "horse",
69
+ "148": "hospital",
70
+ "149": "hot air balloon",
71
+ "15": "barn",
72
+ "150": "hot dog",
73
+ "151": "hot tub",
74
+ "152": "hourglass",
75
+ "153": "house plant",
76
+ "154": "house",
77
+ "155": "hurricane",
78
+ "156": "ice cream",
79
+ "157": "jacket",
80
+ "158": "jail",
81
+ "159": "kangaroo",
82
+ "16": "baseball bat",
83
+ "160": "key",
84
+ "161": "keyboard",
85
+ "162": "knee",
86
+ "163": "knife",
87
+ "164": "ladder",
88
+ "165": "lantern",
89
+ "166": "laptop",
90
+ "167": "leaf",
91
+ "168": "leg",
92
+ "169": "light bulb",
93
+ "17": "baseball",
94
+ "170": "lighter",
95
+ "171": "lighthouse",
96
+ "172": "lightning",
97
+ "173": "line",
98
+ "174": "lion",
99
+ "175": "lipstick",
100
+ "176": "lobster",
101
+ "177": "lollipop",
102
+ "178": "mailbox",
103
+ "179": "map",
104
+ "18": "basket",
105
+ "180": "marker",
106
+ "181": "matches",
107
+ "182": "megaphone",
108
+ "183": "mermaid",
109
+ "184": "microphone",
110
+ "185": "microwave",
111
+ "186": "monkey",
112
+ "187": "moon",
113
+ "188": "mosquito",
114
+ "189": "motorbike",
115
+ "19": "basketball",
116
+ "190": "mountain",
117
+ "191": "mouse",
118
+ "192": "moustache",
119
+ "193": "mouth",
120
+ "194": "mug",
121
+ "195": "mushroom",
122
+ "196": "nail",
123
+ "197": "necklace",
124
+ "198": "nose",
125
+ "199": "ocean",
126
+ "2": "alarm clock",
127
+ "20": "bat",
128
+ "200": "octagon",
129
+ "201": "octopus",
130
+ "202": "onion",
131
+ "203": "oven",
132
+ "204": "owl",
133
+ "205": "paint can",
134
+ "206": "paintbrush",
135
+ "207": "palm tree",
136
+ "208": "panda",
137
+ "209": "pants",
138
+ "21": "bathtub",
139
+ "210": "paper clip",
140
+ "211": "parachute",
141
+ "212": "parrot",
142
+ "213": "passport",
143
+ "214": "peanut",
144
+ "215": "pear",
145
+ "216": "peas",
146
+ "217": "pencil",
147
+ "218": "penguin",
148
+ "219": "piano",
149
+ "22": "beach",
150
+ "220": "pickup truck",
151
+ "221": "picture frame",
152
+ "222": "pig",
153
+ "223": "pillow",
154
+ "224": "pineapple",
155
+ "225": "pizza",
156
+ "226": "pliers",
157
+ "227": "police car",
158
+ "228": "pond",
159
+ "229": "pool",
160
+ "23": "bear",
161
+ "230": "popsicle",
162
+ "231": "postcard",
163
+ "232": "potato",
164
+ "233": "power outlet",
165
+ "234": "purse",
166
+ "235": "rabbit",
167
+ "236": "raccoon",
168
+ "237": "radio",
169
+ "238": "rain",
170
+ "239": "rainbow",
171
+ "24": "beard",
172
+ "240": "rake",
173
+ "241": "remote control",
174
+ "242": "rhinoceros",
175
+ "243": "rifle",
176
+ "244": "river",
177
+ "245": "roller coaster",
178
+ "246": "rollerskates",
179
+ "247": "sailboat",
180
+ "248": "sandwich",
181
+ "249": "saw",
182
+ "25": "bed",
183
+ "250": "saxophone",
184
+ "251": "school bus",
185
+ "252": "scissors",
186
+ "253": "scorpion",
187
+ "254": "screwdriver",
188
+ "255": "sea turtle",
189
+ "256": "see saw",
190
+ "257": "shark",
191
+ "258": "sheep",
192
+ "259": "shoe",
193
+ "26": "bee",
194
+ "260": "shorts",
195
+ "261": "shovel",
196
+ "262": "sink",
197
+ "263": "skateboard",
198
+ "264": "skull",
199
+ "265": "skyscraper",
200
+ "266": "sleeping bag",
201
+ "267": "smiley face",
202
+ "268": "snail",
203
+ "269": "snake",
204
+ "27": "belt",
205
+ "270": "snorkel",
206
+ "271": "snowflake",
207
+ "272": "snowman",
208
+ "273": "soccer ball",
209
+ "274": "sock",
210
+ "275": "speedboat",
211
+ "276": "spider",
212
+ "277": "spoon",
213
+ "278": "spreadsheet",
214
+ "279": "square",
215
+ "28": "bench",
216
+ "280": "squiggle",
217
+ "281": "squirrel",
218
+ "282": "stairs",
219
+ "283": "star",
220
+ "284": "steak",
221
+ "285": "stereo",
222
+ "286": "stethoscope",
223
+ "287": "stitches",
224
+ "288": "stop sign",
225
+ "289": "stove",
226
+ "29": "bicycle",
227
+ "290": "strawberry",
228
+ "291": "streetlight",
229
+ "292": "string bean",
230
+ "293": "submarine",
231
+ "294": "suitcase",
232
+ "295": "sun",
233
+ "296": "swan",
234
+ "297": "sweater",
235
+ "298": "swing set",
236
+ "299": "sword",
237
+ "3": "ambulance",
238
+ "30": "binoculars",
239
+ "300": "syringe",
240
+ "301": "t-shirt",
241
+ "302": "table",
242
+ "303": "teapot",
243
+ "304": "teddy-bear",
244
+ "305": "telephone",
245
+ "306": "television",
246
+ "307": "tennis racquet",
247
+ "308": "tent",
248
+ "309": "The Eiffel Tower",
249
+ "31": "bird",
250
+ "310": "The Great Wall of China",
251
+ "311": "The Mona Lisa",
252
+ "312": "tiger",
253
+ "313": "toaster",
254
+ "314": "toe",
255
+ "315": "toilet",
256
+ "316": "tooth",
257
+ "317": "toothbrush",
258
+ "318": "toothpaste",
259
+ "319": "tornado",
260
+ "32": "birthday cake",
261
+ "320": "tractor",
262
+ "321": "traffic light",
263
+ "322": "train",
264
+ "323": "tree",
265
+ "324": "triangle",
266
+ "325": "trombone",
267
+ "326": "truck",
268
+ "327": "trumpet",
269
+ "328": "umbrella",
270
+ "329": "underwear",
271
+ "33": "blackberry",
272
+ "330": "van",
273
+ "331": "vase",
274
+ "332": "violin",
275
+ "333": "washing machine",
276
+ "334": "watermelon",
277
+ "335": "waterslide",
278
+ "336": "whale",
279
+ "337": "wheel",
280
+ "338": "windmill",
281
+ "339": "wine bottle",
282
+ "34": "blueberry",
283
+ "340": "wine glass",
284
+ "341": "wristwatch",
285
+ "342": "yoga",
286
+ "343": "zebra",
287
+ "344": "zigzag",
288
+ "35": "book",
289
+ "36": "boomerang",
290
+ "37": "bottlecap",
291
+ "38": "bowtie",
292
+ "39": "bracelet",
293
+ "4": "angel",
294
+ "40": "brain",
295
+ "41": "bread",
296
+ "42": "bridge",
297
+ "43": "broccoli",
298
+ "44": "broom",
299
+ "45": "bucket",
300
+ "46": "bulldozer",
301
+ "47": "bus",
302
+ "48": "bush",
303
+ "49": "butterfly",
304
+ "5": "animal migration",
305
+ "50": "cactus",
306
+ "51": "cake",
307
+ "52": "calculator",
308
+ "53": "calendar",
309
+ "54": "camel",
310
+ "55": "camera",
311
+ "56": "camouflage",
312
+ "57": "campfire",
313
+ "58": "candle",
314
+ "59": "cannon",
315
+ "6": "ant",
316
+ "60": "canoe",
317
+ "61": "car",
318
+ "62": "carrot",
319
+ "63": "castle",
320
+ "64": "cat",
321
+ "65": "ceiling fan",
322
+ "66": "cell phone",
323
+ "67": "cello",
324
+ "68": "chair",
325
+ "69": "chandelier",
326
+ "7": "anvil",
327
+ "70": "church",
328
+ "71": "circle",
329
+ "72": "clarinet",
330
+ "73": "clock",
331
+ "74": "cloud",
332
+ "75": "coffee cup",
333
+ "76": "compass",
334
+ "77": "computer",
335
+ "78": "cookie",
336
+ "79": "cooler",
337
+ "8": "apple",
338
+ "80": "couch",
339
+ "81": "cow",
340
+ "82": "crab",
341
+ "83": "crayon",
342
+ "84": "crocodile",
343
+ "85": "crown",
344
+ "86": "cruise ship",
345
+ "87": "cup",
346
+ "88": "diamond",
347
+ "89": "dishwasher",
348
+ "9": "arm",
349
+ "90": "diving board",
350
+ "91": "dog",
351
+ "92": "dolphin",
352
+ "93": "donut",
353
+ "94": "door",
354
+ "95": "dragon",
355
+ "96": "dresser",
356
+ "97": "drill",
357
+ "98": "drums",
358
+ "99": "duck"
359
+ },
360
+ "image_size": 28,
361
+ "initializer_range": 0.02,
362
+ "label2id": {
363
+ "The Eiffel Tower": "309",
364
+ "The Great Wall of China": "310",
365
+ "The Mona Lisa": "311",
366
+ "aircraft carrier": "0",
367
+ "airplane": "1",
368
+ "alarm clock": "2",
369
+ "ambulance": "3",
370
+ "angel": "4",
371
+ "animal migration": "5",
372
+ "ant": "6",
373
+ "anvil": "7",
374
+ "apple": "8",
375
+ "arm": "9",
376
+ "asparagus": "10",
377
+ "axe": "11",
378
+ "backpack": "12",
379
+ "banana": "13",
380
+ "bandage": "14",
381
+ "barn": "15",
382
+ "baseball": "17",
383
+ "baseball bat": "16",
384
+ "basket": "18",
385
+ "basketball": "19",
386
+ "bat": "20",
387
+ "bathtub": "21",
388
+ "beach": "22",
389
+ "bear": "23",
390
+ "beard": "24",
391
+ "bed": "25",
392
+ "bee": "26",
393
+ "belt": "27",
394
+ "bench": "28",
395
+ "bicycle": "29",
396
+ "binoculars": "30",
397
+ "bird": "31",
398
+ "birthday cake": "32",
399
+ "blackberry": "33",
400
+ "blueberry": "34",
401
+ "book": "35",
402
+ "boomerang": "36",
403
+ "bottlecap": "37",
404
+ "bowtie": "38",
405
+ "bracelet": "39",
406
+ "brain": "40",
407
+ "bread": "41",
408
+ "bridge": "42",
409
+ "broccoli": "43",
410
+ "broom": "44",
411
+ "bucket": "45",
412
+ "bulldozer": "46",
413
+ "bus": "47",
414
+ "bush": "48",
415
+ "butterfly": "49",
416
+ "cactus": "50",
417
+ "cake": "51",
418
+ "calculator": "52",
419
+ "calendar": "53",
420
+ "camel": "54",
421
+ "camera": "55",
422
+ "camouflage": "56",
423
+ "campfire": "57",
424
+ "candle": "58",
425
+ "cannon": "59",
426
+ "canoe": "60",
427
+ "car": "61",
428
+ "carrot": "62",
429
+ "castle": "63",
430
+ "cat": "64",
431
+ "ceiling fan": "65",
432
+ "cell phone": "66",
433
+ "cello": "67",
434
+ "chair": "68",
435
+ "chandelier": "69",
436
+ "church": "70",
437
+ "circle": "71",
438
+ "clarinet": "72",
439
+ "clock": "73",
440
+ "cloud": "74",
441
+ "coffee cup": "75",
442
+ "compass": "76",
443
+ "computer": "77",
444
+ "cookie": "78",
445
+ "cooler": "79",
446
+ "couch": "80",
447
+ "cow": "81",
448
+ "crab": "82",
449
+ "crayon": "83",
450
+ "crocodile": "84",
451
+ "crown": "85",
452
+ "cruise ship": "86",
453
+ "cup": "87",
454
+ "diamond": "88",
455
+ "dishwasher": "89",
456
+ "diving board": "90",
457
+ "dog": "91",
458
+ "dolphin": "92",
459
+ "donut": "93",
460
+ "door": "94",
461
+ "dragon": "95",
462
+ "dresser": "96",
463
+ "drill": "97",
464
+ "drums": "98",
465
+ "duck": "99",
466
+ "dumbbell": "100",
467
+ "ear": "101",
468
+ "elbow": "102",
469
+ "elephant": "103",
470
+ "envelope": "104",
471
+ "eraser": "105",
472
+ "eye": "106",
473
+ "eyeglasses": "107",
474
+ "face": "108",
475
+ "fan": "109",
476
+ "feather": "110",
477
+ "fence": "111",
478
+ "finger": "112",
479
+ "fire hydrant": "113",
480
+ "fireplace": "114",
481
+ "firetruck": "115",
482
+ "fish": "116",
483
+ "flamingo": "117",
484
+ "flashlight": "118",
485
+ "flip flops": "119",
486
+ "floor lamp": "120",
487
+ "flower": "121",
488
+ "flying saucer": "122",
489
+ "foot": "123",
490
+ "fork": "124",
491
+ "frog": "125",
492
+ "frying pan": "126",
493
+ "garden": "128",
494
+ "garden hose": "127",
495
+ "giraffe": "129",
496
+ "goatee": "130",
497
+ "golf club": "131",
498
+ "grapes": "132",
499
+ "grass": "133",
500
+ "guitar": "134",
501
+ "hamburger": "135",
502
+ "hammer": "136",
503
+ "hand": "137",
504
+ "harp": "138",
505
+ "hat": "139",
506
+ "headphones": "140",
507
+ "hedgehog": "141",
508
+ "helicopter": "142",
509
+ "helmet": "143",
510
+ "hexagon": "144",
511
+ "hockey puck": "145",
512
+ "hockey stick": "146",
513
+ "horse": "147",
514
+ "hospital": "148",
515
+ "hot air balloon": "149",
516
+ "hot dog": "150",
517
+ "hot tub": "151",
518
+ "hourglass": "152",
519
+ "house": "154",
520
+ "house plant": "153",
521
+ "hurricane": "155",
522
+ "ice cream": "156",
523
+ "jacket": "157",
524
+ "jail": "158",
525
+ "kangaroo": "159",
526
+ "key": "160",
527
+ "keyboard": "161",
528
+ "knee": "162",
529
+ "knife": "163",
530
+ "ladder": "164",
531
+ "lantern": "165",
532
+ "laptop": "166",
533
+ "leaf": "167",
534
+ "leg": "168",
535
+ "light bulb": "169",
536
+ "lighter": "170",
537
+ "lighthouse": "171",
538
+ "lightning": "172",
539
+ "line": "173",
540
+ "lion": "174",
541
+ "lipstick": "175",
542
+ "lobster": "176",
543
+ "lollipop": "177",
544
+ "mailbox": "178",
545
+ "map": "179",
546
+ "marker": "180",
547
+ "matches": "181",
548
+ "megaphone": "182",
549
+ "mermaid": "183",
550
+ "microphone": "184",
551
+ "microwave": "185",
552
+ "monkey": "186",
553
+ "moon": "187",
554
+ "mosquito": "188",
555
+ "motorbike": "189",
556
+ "mountain": "190",
557
+ "mouse": "191",
558
+ "moustache": "192",
559
+ "mouth": "193",
560
+ "mug": "194",
561
+ "mushroom": "195",
562
+ "nail": "196",
563
+ "necklace": "197",
564
+ "nose": "198",
565
+ "ocean": "199",
566
+ "octagon": "200",
567
+ "octopus": "201",
568
+ "onion": "202",
569
+ "oven": "203",
570
+ "owl": "204",
571
+ "paint can": "205",
572
+ "paintbrush": "206",
573
+ "palm tree": "207",
574
+ "panda": "208",
575
+ "pants": "209",
576
+ "paper clip": "210",
577
+ "parachute": "211",
578
+ "parrot": "212",
579
+ "passport": "213",
580
+ "peanut": "214",
581
+ "pear": "215",
582
+ "peas": "216",
583
+ "pencil": "217",
584
+ "penguin": "218",
585
+ "piano": "219",
586
+ "pickup truck": "220",
587
+ "picture frame": "221",
588
+ "pig": "222",
589
+ "pillow": "223",
590
+ "pineapple": "224",
591
+ "pizza": "225",
592
+ "pliers": "226",
593
+ "police car": "227",
594
+ "pond": "228",
595
+ "pool": "229",
596
+ "popsicle": "230",
597
+ "postcard": "231",
598
+ "potato": "232",
599
+ "power outlet": "233",
600
+ "purse": "234",
601
+ "rabbit": "235",
602
+ "raccoon": "236",
603
+ "radio": "237",
604
+ "rain": "238",
605
+ "rainbow": "239",
606
+ "rake": "240",
607
+ "remote control": "241",
608
+ "rhinoceros": "242",
609
+ "rifle": "243",
610
+ "river": "244",
611
+ "roller coaster": "245",
612
+ "rollerskates": "246",
613
+ "sailboat": "247",
614
+ "sandwich": "248",
615
+ "saw": "249",
616
+ "saxophone": "250",
617
+ "school bus": "251",
618
+ "scissors": "252",
619
+ "scorpion": "253",
620
+ "screwdriver": "254",
621
+ "sea turtle": "255",
622
+ "see saw": "256",
623
+ "shark": "257",
624
+ "sheep": "258",
625
+ "shoe": "259",
626
+ "shorts": "260",
627
+ "shovel": "261",
628
+ "sink": "262",
629
+ "skateboard": "263",
630
+ "skull": "264",
631
+ "skyscraper": "265",
632
+ "sleeping bag": "266",
633
+ "smiley face": "267",
634
+ "snail": "268",
635
+ "snake": "269",
636
+ "snorkel": "270",
637
+ "snowflake": "271",
638
+ "snowman": "272",
639
+ "soccer ball": "273",
640
+ "sock": "274",
641
+ "speedboat": "275",
642
+ "spider": "276",
643
+ "spoon": "277",
644
+ "spreadsheet": "278",
645
+ "square": "279",
646
+ "squiggle": "280",
647
+ "squirrel": "281",
648
+ "stairs": "282",
649
+ "star": "283",
650
+ "steak": "284",
651
+ "stereo": "285",
652
+ "stethoscope": "286",
653
+ "stitches": "287",
654
+ "stop sign": "288",
655
+ "stove": "289",
656
+ "strawberry": "290",
657
+ "streetlight": "291",
658
+ "string bean": "292",
659
+ "submarine": "293",
660
+ "suitcase": "294",
661
+ "sun": "295",
662
+ "swan": "296",
663
+ "sweater": "297",
664
+ "swing set": "298",
665
+ "sword": "299",
666
+ "syringe": "300",
667
+ "t-shirt": "301",
668
+ "table": "302",
669
+ "teapot": "303",
670
+ "teddy-bear": "304",
671
+ "telephone": "305",
672
+ "television": "306",
673
+ "tennis racquet": "307",
674
+ "tent": "308",
675
+ "tiger": "312",
676
+ "toaster": "313",
677
+ "toe": "314",
678
+ "toilet": "315",
679
+ "tooth": "316",
680
+ "toothbrush": "317",
681
+ "toothpaste": "318",
682
+ "tornado": "319",
683
+ "tractor": "320",
684
+ "traffic light": "321",
685
+ "train": "322",
686
+ "tree": "323",
687
+ "triangle": "324",
688
+ "trombone": "325",
689
+ "truck": "326",
690
+ "trumpet": "327",
691
+ "umbrella": "328",
692
+ "underwear": "329",
693
+ "van": "330",
694
+ "vase": "331",
695
+ "violin": "332",
696
+ "washing machine": "333",
697
+ "watermelon": "334",
698
+ "waterslide": "335",
699
+ "whale": "336",
700
+ "wheel": "337",
701
+ "windmill": "338",
702
+ "wine bottle": "339",
703
+ "wine glass": "340",
704
+ "wristwatch": "341",
705
+ "yoga": "342",
706
+ "zebra": "343",
707
+ "zigzag": "344"
708
+ },
709
+ "layer_norm_eps": 0.001,
710
+ "min_depth": 8,
711
+ "model_type": "mobilenet_v2",
712
+ "num_channels": 1,
713
+ "output_stride": 32,
714
+ "problem_type": "single_label_classification",
715
+ "semantic_loss_ignore_index": 255,
716
+ "tf_padding": true,
717
+ "torch_dtype": "float32",
718
+ "transformers_version": "4.40.0"
719
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48a2c7a84bc5b3b33a8893b4d011df0e8abc179b5597681d87fd1cff423f4385
3
+ size 10835548
preprocessor_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_valid_processor_keys": [
3
+ "images",
4
+ "do_resize",
5
+ "size",
6
+ "resample",
7
+ "do_center_crop",
8
+ "crop_size",
9
+ "do_rescale",
10
+ "rescale_factor",
11
+ "do_normalize",
12
+ "image_mean",
13
+ "image_std",
14
+ "return_tensors",
15
+ "data_format",
16
+ "input_data_format"
17
+ ],
18
+ "crop_size": {
19
+ "height": 28,
20
+ "width": 28
21
+ },
22
+ "do_center_crop": true,
23
+ "do_normalize": true,
24
+ "do_rescale": true,
25
+ "do_resize": true,
26
+ "image_mean": [
27
+ 0.5
28
+ ],
29
+ "image_processor_type": "MobileNetV2ImageProcessor",
30
+ "image_std": [
31
+ 0.5
32
+ ],
33
+ "resample": 2,
34
+ "rescale_factor": 0.00392156862745098,
35
+ "size": {
36
+ "shortest_edge": 28
37
+ }
38
+ }
test_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.35654,
4
+ "eval_loss": 4.412439346313477,
5
+ "eval_runtime": 16.0962,
6
+ "eval_samples_per_second": 15531.592,
7
+ "eval_steps_per_second": 30.38
8
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "total_flos": 5.6417821488e+17,
4
+ "train_loss": 1.2023330011465443,
5
+ "train_runtime": 3087.8654,
6
+ "train_samples_per_second": 14573.174,
7
+ "train_steps_per_second": 28.466
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,792 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 10.0,
5
+ "eval_steps": 5000,
6
+ "global_step": 87900,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.11376564277588168,
13
+ "grad_norm": 1.9390705823898315,
14
+ "learning_rate": 0.0007909078498293515,
15
+ "loss": 1.5809,
16
+ "step": 1000
17
+ },
18
+ {
19
+ "epoch": 0.22753128555176336,
20
+ "grad_norm": 1.703497052192688,
21
+ "learning_rate": 0.000781806598407281,
22
+ "loss": 1.54,
23
+ "step": 2000
24
+ },
25
+ {
26
+ "epoch": 0.3412969283276451,
27
+ "grad_norm": 1.7551511526107788,
28
+ "learning_rate": 0.0007727144482366326,
29
+ "loss": 1.5087,
30
+ "step": 3000
31
+ },
32
+ {
33
+ "epoch": 0.4550625711035267,
34
+ "grad_norm": 1.5709869861602783,
35
+ "learning_rate": 0.000763613196814562,
36
+ "loss": 1.4773,
37
+ "step": 4000
38
+ },
39
+ {
40
+ "epoch": 0.5688282138794084,
41
+ "grad_norm": 1.5395598411560059,
42
+ "learning_rate": 0.0007545119453924914,
43
+ "loss": 1.4546,
44
+ "step": 5000
45
+ },
46
+ {
47
+ "epoch": 0.5688282138794084,
48
+ "eval_accuracy": 0.647436,
49
+ "eval_loss": 1.4382679462432861,
50
+ "eval_runtime": 16.1443,
51
+ "eval_samples_per_second": 15485.324,
52
+ "eval_steps_per_second": 30.289,
53
+ "step": 5000
54
+ },
55
+ {
56
+ "epoch": 0.6825938566552902,
57
+ "grad_norm": 1.6133095026016235,
58
+ "learning_rate": 0.0007454106939704209,
59
+ "loss": 1.4513,
60
+ "step": 6000
61
+ },
62
+ {
63
+ "epoch": 0.7963594994311718,
64
+ "grad_norm": 1.3529345989227295,
65
+ "learning_rate": 0.0007363185437997725,
66
+ "loss": 1.459,
67
+ "step": 7000
68
+ },
69
+ {
70
+ "epoch": 0.9101251422070534,
71
+ "grad_norm": 1.4212840795516968,
72
+ "learning_rate": 0.000727217292377702,
73
+ "loss": 1.4393,
74
+ "step": 8000
75
+ },
76
+ {
77
+ "epoch": 1.023890784982935,
78
+ "grad_norm": 1.3942997455596924,
79
+ "learning_rate": 0.0007181342434584756,
80
+ "loss": 1.4183,
81
+ "step": 9000
82
+ },
83
+ {
84
+ "epoch": 1.1376564277588168,
85
+ "grad_norm": 1.584731936454773,
86
+ "learning_rate": 0.0007090329920364051,
87
+ "loss": 1.3759,
88
+ "step": 10000
89
+ },
90
+ {
91
+ "epoch": 1.1376564277588168,
92
+ "eval_accuracy": 0.660984,
93
+ "eval_loss": 1.38503897190094,
94
+ "eval_runtime": 16.2019,
95
+ "eval_samples_per_second": 15430.245,
96
+ "eval_steps_per_second": 30.182,
97
+ "step": 10000
98
+ },
99
+ {
100
+ "epoch": 1.2514220705346986,
101
+ "grad_norm": 1.4144625663757324,
102
+ "learning_rate": 0.0006999317406143345,
103
+ "loss": 1.375,
104
+ "step": 11000
105
+ },
106
+ {
107
+ "epoch": 1.36518771331058,
108
+ "grad_norm": 1.3004510402679443,
109
+ "learning_rate": 0.0006908395904436861,
110
+ "loss": 1.3729,
111
+ "step": 12000
112
+ },
113
+ {
114
+ "epoch": 1.4789533560864618,
115
+ "grad_norm": 1.3783901929855347,
116
+ "learning_rate": 0.0006817474402730376,
117
+ "loss": 1.3562,
118
+ "step": 13000
119
+ },
120
+ {
121
+ "epoch": 1.5927189988623436,
122
+ "grad_norm": 1.309706449508667,
123
+ "learning_rate": 0.000672646188850967,
124
+ "loss": 1.355,
125
+ "step": 14000
126
+ },
127
+ {
128
+ "epoch": 1.7064846416382253,
129
+ "grad_norm": 3.742795944213867,
130
+ "learning_rate": 0.0006635540386803186,
131
+ "loss": 1.3508,
132
+ "step": 15000
133
+ },
134
+ {
135
+ "epoch": 1.7064846416382253,
136
+ "eval_accuracy": 0.673728,
137
+ "eval_loss": 1.316284418106079,
138
+ "eval_runtime": 16.2031,
139
+ "eval_samples_per_second": 15429.139,
140
+ "eval_steps_per_second": 30.179,
141
+ "step": 15000
142
+ },
143
+ {
144
+ "epoch": 1.820250284414107,
145
+ "grad_norm": 1.2620598077774048,
146
+ "learning_rate": 0.0006544527872582481,
147
+ "loss": 1.3472,
148
+ "step": 16000
149
+ },
150
+ {
151
+ "epoch": 1.9340159271899886,
152
+ "grad_norm": 1.3602592945098877,
153
+ "learning_rate": 0.0006453515358361775,
154
+ "loss": 1.3371,
155
+ "step": 17000
156
+ },
157
+ {
158
+ "epoch": 2.04778156996587,
159
+ "grad_norm": 1.3070189952850342,
160
+ "learning_rate": 0.000636259385665529,
161
+ "loss": 1.3145,
162
+ "step": 18000
163
+ },
164
+ {
165
+ "epoch": 2.161547212741752,
166
+ "grad_norm": 1.2134970426559448,
167
+ "learning_rate": 0.0006271581342434585,
168
+ "loss": 1.2917,
169
+ "step": 19000
170
+ },
171
+ {
172
+ "epoch": 2.2753128555176336,
173
+ "grad_norm": 1.3796401023864746,
174
+ "learning_rate": 0.00061806598407281,
175
+ "loss": 1.294,
176
+ "step": 20000
177
+ },
178
+ {
179
+ "epoch": 2.2753128555176336,
180
+ "eval_accuracy": 0.682924,
181
+ "eval_loss": 1.283160924911499,
182
+ "eval_runtime": 16.1194,
183
+ "eval_samples_per_second": 15509.309,
184
+ "eval_steps_per_second": 30.336,
185
+ "step": 20000
186
+ },
187
+ {
188
+ "epoch": 2.3890784982935154,
189
+ "grad_norm": 1.357393741607666,
190
+ "learning_rate": 0.0006089738339021616,
191
+ "loss": 1.2936,
192
+ "step": 21000
193
+ },
194
+ {
195
+ "epoch": 2.502844141069397,
196
+ "grad_norm": 1.2381339073181152,
197
+ "learning_rate": 0.0005998725824800911,
198
+ "loss": 1.2859,
199
+ "step": 22000
200
+ },
201
+ {
202
+ "epoch": 2.616609783845279,
203
+ "grad_norm": 1.256423830986023,
204
+ "learning_rate": 0.0005907713310580204,
205
+ "loss": 1.2899,
206
+ "step": 23000
207
+ },
208
+ {
209
+ "epoch": 2.73037542662116,
210
+ "grad_norm": 1.1443513631820679,
211
+ "learning_rate": 0.000581679180887372,
212
+ "loss": 1.2846,
213
+ "step": 24000
214
+ },
215
+ {
216
+ "epoch": 2.8441410693970424,
217
+ "grad_norm": 1.2000058889389038,
218
+ "learning_rate": 0.0005725870307167236,
219
+ "loss": 1.2811,
220
+ "step": 25000
221
+ },
222
+ {
223
+ "epoch": 2.8441410693970424,
224
+ "eval_accuracy": 0.688052,
225
+ "eval_loss": 1.2580605745315552,
226
+ "eval_runtime": 16.1237,
227
+ "eval_samples_per_second": 15505.095,
228
+ "eval_steps_per_second": 30.328,
229
+ "step": 25000
230
+ },
231
+ {
232
+ "epoch": 2.9579067121729237,
233
+ "grad_norm": 1.2849873304367065,
234
+ "learning_rate": 0.0005634857792946531,
235
+ "loss": 1.2779,
236
+ "step": 26000
237
+ },
238
+ {
239
+ "epoch": 3.0716723549488054,
240
+ "grad_norm": 1.2703396081924438,
241
+ "learning_rate": 0.0005543936291240047,
242
+ "loss": 1.2444,
243
+ "step": 27000
244
+ },
245
+ {
246
+ "epoch": 3.185437997724687,
247
+ "grad_norm": 1.356720209121704,
248
+ "learning_rate": 0.000545292377701934,
249
+ "loss": 1.2303,
250
+ "step": 28000
251
+ },
252
+ {
253
+ "epoch": 3.299203640500569,
254
+ "grad_norm": 1.128195881843567,
255
+ "learning_rate": 0.0005361911262798635,
256
+ "loss": 1.2321,
257
+ "step": 29000
258
+ },
259
+ {
260
+ "epoch": 3.4129692832764507,
261
+ "grad_norm": 1.2033754587173462,
262
+ "learning_rate": 0.0005270989761092151,
263
+ "loss": 1.2331,
264
+ "step": 30000
265
+ },
266
+ {
267
+ "epoch": 3.4129692832764507,
268
+ "eval_accuracy": 0.69262,
269
+ "eval_loss": 1.2387434244155884,
270
+ "eval_runtime": 16.2457,
271
+ "eval_samples_per_second": 15388.688,
272
+ "eval_steps_per_second": 30.1,
273
+ "step": 30000
274
+ },
275
+ {
276
+ "epoch": 3.526734926052332,
277
+ "grad_norm": 1.2216309309005737,
278
+ "learning_rate": 0.0005179977246871446,
279
+ "loss": 1.2384,
280
+ "step": 31000
281
+ },
282
+ {
283
+ "epoch": 3.640500568828214,
284
+ "grad_norm": 1.3189234733581543,
285
+ "learning_rate": 0.000508896473265074,
286
+ "loss": 1.239,
287
+ "step": 32000
288
+ },
289
+ {
290
+ "epoch": 3.7542662116040955,
291
+ "grad_norm": 1.193328857421875,
292
+ "learning_rate": 0.0004998043230944255,
293
+ "loss": 1.2282,
294
+ "step": 33000
295
+ },
296
+ {
297
+ "epoch": 3.868031854379977,
298
+ "grad_norm": 1.3810237646102905,
299
+ "learning_rate": 0.000490703071672355,
300
+ "loss": 1.2301,
301
+ "step": 34000
302
+ },
303
+ {
304
+ "epoch": 3.981797497155859,
305
+ "grad_norm": 1.477654218673706,
306
+ "learning_rate": 0.0004816018202502845,
307
+ "loss": 1.2276,
308
+ "step": 35000
309
+ },
310
+ {
311
+ "epoch": 3.981797497155859,
312
+ "eval_accuracy": 0.697844,
313
+ "eval_loss": 1.2226529121398926,
314
+ "eval_runtime": 16.1466,
315
+ "eval_samples_per_second": 15483.136,
316
+ "eval_steps_per_second": 30.285,
317
+ "step": 35000
318
+ },
319
+ {
320
+ "epoch": 4.09556313993174,
321
+ "grad_norm": 2.5721781253814697,
322
+ "learning_rate": 0.00047250056882821396,
323
+ "loss": 1.2011,
324
+ "step": 36000
325
+ },
326
+ {
327
+ "epoch": 4.2093287827076225,
328
+ "grad_norm": 1.233066439628601,
329
+ "learning_rate": 0.00046340841865756544,
330
+ "loss": 1.1882,
331
+ "step": 37000
332
+ },
333
+ {
334
+ "epoch": 4.323094425483504,
335
+ "grad_norm": 15.391983032226562,
336
+ "learning_rate": 0.0004543071672354949,
337
+ "loss": 1.1856,
338
+ "step": 38000
339
+ },
340
+ {
341
+ "epoch": 4.436860068259386,
342
+ "grad_norm": 1.2283698320388794,
343
+ "learning_rate": 0.0004452059158134244,
344
+ "loss": 1.1972,
345
+ "step": 39000
346
+ },
347
+ {
348
+ "epoch": 4.550625711035267,
349
+ "grad_norm": 1.1042656898498535,
350
+ "learning_rate": 0.0004361046643913539,
351
+ "loss": 1.1964,
352
+ "step": 40000
353
+ },
354
+ {
355
+ "epoch": 4.550625711035267,
356
+ "eval_accuracy": 0.698972,
357
+ "eval_loss": 1.2195725440979004,
358
+ "eval_runtime": 16.22,
359
+ "eval_samples_per_second": 15413.078,
360
+ "eval_steps_per_second": 30.148,
361
+ "step": 40000
362
+ },
363
+ {
364
+ "epoch": 4.664391353811149,
365
+ "grad_norm": 1.2379703521728516,
366
+ "learning_rate": 0.00042701251422070535,
367
+ "loss": 1.194,
368
+ "step": 41000
369
+ },
370
+ {
371
+ "epoch": 4.778156996587031,
372
+ "grad_norm": 1.3536499738693237,
373
+ "learning_rate": 0.00041792036405005693,
374
+ "loss": 1.1939,
375
+ "step": 42000
376
+ },
377
+ {
378
+ "epoch": 4.891922639362912,
379
+ "grad_norm": 1.1571460962295532,
380
+ "learning_rate": 0.00040881911262798635,
381
+ "loss": 1.1952,
382
+ "step": 43000
383
+ },
384
+ {
385
+ "epoch": 5.005688282138794,
386
+ "grad_norm": 1.1833922863006592,
387
+ "learning_rate": 0.00039972696245733794,
388
+ "loss": 1.1908,
389
+ "step": 44000
390
+ },
391
+ {
392
+ "epoch": 5.1194539249146755,
393
+ "grad_norm": 1.4700716733932495,
394
+ "learning_rate": 0.00039062571103526736,
395
+ "loss": 1.1498,
396
+ "step": 45000
397
+ },
398
+ {
399
+ "epoch": 5.1194539249146755,
400
+ "eval_accuracy": 0.703608,
401
+ "eval_loss": 1.1993978023529053,
402
+ "eval_runtime": 16.3707,
403
+ "eval_samples_per_second": 15271.187,
404
+ "eval_steps_per_second": 29.87,
405
+ "step": 45000
406
+ },
407
+ {
408
+ "epoch": 5.233219567690558,
409
+ "grad_norm": 1.3525902032852173,
410
+ "learning_rate": 0.00038152445961319684,
411
+ "loss": 1.1507,
412
+ "step": 46000
413
+ },
414
+ {
415
+ "epoch": 5.346985210466439,
416
+ "grad_norm": 1.3642832040786743,
417
+ "learning_rate": 0.0003724232081911263,
418
+ "loss": 1.1551,
419
+ "step": 47000
420
+ },
421
+ {
422
+ "epoch": 5.460750853242321,
423
+ "grad_norm": 1.2102240324020386,
424
+ "learning_rate": 0.0003633219567690558,
425
+ "loss": 1.1574,
426
+ "step": 48000
427
+ },
428
+ {
429
+ "epoch": 5.5745164960182025,
430
+ "grad_norm": 1.1597959995269775,
431
+ "learning_rate": 0.0003542207053469852,
432
+ "loss": 1.1545,
433
+ "step": 49000
434
+ },
435
+ {
436
+ "epoch": 5.688282138794084,
437
+ "grad_norm": 1.2223830223083496,
438
+ "learning_rate": 0.00034512855517633675,
439
+ "loss": 1.1548,
440
+ "step": 50000
441
+ },
442
+ {
443
+ "epoch": 5.688282138794084,
444
+ "eval_accuracy": 0.705224,
445
+ "eval_loss": 1.1899733543395996,
446
+ "eval_runtime": 16.029,
447
+ "eval_samples_per_second": 15596.716,
448
+ "eval_steps_per_second": 30.507,
449
+ "step": 50000
450
+ },
451
+ {
452
+ "epoch": 5.802047781569966,
453
+ "grad_norm": 1.1772878170013428,
454
+ "learning_rate": 0.0003360364050056883,
455
+ "loss": 1.1543,
456
+ "step": 51000
457
+ },
458
+ {
459
+ "epoch": 5.915813424345847,
460
+ "grad_norm": 1.286970615386963,
461
+ "learning_rate": 0.00032693515358361776,
462
+ "loss": 1.1566,
463
+ "step": 52000
464
+ },
465
+ {
466
+ "epoch": 6.0295790671217295,
467
+ "grad_norm": 1.1497869491577148,
468
+ "learning_rate": 0.00031783390216154724,
469
+ "loss": 1.1471,
470
+ "step": 53000
471
+ },
472
+ {
473
+ "epoch": 6.143344709897611,
474
+ "grad_norm": 1.2324450016021729,
475
+ "learning_rate": 0.00030873265073947667,
476
+ "loss": 1.1141,
477
+ "step": 54000
478
+ },
479
+ {
480
+ "epoch": 6.257110352673493,
481
+ "grad_norm": 1.175905466079712,
482
+ "learning_rate": 0.00029963139931740615,
483
+ "loss": 1.1232,
484
+ "step": 55000
485
+ },
486
+ {
487
+ "epoch": 6.257110352673493,
488
+ "eval_accuracy": 0.707532,
489
+ "eval_loss": 1.183059573173523,
490
+ "eval_runtime": 16.1679,
491
+ "eval_samples_per_second": 15462.772,
492
+ "eval_steps_per_second": 30.245,
493
+ "step": 55000
494
+ },
495
+ {
496
+ "epoch": 6.370875995449374,
497
+ "grad_norm": 1.133489966392517,
498
+ "learning_rate": 0.00029053924914675767,
499
+ "loss": 1.1213,
500
+ "step": 56000
501
+ },
502
+ {
503
+ "epoch": 6.484641638225256,
504
+ "grad_norm": 1.3633593320846558,
505
+ "learning_rate": 0.00028143799772468715,
506
+ "loss": 1.1206,
507
+ "step": 57000
508
+ },
509
+ {
510
+ "epoch": 6.598407281001138,
511
+ "grad_norm": 1.2622781991958618,
512
+ "learning_rate": 0.00027233674630261663,
513
+ "loss": 1.1241,
514
+ "step": 58000
515
+ },
516
+ {
517
+ "epoch": 6.712172923777019,
518
+ "grad_norm": 1.2032582759857178,
519
+ "learning_rate": 0.00026324459613196816,
520
+ "loss": 1.1276,
521
+ "step": 59000
522
+ },
523
+ {
524
+ "epoch": 6.825938566552901,
525
+ "grad_norm": 1.166924238204956,
526
+ "learning_rate": 0.00025414334470989764,
527
+ "loss": 1.1264,
528
+ "step": 60000
529
+ },
530
+ {
531
+ "epoch": 6.825938566552901,
532
+ "eval_accuracy": 0.710036,
533
+ "eval_loss": 1.1695001125335693,
534
+ "eval_runtime": 16.198,
535
+ "eval_samples_per_second": 15434.001,
536
+ "eval_steps_per_second": 30.189,
537
+ "step": 60000
538
+ },
539
+ {
540
+ "epoch": 6.939704209328783,
541
+ "grad_norm": 1.236396074295044,
542
+ "learning_rate": 0.00024505119453924917,
543
+ "loss": 1.1196,
544
+ "step": 61000
545
+ },
546
+ {
547
+ "epoch": 7.053469852104665,
548
+ "grad_norm": 1.2301005125045776,
549
+ "learning_rate": 0.00023594994311717865,
550
+ "loss": 1.1065,
551
+ "step": 62000
552
+ },
553
+ {
554
+ "epoch": 7.167235494880546,
555
+ "grad_norm": 1.1987460851669312,
556
+ "learning_rate": 0.00022685779294653017,
557
+ "loss": 1.0845,
558
+ "step": 63000
559
+ },
560
+ {
561
+ "epoch": 7.281001137656427,
562
+ "grad_norm": 1.367330551147461,
563
+ "learning_rate": 0.0002177565415244596,
564
+ "loss": 1.0915,
565
+ "step": 64000
566
+ },
567
+ {
568
+ "epoch": 7.39476678043231,
569
+ "grad_norm": 1.2554900646209717,
570
+ "learning_rate": 0.00020865529010238908,
571
+ "loss": 1.0896,
572
+ "step": 65000
573
+ },
574
+ {
575
+ "epoch": 7.39476678043231,
576
+ "eval_accuracy": 0.712788,
577
+ "eval_loss": 1.1583917140960693,
578
+ "eval_runtime": 15.94,
579
+ "eval_samples_per_second": 15683.855,
580
+ "eval_steps_per_second": 30.678,
581
+ "step": 65000
582
+ },
583
+ {
584
+ "epoch": 7.508532423208191,
585
+ "grad_norm": 1.1475346088409424,
586
+ "learning_rate": 0.00019955403868031853,
587
+ "loss": 1.0937,
588
+ "step": 66000
589
+ },
590
+ {
591
+ "epoch": 7.622298065984073,
592
+ "grad_norm": 1.2330896854400635,
593
+ "learning_rate": 0.000190452787258248,
594
+ "loss": 1.095,
595
+ "step": 67000
596
+ },
597
+ {
598
+ "epoch": 7.736063708759954,
599
+ "grad_norm": 1.3467962741851807,
600
+ "learning_rate": 0.0001813515358361775,
601
+ "loss": 1.0945,
602
+ "step": 68000
603
+ },
604
+ {
605
+ "epoch": 7.849829351535837,
606
+ "grad_norm": 1.144555926322937,
607
+ "learning_rate": 0.00017225938566552902,
608
+ "loss": 1.0943,
609
+ "step": 69000
610
+ },
611
+ {
612
+ "epoch": 7.963594994311718,
613
+ "grad_norm": 1.39180326461792,
614
+ "learning_rate": 0.0001631581342434585,
615
+ "loss": 1.0917,
616
+ "step": 70000
617
+ },
618
+ {
619
+ "epoch": 7.963594994311718,
620
+ "eval_accuracy": 0.715496,
621
+ "eval_loss": 1.1535059213638306,
622
+ "eval_runtime": 16.0681,
623
+ "eval_samples_per_second": 15558.787,
624
+ "eval_steps_per_second": 30.433,
625
+ "step": 70000
626
+ },
627
+ {
628
+ "epoch": 8.0773606370876,
629
+ "grad_norm": 1.277241587638855,
630
+ "learning_rate": 0.00015405688282138795,
631
+ "loss": 1.0693,
632
+ "step": 71000
633
+ },
634
+ {
635
+ "epoch": 8.19112627986348,
636
+ "grad_norm": 1.3388996124267578,
637
+ "learning_rate": 0.00014496473265073948,
638
+ "loss": 1.064,
639
+ "step": 72000
640
+ },
641
+ {
642
+ "epoch": 8.304891922639364,
643
+ "grad_norm": 1.1635925769805908,
644
+ "learning_rate": 0.00013588168373151308,
645
+ "loss": 1.0617,
646
+ "step": 73000
647
+ },
648
+ {
649
+ "epoch": 8.418657565415245,
650
+ "grad_norm": 1.1681923866271973,
651
+ "learning_rate": 0.00012678043230944256,
652
+ "loss": 1.0664,
653
+ "step": 74000
654
+ },
655
+ {
656
+ "epoch": 8.532423208191126,
657
+ "grad_norm": 1.3212028741836548,
658
+ "learning_rate": 0.00011767918088737203,
659
+ "loss": 1.0654,
660
+ "step": 75000
661
+ },
662
+ {
663
+ "epoch": 8.532423208191126,
664
+ "eval_accuracy": 0.714384,
665
+ "eval_loss": 1.154496192932129,
666
+ "eval_runtime": 16.158,
667
+ "eval_samples_per_second": 15472.18,
668
+ "eval_steps_per_second": 30.264,
669
+ "step": 75000
670
+ },
671
+ {
672
+ "epoch": 8.646188850967008,
673
+ "grad_norm": 1.341015100479126,
674
+ "learning_rate": 0.00010857792946530148,
675
+ "loss": 1.0618,
676
+ "step": 76000
677
+ },
678
+ {
679
+ "epoch": 8.759954493742889,
680
+ "grad_norm": 1.2505824565887451,
681
+ "learning_rate": 9.947667804323096e-05,
682
+ "loss": 1.0674,
683
+ "step": 77000
684
+ },
685
+ {
686
+ "epoch": 8.873720136518772,
687
+ "grad_norm": 1.2615190744400024,
688
+ "learning_rate": 9.037542662116041e-05,
689
+ "loss": 1.0638,
690
+ "step": 78000
691
+ },
692
+ {
693
+ "epoch": 8.987485779294653,
694
+ "grad_norm": 1.2935796976089478,
695
+ "learning_rate": 8.128327645051195e-05,
696
+ "loss": 1.0616,
697
+ "step": 79000
698
+ },
699
+ {
700
+ "epoch": 9.101251422070535,
701
+ "grad_norm": 1.3248777389526367,
702
+ "learning_rate": 7.218202502844142e-05,
703
+ "loss": 1.0395,
704
+ "step": 80000
705
+ },
706
+ {
707
+ "epoch": 9.101251422070535,
708
+ "eval_accuracy": 0.716892,
709
+ "eval_loss": 1.1470571756362915,
710
+ "eval_runtime": 16.0825,
711
+ "eval_samples_per_second": 15544.827,
712
+ "eval_steps_per_second": 30.406,
713
+ "step": 80000
714
+ },
715
+ {
716
+ "epoch": 9.215017064846416,
717
+ "grad_norm": 1.379506230354309,
718
+ "learning_rate": 6.308077360637088e-05,
719
+ "loss": 1.0436,
720
+ "step": 81000
721
+ },
722
+ {
723
+ "epoch": 9.328782707622299,
724
+ "grad_norm": 1.1906781196594238,
725
+ "learning_rate": 5.398862343572242e-05,
726
+ "loss": 1.0417,
727
+ "step": 82000
728
+ },
729
+ {
730
+ "epoch": 9.44254835039818,
731
+ "grad_norm": 1.1397643089294434,
732
+ "learning_rate": 4.489647326507395e-05,
733
+ "loss": 1.0376,
734
+ "step": 83000
735
+ },
736
+ {
737
+ "epoch": 9.556313993174061,
738
+ "grad_norm": 1.0807147026062012,
739
+ "learning_rate": 3.5813424345847554e-05,
740
+ "loss": 1.0381,
741
+ "step": 84000
742
+ },
743
+ {
744
+ "epoch": 9.670079635949943,
745
+ "grad_norm": 1.3149391412734985,
746
+ "learning_rate": 2.6712172923777017e-05,
747
+ "loss": 1.0383,
748
+ "step": 85000
749
+ },
750
+ {
751
+ "epoch": 9.670079635949943,
752
+ "eval_accuracy": 0.713636,
753
+ "eval_loss": 1.1722280979156494,
754
+ "eval_runtime": 16.186,
755
+ "eval_samples_per_second": 15445.423,
756
+ "eval_steps_per_second": 30.211,
757
+ "step": 85000
758
+ },
759
+ {
760
+ "epoch": 9.783845278725824,
761
+ "grad_norm": 1.227634072303772,
762
+ "learning_rate": 1.7610921501706483e-05,
763
+ "loss": 1.0359,
764
+ "step": 86000
765
+ },
766
+ {
767
+ "epoch": 9.897610921501707,
768
+ "grad_norm": 1.2846591472625732,
769
+ "learning_rate": 8.509670079635951e-06,
770
+ "loss": 1.0337,
771
+ "step": 87000
772
+ },
773
+ {
774
+ "epoch": 10.0,
775
+ "step": 87900,
776
+ "total_flos": 5.6417821488e+17,
777
+ "train_loss": 1.2023330011465443,
778
+ "train_runtime": 3087.8654,
779
+ "train_samples_per_second": 14573.174,
780
+ "train_steps_per_second": 28.466
781
+ }
782
+ ],
783
+ "logging_steps": 1000,
784
+ "max_steps": 87900,
785
+ "num_input_tokens_seen": 0,
786
+ "num_train_epochs": 10,
787
+ "save_steps": 5000,
788
+ "total_flos": 5.6417821488e+17,
789
+ "train_batch_size": 512,
790
+ "trial_name": null,
791
+ "trial_params": null
792
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cb83c53dad265eea2b2575de9c35416e393e7c9c7d7cf436ca11a228b78fb59
3
+ size 4984