alyzbane committed on
Commit
dbed317
·
verified ·
1 Parent(s): 8bdbf18

End of training

Browse files
README.md ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: apache-2.0
4
+ base_model: microsoft/resnet-50
5
+ tags:
6
+ - generated_from_trainer
7
+ metrics:
8
+ - precision
9
+ - recall
10
+ - f1
11
+ - accuracy
12
+ model-index:
13
+ - name: resnet-50-finetuned-barkley
14
+ results: []
15
+ ---
16
+
17
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
18
+ should probably proofread and complete it, then remove this comment. -->
19
+
20
+ # resnet-50-finetuned-barkley
21
+
22
+ This model is a fine-tuned version of [microsoft/resnet-50](https://huggingface.co/microsoft/resnet-50) on an unknown dataset.
23
+ It achieves the following results on the evaluation set (metrics are from the best checkpoint, epoch 25 / step 950; training continued to epoch 27):
24
+ - Loss: 0.9221
25
+ - Precision: 0.8780
26
+ - Recall: 0.8618
27
+ - F1: 0.8574
28
+ - Accuracy: 0.8744
29
+ - Top1 Accuracy: 0.8618
30
+ - Error Rate: 0.1256
31
+
32
+ ## Model description
33
+
34
+ More information needed
35
+
36
+ ## Intended uses & limitations
37
+
38
+ More information needed
39
+
40
+ ## Training and evaluation data
41
+
42
+ More information needed
43
+
44
+ ## Training procedure
45
+
46
+ ### Training hyperparameters
47
+
48
+ The following hyperparameters were used during training:
49
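+ - learning_rate: 0.0002 (configured value; the learning rates logged in trainer_state.json peak at about 2e-05 around step 342, so the effective schedule may differ — verify)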
50
+ - train_batch_size: 32
51
+ - eval_batch_size: 32
52
+ - seed: 42
53
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
54
+ - lr_scheduler_type: cosine
55
+ - lr_scheduler_warmup_ratio: 0.1
56
+ - num_epochs: 30 (configured maximum; training stopped after epoch 27 via EarlyStoppingCallback with patience 2)
57
+ - mixed_precision_training: Native AMP
58
+
59
+ ### Training results
60
+
61
+ | Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1 | Accuracy | Top1 Accuracy | Error Rate |
62
+ |:-------------:|:-----:|:----:|:---------------:|:---------:|:------:|:------:|:--------:|:-------------:|:----------:|
63
+ | 1.6171 | 1.0 | 38 | 1.6195 | 0.0663 | 0.1513 | 0.0664 | 0.1738 | 0.1513 | 0.8262 |
64
+ | 1.6149 | 2.0 | 76 | 1.6160 | 0.2953 | 0.1579 | 0.0802 | 0.1785 | 0.1579 | 0.8215 |
65
+ | 1.6119 | 3.0 | 114 | 1.6112 | 0.0804 | 0.1579 | 0.0834 | 0.1772 | 0.1579 | 0.8228 |
66
+ | 1.6041 | 4.0 | 152 | 1.6015 | 0.4161 | 0.1974 | 0.1461 | 0.2155 | 0.1974 | 0.7845 |
67
+ | 1.5945 | 5.0 | 190 | 1.5895 | 0.4089 | 0.2895 | 0.2428 | 0.3092 | 0.2895 | 0.6908 |
68
+ | 1.5777 | 6.0 | 228 | 1.5710 | 0.5764 | 0.4408 | 0.3944 | 0.4663 | 0.4408 | 0.5337 |
69
+ | 1.561 | 7.0 | 266 | 1.5490 | 0.6013 | 0.4934 | 0.4516 | 0.5173 | 0.5 | 0.4827 |
70
+ | 1.536 | 8.0 | 304 | 1.5222 | 0.6377 | 0.5132 | 0.4711 | 0.5450 | 0.5132 | 0.4550 |
71
+ | 1.5081 | 9.0 | 342 | 1.4912 | 0.7595 | 0.5987 | 0.5869 | 0.6250 | 0.5987 | 0.3750 |
72
+ | 1.4756 | 10.0 | 380 | 1.4566 | 0.7579 | 0.6447 | 0.6293 | 0.6683 | 0.6447 | 0.3317 |
73
+ | 1.4387 | 11.0 | 418 | 1.4156 | 0.7914 | 0.6776 | 0.6692 | 0.6985 | 0.6776 | 0.3015 |
74
+ | 1.3993 | 12.0 | 456 | 1.3737 | 0.7997 | 0.6842 | 0.6732 | 0.7080 | 0.6842 | 0.2920 |
75
+ | 1.358 | 13.0 | 494 | 1.3288 | 0.8290 | 0.7039 | 0.7048 | 0.7232 | 0.7039 | 0.2768 |
76
+ | 1.3139 | 14.0 | 532 | 1.2806 | 0.8277 | 0.7434 | 0.7373 | 0.7592 | 0.75 | 0.2408 |
77
+ | 1.262 | 15.0 | 570 | 1.2345 | 0.8478 | 0.7697 | 0.7664 | 0.7829 | 0.7697 | 0.2171 |
78
+ | 1.2184 | 16.0 | 608 | 1.1887 | 0.8323 | 0.7697 | 0.7654 | 0.7818 | 0.7697 | 0.2182 |
79
+ | 1.1803 | 17.0 | 646 | 1.1408 | 0.8423 | 0.7763 | 0.7735 | 0.7931 | 0.7763 | 0.2069 |
80
+ | 1.1422 | 18.0 | 684 | 1.0966 | 0.8594 | 0.8158 | 0.8100 | 0.8317 | 0.8158 | 0.1683 |
81
+ | 1.1032 | 19.0 | 722 | 1.0587 | 0.8431 | 0.8026 | 0.7969 | 0.8145 | 0.8026 | 0.1855 |
82
+ | 1.058 | 20.0 | 760 | 1.0289 | 0.8610 | 0.8355 | 0.8301 | 0.8487 | 0.8355 | 0.1513 |
83
+ | 1.0252 | 21.0 | 798 | 0.9918 | 0.8576 | 0.8421 | 0.8370 | 0.8534 | 0.8421 | 0.1466 |
84
+ | 1.002 | 22.0 | 836 | 0.9727 | 0.8677 | 0.8487 | 0.8435 | 0.8611 | 0.8487 | 0.1389 |
85
+ | 0.9812 | 23.0 | 874 | 0.9465 | 0.8795 | 0.8553 | 0.8497 | 0.8678 | 0.8553 | 0.1322 |
86
+ | 0.9636 | 24.0 | 912 | 0.9331 | 0.8820 | 0.8553 | 0.8485 | 0.8699 | 0.8553 | 0.1301 |
87
+ | 0.9591 | 25.0 | 950 | 0.9221 | 0.8780 | 0.8618 | 0.8574 | 0.8744 | 0.8618 | 0.1256 |
88
+ | 0.948 | 26.0 | 988 | 0.9158 | 0.8780 | 0.8618 | 0.8574 | 0.8744 | 0.8684 | 0.1256 |
89
+ | 0.9384 | 27.0 | 1026 | 0.9017 | 0.8685 | 0.8487 | 0.8431 | 0.8601 | 0.8487 | 0.1399 |
90
+
91
+
92
+ ### Framework versions
93
+
94
+ - Transformers 4.45.2
95
+ - Pytorch 2.5.0+cu121
96
+ - Datasets 3.0.1
97
+ - Tokenizers 0.20.1
all_results.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 27.0,
3
+ "eval_accuracy": 0.8743740573152337,
4
+ "eval_error_rate": 0.12562594268476635,
5
+ "eval_f1": 0.8573701550510042,
6
+ "eval_loss": 0.92206209897995,
7
+ "eval_precision": 0.877970723615921,
8
+ "eval_recall": 0.8618421052631579,
9
+ "eval_runtime": 25.6714,
10
+ "eval_samples_per_second": 5.921,
11
+ "eval_steps_per_second": 0.195,
12
+ "eval_top1_accuracy": 0.8618421052631579,
13
+ "total_flos": 6.9738304117683e+17,
14
+ "train_loss": 1.2960130829095375,
15
+ "train_runtime": 5177.2299,
16
+ "train_samples_per_second": 7.046,
17
+ "train_steps_per_second": 0.22
18
+ }
config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/resnet-50",
3
+ "architectures": [
4
+ "ResNetForImageClassification"
5
+ ],
6
+ "depths": [
7
+ 3,
8
+ 4,
9
+ 6,
10
+ 3
11
+ ],
12
+ "downsample_in_bottleneck": false,
13
+ "downsample_in_first_stage": false,
14
+ "embedding_size": 64,
15
+ "hidden_act": "relu",
16
+ "hidden_sizes": [
17
+ 256,
18
+ 512,
19
+ 1024,
20
+ 2048
21
+ ],
22
+ "id2label": {
23
+ "0": "Intsia bijuga",
24
+ "1": "Mangifera indica",
25
+ "2": "Pterocarpus indicus",
26
+ "3": "Roystonea regia",
27
+ "4": "Tabebuia"
28
+ },
29
+ "label2id": {
30
+ "Intsia bijuga": 0,
31
+ "Mangifera indica": 1,
32
+ "Pterocarpus indicus": 2,
33
+ "Roystonea regia": 3,
34
+ "Tabebuia": 4
35
+ },
36
+ "layer_type": "bottleneck",
37
+ "model_type": "resnet",
38
+ "num_channels": 3,
39
+ "out_features": [
40
+ "stage4"
41
+ ],
42
+ "out_indices": [
43
+ 4
44
+ ],
45
+ "problem_type": "single_label_classification",
46
+ "stage_names": [
47
+ "stem",
48
+ "stage1",
49
+ "stage2",
50
+ "stage3",
51
+ "stage4"
52
+ ],
53
+ "torch_dtype": "float32",
54
+ "transformers_version": "4.45.2"
55
+ }
eval_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 27.0,
3
+ "eval_accuracy": 0.8743740573152337,
4
+ "eval_error_rate": 0.12562594268476635,
5
+ "eval_f1": 0.8573701550510042,
6
+ "eval_loss": 0.92206209897995,
7
+ "eval_precision": 0.877970723615921,
8
+ "eval_recall": 0.8618421052631579,
9
+ "eval_runtime": 25.6714,
10
+ "eval_samples_per_second": 5.921,
11
+ "eval_steps_per_second": 0.195,
12
+ "eval_top1_accuracy": 0.8618421052631579
13
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c37f9ee1e794f5fcb4a63671450f0c9575850efd363d42801b4a1b34fd4054e
3
+ size 94327540
preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_pct": 0.875,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.485,
8
+ 0.456,
9
+ 0.406
10
+ ],
11
+ "image_processor_type": "ConvNextImageProcessor",
12
+ "image_std": [
13
+ 0.229,
14
+ 0.224,
15
+ 0.225
16
+ ],
17
+ "resample": 3,
18
+ "rescale_factor": 0.00392156862745098,
19
+ "size": {
20
+ "shortest_edge": 224
21
+ }
22
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 27.0,
3
+ "total_flos": 6.9738304117683e+17,
4
+ "train_loss": 1.2960130829095375,
5
+ "train_runtime": 5177.2299,
6
+ "train_samples_per_second": 7.046,
7
+ "train_steps_per_second": 0.22
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,726 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8743740573152337,
3
+ "best_model_checkpoint": "resnet-50-finetuned-barkley\\checkpoint-950",
4
+ "epoch": 27.0,
5
+ "eval_steps": 500,
6
+ "global_step": 1026,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "train_accuracy": 0.16611842105263158
14
+ },
15
+ {
16
+ "epoch": 1.0,
17
+ "grad_norm": 1.3611191511154175,
18
+ "learning_rate": 1.3823166234831842e-06,
19
+ "loss": 1.6171,
20
+ "step": 38
21
+ },
22
+ {
23
+ "epoch": 1.0,
24
+ "eval_accuracy": 0.17376068376068377,
25
+ "eval_error_rate": 0.8262393162393162,
26
+ "eval_f1": 0.06643250805361339,
27
+ "eval_loss": 1.619532585144043,
28
+ "eval_precision": 0.06626276231021362,
29
+ "eval_recall": 0.1513157894736842,
30
+ "eval_runtime": 26.7886,
31
+ "eval_samples_per_second": 5.674,
32
+ "eval_steps_per_second": 0.187,
33
+ "eval_top1_accuracy": 0.1513157894736842,
34
+ "step": 38
35
+ },
36
+ {
37
+ "epoch": 2.0,
38
+ "train_accuracy": 0.15862573099415206
39
+ },
40
+ {
41
+ "epoch": 2.0,
42
+ "grad_norm": 1.6634056568145752,
43
+ "learning_rate": 3.058622191852561e-06,
44
+ "loss": 1.6149,
45
+ "step": 76
46
+ },
47
+ {
48
+ "epoch": 2.0,
49
+ "eval_accuracy": 0.17853192559074912,
50
+ "eval_error_rate": 0.8214680744092508,
51
+ "eval_f1": 0.08020327273951958,
52
+ "eval_loss": 1.6160310506820679,
53
+ "eval_precision": 0.29525518341307816,
54
+ "eval_recall": 0.15789473684210525,
55
+ "eval_runtime": 26.6794,
56
+ "eval_samples_per_second": 5.697,
57
+ "eval_steps_per_second": 0.187,
58
+ "eval_top1_accuracy": 0.15789473684210525,
59
+ "step": 76
60
+ },
61
+ {
62
+ "epoch": 3.0,
63
+ "train_accuracy": 0.1783625730994152
64
+ },
65
+ {
66
+ "epoch": 3.0,
67
+ "grad_norm": 1.5223407745361328,
68
+ "learning_rate": 5.625554080420859e-06,
69
+ "loss": 1.6119,
70
+ "step": 114
71
+ },
72
+ {
73
+ "epoch": 3.0,
74
+ "eval_accuracy": 0.1771794871794872,
75
+ "eval_error_rate": 0.8228205128205128,
76
+ "eval_f1": 0.08341165413533834,
77
+ "eval_loss": 1.6111546754837036,
78
+ "eval_precision": 0.08044783010156971,
79
+ "eval_recall": 0.15789473684210525,
80
+ "eval_runtime": 26.2938,
81
+ "eval_samples_per_second": 5.781,
82
+ "eval_steps_per_second": 0.19,
83
+ "eval_top1_accuracy": 0.15789473684210525,
84
+ "step": 114
85
+ },
86
+ {
87
+ "epoch": 4.0,
88
+ "train_accuracy": 0.20833333333333334
89
+ },
90
+ {
91
+ "epoch": 4.0,
92
+ "grad_norm": 1.3961833715438843,
93
+ "learning_rate": 8.771702474591739e-06,
94
+ "loss": 1.6041,
95
+ "step": 152
96
+ },
97
+ {
98
+ "epoch": 4.0,
99
+ "eval_accuracy": 0.21552538964303675,
100
+ "eval_error_rate": 0.7844746103569633,
101
+ "eval_f1": 0.14610577502522265,
102
+ "eval_loss": 1.6015431880950928,
103
+ "eval_precision": 0.4161313363641264,
104
+ "eval_recall": 0.19736842105263158,
105
+ "eval_runtime": 28.8611,
106
+ "eval_samples_per_second": 5.267,
107
+ "eval_steps_per_second": 0.173,
108
+ "eval_top1_accuracy": 0.19736842105263158,
109
+ "step": 152
110
+ },
111
+ {
112
+ "epoch": 5.0,
113
+ "train_accuracy": 0.24926900584795322
114
+ },
115
+ {
116
+ "epoch": 5.0,
117
+ "grad_norm": 1.5497902631759644,
118
+ "learning_rate": 1.2115389351475484e-05,
119
+ "loss": 1.5945,
120
+ "step": 190
121
+ },
122
+ {
123
+ "epoch": 5.0,
124
+ "eval_accuracy": 0.30917043740573147,
125
+ "eval_error_rate": 0.6908295625942685,
126
+ "eval_f1": 0.24280704310495774,
127
+ "eval_loss": 1.5894904136657715,
128
+ "eval_precision": 0.40893317853457173,
129
+ "eval_recall": 0.2894736842105263,
130
+ "eval_runtime": 27.1975,
131
+ "eval_samples_per_second": 5.589,
132
+ "eval_steps_per_second": 0.184,
133
+ "eval_top1_accuracy": 0.2894736842105263,
134
+ "step": 190
135
+ },
136
+ {
137
+ "epoch": 6.0,
138
+ "train_accuracy": 0.33260233918128657
139
+ },
140
+ {
141
+ "epoch": 6.0,
142
+ "grad_norm": 1.6292859315872192,
143
+ "learning_rate": 1.5250972116877936e-05,
144
+ "loss": 1.5777,
145
+ "step": 228
146
+ },
147
+ {
148
+ "epoch": 6.0,
149
+ "eval_accuracy": 0.4663499245852186,
150
+ "eval_error_rate": 0.5336500754147814,
151
+ "eval_f1": 0.39436167836929775,
152
+ "eval_loss": 1.5709806680679321,
153
+ "eval_precision": 0.5764411027568922,
154
+ "eval_recall": 0.4407894736842105,
155
+ "eval_runtime": 26.1121,
156
+ "eval_samples_per_second": 5.821,
157
+ "eval_steps_per_second": 0.191,
158
+ "eval_top1_accuracy": 0.4407894736842105,
159
+ "step": 228
160
+ },
161
+ {
162
+ "epoch": 7.0,
163
+ "train_accuracy": 0.4093567251461988
164
+ },
165
+ {
166
+ "epoch": 7.0,
167
+ "grad_norm": 1.4761221408843994,
168
+ "learning_rate": 1.7798054527340503e-05,
169
+ "loss": 1.561,
170
+ "step": 266
171
+ },
172
+ {
173
+ "epoch": 7.0,
174
+ "eval_accuracy": 0.5173303167420814,
175
+ "eval_error_rate": 0.4826696832579186,
176
+ "eval_f1": 0.45158707610515547,
177
+ "eval_loss": 1.5490015745162964,
178
+ "eval_precision": 0.6013273211996122,
179
+ "eval_recall": 0.4934210526315789,
180
+ "eval_runtime": 27.8381,
181
+ "eval_samples_per_second": 5.46,
182
+ "eval_steps_per_second": 0.18,
183
+ "eval_top1_accuracy": 0.5,
184
+ "step": 266
185
+ },
186
+ {
187
+ "epoch": 8.0,
188
+ "train_accuracy": 0.47733918128654973
189
+ },
190
+ {
191
+ "epoch": 8.0,
192
+ "grad_norm": 1.691532850265503,
193
+ "learning_rate": 1.9447634826822778e-05,
194
+ "loss": 1.536,
195
+ "step": 304
196
+ },
197
+ {
198
+ "epoch": 8.0,
199
+ "eval_accuracy": 0.5450075414781297,
200
+ "eval_error_rate": 0.4549924585218703,
201
+ "eval_f1": 0.4711077818086626,
202
+ "eval_loss": 1.5221654176712036,
203
+ "eval_precision": 0.63772879167616,
204
+ "eval_recall": 0.5131578947368421,
205
+ "eval_runtime": 26.2831,
206
+ "eval_samples_per_second": 5.783,
207
+ "eval_steps_per_second": 0.19,
208
+ "eval_top1_accuracy": 0.5131578947368421,
209
+ "step": 304
210
+ },
211
+ {
212
+ "epoch": 9.0,
213
+ "train_accuracy": 0.5372807017543859
214
+ },
215
+ {
216
+ "epoch": 9.0,
217
+ "grad_norm": 1.69563889503479,
218
+ "learning_rate": 1.9999922507143676e-05,
219
+ "loss": 1.5081,
220
+ "step": 342
221
+ },
222
+ {
223
+ "epoch": 9.0,
224
+ "eval_accuracy": 0.625032679738562,
225
+ "eval_error_rate": 0.374967320261438,
226
+ "eval_f1": 0.5868892868172693,
227
+ "eval_loss": 1.4911595582962036,
228
+ "eval_precision": 0.7594769272604533,
229
+ "eval_recall": 0.5986842105263158,
230
+ "eval_runtime": 27.44,
231
+ "eval_samples_per_second": 5.539,
232
+ "eval_steps_per_second": 0.182,
233
+ "eval_top1_accuracy": 0.5986842105263158,
234
+ "step": 342
235
+ },
236
+ {
237
+ "epoch": 10.0,
238
+ "train_accuracy": 0.5957602339181286
239
+ },
240
+ {
241
+ "epoch": 10.0,
242
+ "grad_norm": 1.694950819015503,
243
+ "learning_rate": 1.9882364575351117e-05,
244
+ "loss": 1.4756,
245
+ "step": 380
246
+ },
247
+ {
248
+ "epoch": 10.0,
249
+ "eval_accuracy": 0.6683459024635495,
250
+ "eval_error_rate": 0.3316540975364505,
251
+ "eval_f1": 0.6293044227869377,
252
+ "eval_loss": 1.4565558433532715,
253
+ "eval_precision": 0.7578870575407971,
254
+ "eval_recall": 0.6447368421052632,
255
+ "eval_runtime": 25.8912,
256
+ "eval_samples_per_second": 5.871,
257
+ "eval_steps_per_second": 0.193,
258
+ "eval_top1_accuracy": 0.6447368421052632,
259
+ "step": 380
260
+ },
261
+ {
262
+ "epoch": 11.0,
263
+ "train_accuracy": 0.6111111111111112
264
+ },
265
+ {
266
+ "epoch": 11.0,
267
+ "grad_norm": 1.591986894607544,
268
+ "learning_rate": 1.9544051842595e-05,
269
+ "loss": 1.4387,
270
+ "step": 418
271
+ },
272
+ {
273
+ "epoch": 11.0,
274
+ "eval_accuracy": 0.698541980894922,
275
+ "eval_error_rate": 0.30145801910507797,
276
+ "eval_f1": 0.6692242337767512,
277
+ "eval_loss": 1.4155722856521606,
278
+ "eval_precision": 0.7913987173816895,
279
+ "eval_recall": 0.6776315789473685,
280
+ "eval_runtime": 26.7774,
281
+ "eval_samples_per_second": 5.676,
282
+ "eval_steps_per_second": 0.187,
283
+ "eval_top1_accuracy": 0.6776315789473685,
284
+ "step": 418
285
+ },
286
+ {
287
+ "epoch": 12.0,
288
+ "train_accuracy": 0.6469298245614035
289
+ },
290
+ {
291
+ "epoch": 12.0,
292
+ "grad_norm": 1.8263485431671143,
293
+ "learning_rate": 1.9009692640269474e-05,
294
+ "loss": 1.3993,
295
+ "step": 456
296
+ },
297
+ {
298
+ "epoch": 12.0,
299
+ "eval_accuracy": 0.7080442433383609,
300
+ "eval_error_rate": 0.2919557566616391,
301
+ "eval_f1": 0.6731791852575238,
302
+ "eval_loss": 1.3737214803695679,
303
+ "eval_precision": 0.7997336700204268,
304
+ "eval_recall": 0.6842105263157895,
305
+ "eval_runtime": 25.8778,
306
+ "eval_samples_per_second": 5.874,
307
+ "eval_steps_per_second": 0.193,
308
+ "eval_top1_accuracy": 0.6842105263157895,
309
+ "step": 456
310
+ },
311
+ {
312
+ "epoch": 13.0,
313
+ "train_accuracy": 0.6527777777777778
314
+ },
315
+ {
316
+ "epoch": 13.0,
317
+ "grad_norm": 1.8651676177978516,
318
+ "learning_rate": 1.826239469360898e-05,
319
+ "loss": 1.358,
320
+ "step": 494
321
+ },
322
+ {
323
+ "epoch": 13.0,
324
+ "eval_accuracy": 0.7232277526395173,
325
+ "eval_error_rate": 0.27677224736048267,
326
+ "eval_f1": 0.7048030719072474,
327
+ "eval_loss": 1.3288253545761108,
328
+ "eval_precision": 0.8290264820356354,
329
+ "eval_recall": 0.7039473684210527,
330
+ "eval_runtime": 26.9677,
331
+ "eval_samples_per_second": 5.636,
332
+ "eval_steps_per_second": 0.185,
333
+ "eval_top1_accuracy": 0.7039473684210527,
334
+ "step": 494
335
+ },
336
+ {
337
+ "epoch": 14.0,
338
+ "train_accuracy": 0.7017543859649122
339
+ },
340
+ {
341
+ "epoch": 14.0,
342
+ "grad_norm": 2.0590460300445557,
343
+ "learning_rate": 1.733052939622339e-05,
344
+ "loss": 1.3139,
345
+ "step": 532
346
+ },
347
+ {
348
+ "epoch": 14.0,
349
+ "eval_accuracy": 0.7592207139265963,
350
+ "eval_error_rate": 0.2407792860734037,
351
+ "eval_f1": 0.7373063276167947,
352
+ "eval_loss": 1.2805979251861572,
353
+ "eval_precision": 0.8276704808408175,
354
+ "eval_recall": 0.743421052631579,
355
+ "eval_runtime": 24.8464,
356
+ "eval_samples_per_second": 6.118,
357
+ "eval_steps_per_second": 0.201,
358
+ "eval_top1_accuracy": 0.75,
359
+ "step": 532
360
+ },
361
+ {
362
+ "epoch": 15.0,
363
+ "train_accuracy": 0.7185672514619883
364
+ },
365
+ {
366
+ "epoch": 15.0,
367
+ "grad_norm": 2.4823436737060547,
368
+ "learning_rate": 1.6234913078995263e-05,
369
+ "loss": 1.262,
370
+ "step": 570
371
+ },
372
+ {
373
+ "epoch": 15.0,
374
+ "eval_accuracy": 0.7828808446455506,
375
+ "eval_error_rate": 0.21711915535444937,
376
+ "eval_f1": 0.7664102924938837,
377
+ "eval_loss": 1.2344970703125,
378
+ "eval_precision": 0.8477553968860445,
379
+ "eval_recall": 0.7697368421052632,
380
+ "eval_runtime": 26.3117,
381
+ "eval_samples_per_second": 5.777,
382
+ "eval_steps_per_second": 0.19,
383
+ "eval_top1_accuracy": 0.7697368421052632,
384
+ "step": 570
385
+ },
386
+ {
387
+ "epoch": 16.0,
388
+ "train_accuracy": 0.7236842105263158
389
+ },
390
+ {
391
+ "epoch": 16.0,
392
+ "grad_norm": 2.0972795486450195,
393
+ "learning_rate": 1.5000020000000002e-05,
394
+ "loss": 1.2184,
395
+ "step": 608
396
+ },
397
+ {
398
+ "epoch": 16.0,
399
+ "eval_accuracy": 0.7817697335344395,
400
+ "eval_error_rate": 0.21823026646556054,
401
+ "eval_f1": 0.7654284912445182,
402
+ "eval_loss": 1.1887174844741821,
403
+ "eval_precision": 0.8322669418644651,
404
+ "eval_recall": 0.7697368421052632,
405
+ "eval_runtime": 25.3816,
406
+ "eval_samples_per_second": 5.989,
407
+ "eval_steps_per_second": 0.197,
408
+ "eval_top1_accuracy": 0.7697368421052632,
409
+ "step": 608
410
+ },
411
+ {
412
+ "epoch": 17.0,
413
+ "train_accuracy": 0.7448830409356725
414
+ },
415
+ {
416
+ "epoch": 17.0,
417
+ "grad_norm": 1.9592589139938354,
418
+ "learning_rate": 1.365343563002298e-05,
419
+ "loss": 1.1803,
420
+ "step": 646
421
+ },
422
+ {
423
+ "epoch": 17.0,
424
+ "eval_accuracy": 0.7930819507290096,
425
+ "eval_error_rate": 0.20691804927099045,
426
+ "eval_f1": 0.773531629357028,
427
+ "eval_loss": 1.1408498287200928,
428
+ "eval_precision": 0.8423109913821989,
429
+ "eval_recall": 0.7763157894736842,
430
+ "eval_runtime": 26.1892,
431
+ "eval_samples_per_second": 5.804,
432
+ "eval_steps_per_second": 0.191,
433
+ "eval_top1_accuracy": 0.7763157894736842,
434
+ "step": 646
435
+ },
436
+ {
437
+ "epoch": 18.0,
438
+ "train_accuracy": 0.7580409356725146
439
+ },
440
+ {
441
+ "epoch": 18.0,
442
+ "grad_norm": 2.348762035369873,
443
+ "learning_rate": 1.2225240438725788e-05,
444
+ "loss": 1.1422,
445
+ "step": 684
446
+ },
447
+ {
448
+ "epoch": 18.0,
449
+ "eval_accuracy": 0.83166918049271,
450
+ "eval_error_rate": 0.16833081950729,
451
+ "eval_f1": 0.810022076785411,
452
+ "eval_loss": 1.096580147743225,
453
+ "eval_precision": 0.8594414607948442,
454
+ "eval_recall": 0.8157894736842105,
455
+ "eval_runtime": 25.0285,
456
+ "eval_samples_per_second": 6.073,
457
+ "eval_steps_per_second": 0.2,
458
+ "eval_top1_accuracy": 0.8157894736842105,
459
+ "step": 684
460
+ },
461
+ {
462
+ "epoch": 19.0,
463
+ "train_accuracy": 0.7733918128654971
464
+ },
465
+ {
466
+ "epoch": 19.0,
467
+ "grad_norm": 2.2395124435424805,
468
+ "learning_rate": 1.0747337946660503e-05,
469
+ "loss": 1.1032,
470
+ "step": 722
471
+ },
472
+ {
473
+ "epoch": 19.0,
474
+ "eval_accuracy": 0.8144746103569632,
475
+ "eval_error_rate": 0.18552538964303678,
476
+ "eval_f1": 0.7968691491574786,
477
+ "eval_loss": 1.0586965084075928,
478
+ "eval_precision": 0.8430972766845342,
479
+ "eval_recall": 0.8026315789473685,
480
+ "eval_runtime": 25.4802,
481
+ "eval_samples_per_second": 5.965,
482
+ "eval_steps_per_second": 0.196,
483
+ "eval_top1_accuracy": 0.8026315789473685,
484
+ "step": 722
485
+ },
486
+ {
487
+ "epoch": 20.0,
488
+ "train_accuracy": 0.7850877192982456
489
+ },
490
+ {
491
+ "epoch": 20.0,
492
+ "grad_norm": 2.3096365928649902,
493
+ "learning_rate": 9.252742053339503e-06,
494
+ "loss": 1.058,
495
+ "step": 760
496
+ },
497
+ {
498
+ "epoch": 20.0,
499
+ "eval_accuracy": 0.8486626445449975,
500
+ "eval_error_rate": 0.1513373554550025,
501
+ "eval_f1": 0.8300500291649842,
502
+ "eval_loss": 1.0289386510849,
503
+ "eval_precision": 0.8609899749373433,
504
+ "eval_recall": 0.8355263157894737,
505
+ "eval_runtime": 27.6424,
506
+ "eval_samples_per_second": 5.499,
507
+ "eval_steps_per_second": 0.181,
508
+ "eval_top1_accuracy": 0.8355263157894737,
509
+ "step": 760
510
+ },
511
+ {
512
+ "epoch": 21.0,
513
+ "train_accuracy": 0.8084795321637427
514
+ },
515
+ {
516
+ "epoch": 21.0,
517
+ "grad_norm": 2.616567611694336,
518
+ "learning_rate": 7.774839561274216e-06,
519
+ "loss": 1.0252,
520
+ "step": 798
521
+ },
522
+ {
523
+ "epoch": 21.0,
524
+ "eval_accuracy": 0.8534338863750628,
525
+ "eval_error_rate": 0.1465661136249372,
526
+ "eval_f1": 0.8370304500033733,
527
+ "eval_loss": 0.9917795062065125,
528
+ "eval_precision": 0.8575724637681159,
529
+ "eval_recall": 0.8421052631578947,
530
+ "eval_runtime": 24.0656,
531
+ "eval_samples_per_second": 6.316,
532
+ "eval_steps_per_second": 0.208,
533
+ "eval_top1_accuracy": 0.8421052631578947,
534
+ "step": 798
535
+ },
536
+ {
537
+ "epoch": 22.0,
538
+ "train_accuracy": 0.814327485380117
539
+ },
540
+ {
541
+ "epoch": 22.0,
542
+ "grad_norm": 2.2312376499176025,
543
+ "learning_rate": 6.346644369977025e-06,
544
+ "loss": 1.002,
545
+ "step": 836
546
+ },
547
+ {
548
+ "epoch": 22.0,
549
+ "eval_accuracy": 0.8611261940673707,
550
+ "eval_error_rate": 0.13887380593262932,
551
+ "eval_f1": 0.8435398841932419,
552
+ "eval_loss": 0.9727317690849304,
553
+ "eval_precision": 0.8677232854864433,
554
+ "eval_recall": 0.8486842105263158,
555
+ "eval_runtime": 25.3651,
556
+ "eval_samples_per_second": 5.992,
557
+ "eval_steps_per_second": 0.197,
558
+ "eval_top1_accuracy": 0.8486842105263158,
559
+ "step": 836
560
+ },
561
+ {
562
+ "epoch": 23.0,
563
+ "train_accuracy": 0.8179824561403509
564
+ },
565
+ {
566
+ "epoch": 23.0,
567
+ "grad_norm": 2.2069149017333984,
568
+ "learning_rate": 5.000060000000003e-06,
569
+ "loss": 0.9812,
570
+ "step": 874
571
+ },
572
+ {
573
+ "epoch": 23.0,
574
+ "eval_accuracy": 0.8677928607340373,
575
+ "eval_error_rate": 0.13220713926596273,
576
+ "eval_f1": 0.8497099815147097,
577
+ "eval_loss": 0.9464592933654785,
578
+ "eval_precision": 0.8795061782362883,
579
+ "eval_recall": 0.8552631578947368,
580
+ "eval_runtime": 26.8567,
581
+ "eval_samples_per_second": 5.66,
582
+ "eval_steps_per_second": 0.186,
583
+ "eval_top1_accuracy": 0.8552631578947368,
584
+ "step": 874
585
+ },
586
+ {
587
+ "epoch": 24.0,
588
+ "train_accuracy": 0.8187134502923976
589
+ },
590
+ {
591
+ "epoch": 24.0,
592
+ "grad_norm": 2.32728910446167,
593
+ "learning_rate": 3.76516692100474e-06,
594
+ "loss": 0.9636,
595
+ "step": 912
596
+ },
597
+ {
598
+ "epoch": 24.0,
599
+ "eval_accuracy": 0.8699296128707893,
600
+ "eval_error_rate": 0.1300703871292107,
601
+ "eval_f1": 0.8485082765446309,
602
+ "eval_loss": 0.93310546875,
603
+ "eval_precision": 0.8820191260980734,
604
+ "eval_recall": 0.8552631578947368,
605
+ "eval_runtime": 27.6614,
606
+ "eval_samples_per_second": 5.495,
607
+ "eval_steps_per_second": 0.181,
608
+ "eval_top1_accuracy": 0.8552631578947368,
609
+ "step": 912
610
+ },
611
+ {
612
+ "epoch": 25.0,
613
+ "train_accuracy": 0.8150584795321637
614
+ },
615
+ {
616
+ "epoch": 25.0,
617
+ "grad_norm": 2.2066571712493896,
618
+ "learning_rate": 2.6963844978948743e-06,
619
+ "loss": 0.9591,
620
+ "step": 950
621
+ },
622
+ {
623
+ "epoch": 25.0,
624
+ "eval_accuracy": 0.8743740573152337,
625
+ "eval_error_rate": 0.12562594268476635,
626
+ "eval_f1": 0.8573701550510042,
627
+ "eval_loss": 0.92206209897995,
628
+ "eval_precision": 0.877970723615921,
629
+ "eval_recall": 0.8618421052631579,
630
+ "eval_runtime": 26.1743,
631
+ "eval_samples_per_second": 5.807,
632
+ "eval_steps_per_second": 0.191,
633
+ "eval_top1_accuracy": 0.8618421052631579,
634
+ "step": 950
635
+ },
636
+ {
637
+ "epoch": 26.0,
638
+ "train_accuracy": 0.8194444444444444
639
+ },
640
+ {
641
+ "epoch": 26.0,
642
+ "grad_norm": 2.345517873764038,
643
+ "learning_rate": 1.75992615737436e-06,
644
+ "loss": 0.948,
645
+ "step": 988
646
+ },
647
+ {
648
+ "epoch": 26.0,
649
+ "eval_accuracy": 0.8743740573152337,
650
+ "eval_error_rate": 0.12562594268476635,
651
+ "eval_f1": 0.8573701550510042,
652
+ "eval_loss": 0.9158027768135071,
653
+ "eval_precision": 0.877970723615921,
654
+ "eval_recall": 0.8618421052631579,
655
+ "eval_runtime": 26.0018,
656
+ "eval_samples_per_second": 5.846,
657
+ "eval_steps_per_second": 0.192,
658
+ "eval_top1_accuracy": 0.868421052631579,
659
+ "step": 988
660
+ },
661
+ {
662
+ "epoch": 27.0,
663
+ "train_accuracy": 0.8201754385964912
664
+ },
665
+ {
666
+ "epoch": 27.0,
667
+ "grad_norm": 2.246762990951538,
668
+ "learning_rate": 1.0075383437198693e-06,
669
+ "loss": 0.9384,
670
+ "step": 1026
671
+ },
672
+ {
673
+ "epoch": 27.0,
674
+ "eval_accuracy": 0.8601005530417295,
675
+ "eval_error_rate": 0.1398994469582705,
676
+ "eval_f1": 0.8431034082329946,
677
+ "eval_loss": 0.901735782623291,
678
+ "eval_precision": 0.8684637995623307,
679
+ "eval_recall": 0.8486842105263158,
680
+ "eval_runtime": 25.9225,
681
+ "eval_samples_per_second": 5.864,
682
+ "eval_steps_per_second": 0.193,
683
+ "eval_top1_accuracy": 0.8486842105263158,
684
+ "step": 1026
685
+ },
686
+ {
687
+ "epoch": 27.0,
688
+ "step": 1026,
689
+ "total_flos": 6.9738304117683e+17,
690
+ "train_loss": 1.2960130829095375,
691
+ "train_runtime": 5177.2299,
692
+ "train_samples_per_second": 7.046,
693
+ "train_steps_per_second": 0.22
694
+ }
695
+ ],
696
+ "logging_steps": 500,
697
+ "max_steps": 1140,
698
+ "num_input_tokens_seen": 0,
699
+ "num_train_epochs": 30,
700
+ "save_steps": 500,
701
+ "stateful_callbacks": {
702
+ "EarlyStoppingCallback": {
703
+ "args": {
704
+ "early_stopping_patience": 2,
705
+ "early_stopping_threshold": 0.0
706
+ },
707
+ "attributes": {
708
+ "early_stopping_patience_counter": 2
709
+ }
710
+ },
711
+ "TrainerControl": {
712
+ "args": {
713
+ "should_epoch_stop": false,
714
+ "should_evaluate": false,
715
+ "should_log": false,
716
+ "should_save": true,
717
+ "should_training_stop": true
718
+ },
719
+ "attributes": {}
720
+ }
721
+ },
722
+ "total_flos": 6.9738304117683e+17,
723
+ "train_batch_size": 32,
724
+ "trial_name": null,
725
+ "trial_params": null
726
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1260b76ac737c5c8e660b9ecd23bc303fdc0340adc4e3b91ce34f96f50eca543
3
+ size 5176