cristianglezm committed on
Commit
4b71e58
1 Parent(s): 8cc1bed

new model version with better rouge

Browse files
README.md CHANGED
@@ -24,16 +24,16 @@ pipeline_tag: image-to-text
24
  library_name: transformers.js
25
  ---
26
 
27
- # ViT-GPT2-FlowerCaptioner-ONNX
28
 
29
  This model is a fine-tuned version of [nlpconnect/vit-gpt2-image-captioning](https://huggingface.co/nlpconnect/vit-gpt2-image-captioning) on the [FlowerEvolver-Dataset](https://huggingface.co/datasets/cristianglezm/FlowerEvolver-Dataset).
30
  It achieves the following results on the evaluation set:
31
- - Loss: 0.3075
32
- - Rouge1: 66.3702
33
- - Rouge2: 45.5642
34
- - Rougel: 61.401
35
- - Rougelsum: 64.0587
36
- - Gen Len: 49.97
37
 
38
  ## sample running code
39
 
@@ -74,15 +74,37 @@ The following hyperparameters were used during training:
74
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
75
  - lr_scheduler_type: linear
76
  - lr_scheduler_warmup_steps: 500
77
- - num_epochs: 3
78
 
79
  ### Training results
80
 
81
  | Training Loss | Epoch | Step | Validation Loss | Rouge1 | Rouge2 | Rougel | Rougelsum | Gen Len |
82
  |:-------------:|:-----:|:----:|:---------------:|:-------:|:-------:|:-------:|:---------:|:-------:|
83
- | 0.6755 | 1.0 | 100 | 0.5339 | 60.9402 | 39.3331 | 54.6889 | 59.45 | 36.75 |
84
- | 0.3666 | 2.0 | 200 | 0.3331 | 65.5149 | 43.0245 | 59.3121 | 62.7329 | 52.82 |
85
- | 0.2983 | 3.0 | 300 | 0.3075 | 66.3702 | 45.5642 | 61.401 | 64.0587 | 49.97 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
 
88
  ### Framework versions
 
24
  library_name: transformers.js
25
  ---
26
 
27
+ # ViT-GPT2-FlowerCaptioner
28
 
29
  This model is a fine-tuned version of [nlpconnect/vit-gpt2-image-captioning](https://huggingface.co/nlpconnect/vit-gpt2-image-captioning) on the [FlowerEvolver-Dataset](https://huggingface.co/datasets/cristianglezm/FlowerEvolver-Dataset).
30
  It achieves the following results on the evaluation set:
31
+ - Loss: 0.4930
32
+ - Rouge1: 68.3498
33
+ - Rouge2: 46.7534
34
+ - Rougel: 62.3763
35
+ - Rougelsum: 65.9575
36
+ - Gen Len: 49.82
37
 
38
  ## sample running code
39
 
 
74
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
75
  - lr_scheduler_type: linear
76
  - lr_scheduler_warmup_steps: 500
77
+ - num_epochs: 25
78
 
79
  ### Training results
80
 
81
  | Training Loss | Epoch | Step | Validation Loss | Rouge1 | Rouge2 | Rougel | Rougelsum | Gen Len |
82
  |:-------------:|:-----:|:----:|:---------------:|:-------:|:-------:|:-------:|:---------:|:-------:|
83
+ | 0.6986 | 1.0 | 100 | 0.5339 | 64.9813 | 42.4686 | 58.2586 | 63.3933 | 47.25 |
84
+ | 0.3408 | 2.0 | 200 | 0.3263 | 67.5461 | 46.5219 | 62.7962 | 65.6509 | 47.39 |
85
+ | 0.2797 | 3.0 | 300 | 0.2829 | 65.0704 | 42.0682 | 58.4268 | 63.2368 | 56.8 |
86
+ | 0.2584 | 4.0 | 400 | 0.2588 | 65.5074 | 45.227 | 60.2469 | 63.4253 | 52.25 |
87
+ | 0.2589 | 5.0 | 500 | 0.2607 | 66.7346 | 45.8264 | 61.7373 | 64.8857 | 50.64 |
88
+ | 0.2179 | 6.0 | 600 | 0.2697 | 63.8334 | 42.997 | 58.1585 | 61.7704 | 52.43 |
89
+ | 0.1662 | 7.0 | 700 | 0.2631 | 68.6188 | 48.3329 | 63.9474 | 66.6006 | 46.94 |
90
+ | 0.161 | 8.0 | 800 | 0.2749 | 69.0046 | 48.1421 | 63.7844 | 66.8317 | 49.74 |
91
+ | 0.1207 | 9.0 | 900 | 0.3117 | 70.0357 | 48.9002 | 64.416 | 67.7582 | 48.66 |
92
+ | 0.0909 | 10.0 | 1000 | 0.3408 | 65.9578 | 45.2324 | 60.2838 | 63.7493 | 46.92 |
93
+ | 0.0749 | 11.0 | 1100 | 0.3516 | 67.4244 | 46.1985 | 61.6408 | 65.5371 | 46.61 |
94
+ | 0.0665 | 12.0 | 1200 | 0.3730 | 68.6911 | 47.7089 | 63.0381 | 66.6956 | 47.89 |
95
+ | 0.0522 | 13.0 | 1300 | 0.3891 | 67.2365 | 45.4165 | 61.4063 | 64.857 | 48.91 |
96
+ | 0.0355 | 14.0 | 1400 | 0.4128 | 69.1494 | 47.9278 | 63.3334 | 66.5969 | 50.55 |
97
+ | 0.0309 | 15.0 | 1500 | 0.4221 | 66.2447 | 44.937 | 60.1403 | 63.8541 | 50.71 |
98
+ | 0.0265 | 16.0 | 1600 | 0.4343 | 67.8178 | 46.7084 | 61.8173 | 65.4375 | 50.85 |
99
+ | 0.0158 | 17.0 | 1700 | 0.4577 | 67.9846 | 45.9562 | 61.6353 | 65.7207 | 50.81 |
100
+ | 0.0166 | 18.0 | 1800 | 0.4731 | 69.0971 | 47.7001 | 62.856 | 66.7796 | 50.01 |
101
+ | 0.0121 | 19.0 | 1900 | 0.4657 | 68.1397 | 46.4258 | 62.2696 | 65.9332 | 49.15 |
102
+ | 0.0095 | 20.0 | 2000 | 0.4793 | 68.6497 | 47.9446 | 63.0466 | 66.5409 | 50.96 |
103
+ | 0.0086 | 21.0 | 2100 | 0.4780 | 68.4363 | 46.7296 | 62.359 | 66.2626 | 50.02 |
104
+ | 0.0068 | 22.0 | 2200 | 0.4863 | 67.5415 | 46.0821 | 61.57 | 65.4613 | 49.5 |
105
+ | 0.0061 | 23.0 | 2300 | 0.4892 | 68.1283 | 46.5802 | 62.0832 | 66.0203 | 50.21 |
106
+ | 0.006 | 24.0 | 2400 | 0.4912 | 68.1723 | 46.3239 | 62.2007 | 65.6725 | 49.89 |
107
+ | 0.0057 | 25.0 | 2500 | 0.4930 | 68.3498 | 46.7534 | 62.3763 | 65.9575 | 49.82 |
108
 
109
 
110
  ### Framework versions
onnx/decoder_model.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca2595f2af7fb6d879eb3e80f4e0ee2958c5c2dce039ec9bfaddd677a7001b43
3
  size 613153019
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f51820bb5c0f5a4ed7a56f8cebc0941ea4e76cbcd8b92029e6a692fb01e5a078
3
  size 613153019
onnx/decoder_model_merged.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cdc6df54706d46d199dbe8bee353757c905d9dd2f4355b9f7771a33ca7a24f8d
3
  size 615070521
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc7e62770352f6844e064cdf22cabb6b00c8f7d166c0839e07156f7d2bae73c5
3
  size 615070521
onnx/decoder_model_merged_quantized.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2585e1276bdb3da15ed1041c53593268721a174b0b39c0a28863544db6245ad2
3
  size 158063351
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b482ca61962a11ddd30d2a279a5680feedb4cc19e207fee8c2d29860e548deda
3
  size 158063351
onnx/decoder_model_quantized.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d67a2e2c9bd64f893cd4afe1126550ec9175309bfb522663205e6ed8bec23ba1
3
  size 155710792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81802b5229986a911d3ad6e9007fb540702b3ee6b42cbb8e687b816dfd948712
3
  size 155710792
onnx/decoder_with_past_model.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21dd43d250fc5300859594e7a111e393adc5822798186aa1c18a125351258197
3
  size 613149344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8e6cb92689b1b31c3a6f9e4f04e8cf052e6f00e1b433553dc399f4df833ea01
3
  size 613149344
onnx/decoder_with_past_model_quantized.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0c23e86396e4ce9b690ae55f5922f68cd58cffcd8ad1068deaefbcff4e3cea1c
3
  size 155701341
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcac5392f1f43d1fb64a0c4c4a9d8e32cc87d88213905997f43e72e73e660750
3
  size 155701341
onnx/encoder_model.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:091df2b5b67fde5a63f2323d5346063e0e1b312caa3e8aeeb62c382c4344d77c
3
  size 343410667
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d18e307852a5dca94a0e2d2457e4eb3d397820ca509caa7d80aac41fba4b9aa
3
  size 343410667
onnx/encoder_model_quantized.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fabd01cd11696da81627401da9ef9bc6dea70d4995be902034b0ddfa2acdacbd
3
- size 87000252
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7f417ac897abb1ab3f6df34df104737f9da1beb7ecafab5798bbb128fd9f90e
3
+ size 87000254
quantize_config.json CHANGED
@@ -4,114 +4,114 @@
4
  "per_model_config": {
5
  "decoder_model": {
6
  "op_types": [
7
- "Squeeze",
8
- "MatMul",
9
  "Pow",
10
- "Tanh",
11
- "Gather",
 
 
 
 
 
 
 
12
  "Sub",
 
 
13
  "ConstantOfShape",
14
- "Mul",
 
 
15
  "Where",
16
- "Concat",
17
- "Transpose",
18
  "Div",
19
- "Add",
20
- "Slice",
 
21
  "Unsqueeze",
22
- "Softmax",
23
- "Gemm",
24
- "Constant",
25
- "Range",
26
- "LayerNormalization",
27
- "Reshape",
28
- "Shape",
29
- "Cast",
30
- "Split"
31
  ],
32
  "weight_type": "QInt8"
33
  },
34
  "decoder_model_merged": {
35
  "op_types": [
36
- "Squeeze",
37
- "MatMul",
38
  "Pow",
39
- "Tanh",
40
- "Gather",
41
  "If",
 
 
 
 
 
 
 
 
 
42
  "Sub",
 
 
43
  "ConstantOfShape",
44
- "Mul",
 
 
45
  "Where",
46
- "Concat",
47
- "Transpose",
48
  "Div",
49
- "Add",
50
- "Slice",
 
51
  "Unsqueeze",
52
- "Softmax",
53
- "Gemm",
54
- "Constant",
55
- "Range",
56
- "LayerNormalization",
57
- "Reshape",
58
- "Shape",
59
- "Cast",
60
- "Split"
61
  ],
62
  "weight_type": "QInt8"
63
  },
64
  "decoder_with_past_model": {
65
  "op_types": [
66
- "Squeeze",
67
- "MatMul",
68
  "Pow",
69
- "Tanh",
70
- "Gather",
 
 
 
 
 
 
 
71
  "Sub",
 
 
72
  "ConstantOfShape",
73
- "Mul",
 
 
74
  "Where",
75
- "Concat",
76
- "Transpose",
77
  "Div",
78
- "Add",
79
- "Slice",
 
80
  "Unsqueeze",
81
- "Softmax",
82
- "Gemm",
83
- "Constant",
84
- "Range",
85
- "LayerNormalization",
86
- "Reshape",
87
- "Shape",
88
- "Cast",
89
- "Split"
90
  ],
91
  "weight_type": "QInt8"
92
  },
93
  "encoder_model": {
94
  "op_types": [
 
 
 
 
 
95
  "MatMul",
96
- "Gather",
97
- "ConstantOfShape",
98
- "Where",
99
  "Mul",
100
- "Expand",
 
 
101
  "Concat",
 
 
 
 
 
102
  "Transpose",
103
- "Conv",
104
  "Div",
105
- "Add",
106
- "Equal",
107
- "Slice",
108
  "Unsqueeze",
109
- "Softmax",
110
- "Constant",
111
- "LayerNormalization",
112
- "Reshape",
113
- "Shape",
114
- "Erf"
115
  ],
116
  "weight_type": "QUInt8"
117
  }
 
4
  "per_model_config": {
5
  "decoder_model": {
6
  "op_types": [
 
 
7
  "Pow",
8
+ "Split",
9
+ "Softmax",
10
+ "Reshape",
11
+ "Range",
12
+ "Slice",
13
+ "Mul",
14
+ "Constant",
15
+ "Gemm",
16
+ "Shape",
17
  "Sub",
18
+ "Concat",
19
+ "Tanh",
20
  "ConstantOfShape",
21
+ "LayerNormalization",
22
+ "Cast",
23
+ "Squeeze",
24
  "Where",
 
 
25
  "Div",
26
+ "Gather",
27
+ "Transpose",
28
+ "MatMul",
29
  "Unsqueeze",
30
+ "Add"
 
 
 
 
 
 
 
 
31
  ],
32
  "weight_type": "QInt8"
33
  },
34
  "decoder_model_merged": {
35
  "op_types": [
 
 
36
  "Pow",
 
 
37
  "If",
38
+ "Split",
39
+ "Softmax",
40
+ "Reshape",
41
+ "Range",
42
+ "Slice",
43
+ "Mul",
44
+ "Constant",
45
+ "Gemm",
46
+ "Shape",
47
  "Sub",
48
+ "Concat",
49
+ "Tanh",
50
  "ConstantOfShape",
51
+ "LayerNormalization",
52
+ "Cast",
53
+ "Squeeze",
54
  "Where",
 
 
55
  "Div",
56
+ "Gather",
57
+ "Transpose",
58
+ "MatMul",
59
  "Unsqueeze",
60
+ "Add"
 
 
 
 
 
 
 
 
61
  ],
62
  "weight_type": "QInt8"
63
  },
64
  "decoder_with_past_model": {
65
  "op_types": [
 
 
66
  "Pow",
67
+ "Split",
68
+ "Softmax",
69
+ "Reshape",
70
+ "Range",
71
+ "Slice",
72
+ "Mul",
73
+ "Constant",
74
+ "Gemm",
75
+ "Shape",
76
  "Sub",
77
+ "Concat",
78
+ "Tanh",
79
  "ConstantOfShape",
80
+ "LayerNormalization",
81
+ "Cast",
82
+ "Squeeze",
83
  "Where",
 
 
84
  "Div",
85
+ "Gather",
86
+ "Transpose",
87
+ "MatMul",
88
  "Unsqueeze",
89
+ "Add"
 
 
 
 
 
 
 
 
90
  ],
91
  "weight_type": "QInt8"
92
  },
93
  "encoder_model": {
94
  "op_types": [
95
+ "Softmax",
96
+ "Reshape",
97
+ "Conv",
98
+ "Expand",
99
+ "Slice",
100
  "MatMul",
 
 
 
101
  "Mul",
102
+ "Constant",
103
+ "Erf",
104
+ "Shape",
105
  "Concat",
106
+ "ConstantOfShape",
107
+ "LayerNormalization",
108
+ "Equal",
109
+ "Where",
110
+ "Gather",
111
  "Transpose",
 
112
  "Div",
 
 
 
113
  "Unsqueeze",
114
+ "Add"
 
 
 
 
 
115
  ],
116
  "weight_type": "QUInt8"
117
  }