Upload optimized ONNX files w/ GQA
Browse files- .gitattributes +1 -0
- onnx/model.onnx +3 -0
- onnx/model.onnx_data +3 -0
- onnx/model_bnb4.onnx +3 -0
- onnx/model_fp16.onnx +3 -0
- onnx/model_fp16.onnx_data +3 -0
- onnx/model_int8.onnx +3 -0
- onnx/model_q4.onnx +3 -0
- onnx/model_q4f16.onnx +3 -0
- onnx/model_quantized.onnx +3 -0
- onnx/model_uint8.onnx +3 -0
.gitattributes
CHANGED
@@ -38,3 +38,4 @@ onnx/decoder_model_merged.onnx_data filter=lfs diff=lfs merge=lfs -text
|
|
38 |
onnx/decoder_with_past_model.onnx_data filter=lfs diff=lfs merge=lfs -text
|
39 |
onnx/decoder_model_merged_fp16.onnx_data filter=lfs diff=lfs merge=lfs -text
|
40 |
onnx/model.onnx_data filter=lfs diff=lfs merge=lfs -text
|
|
|
|
38 |
onnx/decoder_with_past_model.onnx_data filter=lfs diff=lfs merge=lfs -text
|
39 |
onnx/decoder_model_merged_fp16.onnx_data filter=lfs diff=lfs merge=lfs -text
|
40 |
onnx/model.onnx_data filter=lfs diff=lfs merge=lfs -text
|
41 |
+
onnx/model_fp16.onnx_data filter=lfs diff=lfs merge=lfs -text
|
onnx/model.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d43d54a6859856fd4080f27bd01c99217fa04fc3c1d59b05046e45054e1060da
|
3 |
+
size 200635
|
onnx/model.onnx_data
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8404f2cf7e57b91060649f9a71385ef090ac3756a197fb0a4714c0818bf71ea9
|
3 |
+
size 5253914836
|
onnx/model_bnb4.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b814079a830ffa6942d9f023c308b97b39a2883f6911cff6c333910ed21d992e
|
3 |
+
size 741313259
|
onnx/model_fp16.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:987cf7de6563e88ea89e215eec9d164711d41f05a80459e8e885394fb5aa86f4
|
3 |
+
size 546701313
|
onnx/model_fp16.onnx_data
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:707a678ab9e378fb739d8574b2668b73e8b7354fb2bc275a34e97b81215c098c
|
3 |
+
size 2080374784
|
onnx/model_int8.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:187df70b79da653768efe78a8e693142357ed8230f3c3c0f418292d3b38f92ba
|
3 |
+
size 1315801598
|
onnx/model_q4.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:00e5f3de4b5329d46d90190909a5b53eac6883a616c5f973f433d60756272f3c
|
3 |
+
size 739917026
|
onnx/model_q4f16.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:35462bc13ce0af9df13a0233fb3e214cf6587ea6a1c51ab0c8fd616619f7ee1e
|
3 |
+
size 739917090
|
onnx/model_quantized.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:187df70b79da653768efe78a8e693142357ed8230f3c3c0f418292d3b38f92ba
|
3 |
+
size 1315801598
|
onnx/model_uint8.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2d59781bbbbe8ba295f17f3c70e06947fb356bf362f8b74a3c3fd4ddf31f3511
|
3 |
+
size 1315801678
|