rawsh-rubrik
committed on
Commit
•
ddb01c4
1
Parent(s):
3c6cd1b
quant
Browse files
- .gitignore +2 -0
- README.md +12 -1
- model_quantized.onnx +3 -0
- pyproject.toml +18 -0
- quantize.py +11 -0
.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
poetry.lock
|
2 |
+
|
README.md
CHANGED
@@ -5,6 +5,17 @@ language:
|
|
5 |
tags:
|
6 |
- ColBERT
|
7 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
<p align="center">
|
9 |
<img align="center" src="docs/images/colbertofficial.png" width="430px" />
|
10 |
</p>
|
@@ -212,4 +223,4 @@ http://localhost:8893/api/search?query=Who won the 2022 FIFA world cup&k=25
|
|
212 |
|
213 |
## Acknowledgments
|
214 |
|
215 |
-
ColBERT logo designed by Chuyi Zhang.
|
|
|
5 |
tags:
|
6 |
- ColBERT
|
7 |
---
|
8 |
+
|
9 |
+
|
10 |
+
```
|
11 |
+
python -m onnxruntime.transformers.optimizer --input model.onnx --output model.ops.onnx
|
12 |
+
python -m onnxruntime.quantization.preprocess --skip_symbolic_shape 1 --input model.ops.onnx --output model.ops-infer.onnx
|
13 |
+
|
14 |
+
```
|
15 |
+
|
16 |
+
|
17 |
+
----
|
18 |
+
|
19 |
<p align="center">
|
20 |
<img align="center" src="docs/images/colbertofficial.png" width="430px" />
|
21 |
</p>
|
|
|
223 |
|
224 |
## Acknowledgments
|
225 |
|
226 |
+
ColBERT logo designed by Chuyi Zhang.
|
model_quantized.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:477ae7fb6f1baa0006d1e348431cfa4ab3528c78e3951f51a91eb665b1dae000
|
3 |
+
size 109775105
|
pyproject.toml
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[tool.poetry]
|
2 |
+
name = "colbertv2-0"
|
3 |
+
version = "0.1.0"
|
4 |
+
description = ""
|
5 |
+
authors = ["rawsh <rawashbourne@gmail.com>"]
|
6 |
+
readme = "README.md"
|
7 |
+
packages = [{include = "colbertv2"}]
|
8 |
+
|
9 |
+
[tool.poetry.dependencies]
|
10 |
+
python = "^3.12"
|
11 |
+
onnxruntime = "^1.17.1"
|
12 |
+
onnx = "^1.15.0"
|
13 |
+
torch = "^2.2.1"
|
14 |
+
|
15 |
+
|
16 |
+
[build-system]
|
17 |
+
requires = ["poetry-core"]
|
18 |
+
build-backend = "poetry.core.masonry.api"
|
quantize.py
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
def quantize_onnx_model(onnx_model_path, quantized_model_path):
    """Dynamically quantize an ONNX model and write the result to disk.

    Calls ``onnxruntime.quantization.quantize_dynamic`` with
    ``weight_type=QuantType.QInt8`` and then prints the output path.

    Args:
        onnx_model_path: Path of the ONNX model file to quantize.
        quantized_model_path: Path the quantized model is written to.
    """
    # Kept as a function-scope import, matching the original script.
    from onnxruntime.quantization import QuantType, quantize_dynamic

    # quantize_dynamic is handed the input *path* and reads the model itself,
    # so the model is not loaded separately with onnx.load() beforehand.
    quantize_dynamic(
        onnx_model_path,
        quantized_model_path,
        weight_type=QuantType.QInt8,
    )

    print(f"quantized model saved to:{quantized_model_path}")
|
10 |
+
|
11 |
+
# Runs whenever this file is executed or imported: quantizes
# "model.ops-infer.onnx" and writes the result to "model_quantized.onnx".
quantize_onnx_model("model.ops-infer.onnx", "model_quantized.onnx")
|