rawsh-rubrik committed on
Commit
ddb01c4
1 Parent(s): 3c6cd1b
Files changed (5) hide show
  1. .gitignore +2 -0
  2. README.md +12 -1
  3. model_quantized.onnx +3 -0
  4. pyproject.toml +18 -0
  5. quantize.py +11 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ poetry.lock
2
+
README.md CHANGED
@@ -5,6 +5,17 @@ language:
5
  tags:
6
  - ColBERT
7
  ---
 
 
 
 
 
 
 
 
 
 
 
8
  <p align="center">
9
  <img align="center" src="docs/images/colbertofficial.png" width="430px" />
10
  </p>
@@ -212,4 +223,4 @@ http://localhost:8893/api/search?query=Who won the 2022 FIFA world cup&k=25
212
 
213
  ## Acknowledgments
214
 
215
- ColBERT logo designed by Chuyi Zhang.
 
5
  tags:
6
  - ColBERT
7
  ---
8
+
9
+
10
+ ```
11
+ python -m onnxruntime.transformers.optimizer --input model.onnx --output model.ops.onnx
12
+ python -m onnxruntime.quantization.preprocess --skip_symbolic_shape 1 --input model.ops.onnx --output model.ops-infer.onnx
13
+
14
+ ```
15
+
16
+
17
+ ----
18
+
19
  <p align="center">
20
  <img align="center" src="docs/images/colbertofficial.png" width="430px" />
21
  </p>
 
223
 
224
  ## Acknowledgments
225
 
226
+ ColBERT logo designed by Chuyi Zhang.
model_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:477ae7fb6f1baa0006d1e348431cfa4ab3528c78e3951f51a91eb665b1dae000
3
+ size 109775105
pyproject.toml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [tool.poetry]
2
+ name = "colbertv2-0"
3
+ version = "0.1.0"
4
+ description = ""
5
+ authors = ["rawsh <rawashbourne@gmail.com>"]
6
+ readme = "README.md"
7
+ packages = [{include = "colbertv2"}]
8
+
9
+ [tool.poetry.dependencies]
10
+ python = "^3.12"
11
+ onnxruntime = "^1.17.1"
12
+ onnx = "^1.15.0"
13
+ torch = "^2.2.1"
14
+
15
+
16
+ [build-system]
17
+ requires = ["poetry-core"]
18
+ build-backend = "poetry.core.masonry.api"
quantize.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
def quantize_onnx_model(onnx_model_path, quantized_model_path):
    """Dynamically quantize an ONNX model's weights to INT8.

    Args:
        onnx_model_path: Path to the input (preprocessed/optimized) ONNX model.
        quantized_model_path: Path where the quantized model will be written.
    """
    # Imported lazily so the heavy onnxruntime dependency is only pulled in
    # when quantization actually runs (matches the original's local import).
    from onnxruntime.quantization import quantize_dynamic, QuantType

    # NOTE: quantize_dynamic reads the model from disk itself; the original
    # also called onnx.load() here and never used the result, wasting memory
    # on a ~100 MB model — that dead load is removed.
    quantize_dynamic(onnx_model_path,
                     quantized_model_path,
                     weight_type=QuantType.QInt8)

    print(f"quantized model saved to:{quantized_model_path}")


if __name__ == "__main__":
    # Quantize the shape-inferred model produced by the onnxruntime
    # preprocessing commands documented in the README.
    quantize_onnx_model("model.ops-infer.onnx", "model_quantized.onnx")