rawsh-rubrik committed on
Commit
ddb01c4
1 Parent(s): 3c6cd1b
Files changed (5) hide show
  1. .gitignore +2 -0
  2. README.md +12 -1
  3. model_quantized.onnx +3 -0
  4. pyproject.toml +18 -0
  5. quantize.py +11 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ poetry.lock
2
+
README.md CHANGED
@@ -5,6 +5,17 @@ language:
5
  tags:
6
  - ColBERT
7
  ---
 
 
 
 
 
 
 
 
 
 
 
8
  <p align="center">
9
  <img align="center" src="docs/images/colbertofficial.png" width="430px" />
10
  </p>
@@ -212,4 +223,4 @@ http://localhost:8893/api/search?query=Who won the 2022 FIFA world cup&k=25
212
 
213
  ## Acknowledgments
214
 
215
- ColBERT logo designed by Chuyi Zhang.
 
5
  tags:
6
  - ColBERT
7
  ---
8
+
9
+
10
+ ```
11
+ python -m onnxruntime.transformers.optimizer --input model.onnx --output model.ops.onnx
12
+ python -m onnxruntime.quantization.preprocess --skip_symbolic_shape 1 --input model.ops.onnx --output model.ops-infer.onnx
13
+
14
+ ```
15
+
16
+
17
+ ----
18
+
19
  <p align="center">
20
  <img align="center" src="docs/images/colbertofficial.png" width="430px" />
21
  </p>
 
223
 
224
  ## Acknowledgments
225
 
226
+ ColBERT logo designed by Chuyi Zhang.
model_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:477ae7fb6f1baa0006d1e348431cfa4ab3528c78e3951f51a91eb665b1dae000
3
+ size 109775105
pyproject.toml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [tool.poetry]
2
+ name = "colbertv2-0"
3
+ version = "0.1.0"
4
+ description = ""
5
+ authors = ["rawsh <rawashbourne@gmail.com>"]
6
+ readme = "README.md"
7
+ packages = [{include = "colbertv2"}]
8
+
9
+ [tool.poetry.dependencies]
10
+ python = "^3.12"
11
+ onnxruntime = "^1.17.1"
12
+ onnx = "^1.15.0"
13
+ torch = "^2.2.1"
14
+
15
+
16
+ [build-system]
17
+ requires = ["poetry-core"]
18
+ build-backend = "poetry.core.masonry.api"
quantize.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
def quantize_onnx_model(onnx_model_path, quantized_model_path):
    """Dynamically quantize an ONNX model's weights to INT8.

    Args:
        onnx_model_path: Path to the input (preprocessed/optimized) ONNX model.
        quantized_model_path: Path where the quantized model will be written.
    """
    # Imported lazily so the heavy onnxruntime dependency is only pulled in
    # when quantization actually runs (matches the original's local import).
    from onnxruntime.quantization import quantize_dynamic, QuantType

    # NOTE: quantize_dynamic reads the model from disk itself; the original
    # also called onnx.load() here and never used the result, wasting memory
    # on a ~100 MB model — that dead load is removed.
    quantize_dynamic(onnx_model_path,
                     quantized_model_path,
                     weight_type=QuantType.QInt8)

    print(f"quantized model saved to:{quantized_model_path}")


if __name__ == "__main__":
    # Quantize the shape-inferred model produced by the onnxruntime
    # preprocessing commands documented in the README.
    quantize_onnx_model("model.ops-infer.onnx", "model_quantized.onnx")