qMTEB / quantize_torch.py
varun4's picture
removed unneded files
81e28a1
raw
history blame contribute delete
468 Bytes
import torch
from transformers import AutoModel
import os
os.environ["TRANSFORMERS_OFFLINE"] = "1" # 1 for offline
model_fp32 = AutoModel.from_pretrained("./models/all-MiniLM-L6-v2")
model_int8 = torch.ao.quantization.quantize_dynamic(
model_fp32, # the original model
{torch.nn.Linear}, # a set of layers to dynamically quantize
dtype=torch.float16)
torch.save(model_int8.state_dict(), "./models/all-MiniLM-L6-v2-unquantized-q16/pytorch_model.bin")