# Spaces: Running
import os
from pathlib import Path

import torch
import onnx
import onnxruntime as rt
from torchvision import transforms as T
from PIL import Image
from huggingface_hub import login, hf_hub_download
import gradio as gr

from utils.tokenizer_base import Tokenizer
# Authenticate with the Hugging Face Hub only when a token is configured.
# Calling login() with None falls back to an interactive token prompt, which
# cannot be answered in a headless deployment.
hf_token = os.getenv("HF_TOKEN")
if hf_token:
    login(hf_token)

# Directory that contains this script.
cwd = Path(__file__).parent.resolve()

# Download the ONNX model file from the Hub repository.
# os.path.join discards `cwd` whenever the downloaded path is absolute, so the
# join only has an effect if hf_hub_download returns a relative path.
model_file = os.path.join(cwd, hf_hub_download("toandev/ocr-for-captcha", "model.onnx"))

# Target size handed to T.Resize by get_transform().
img_size = (32, 128)

# Character set handed to the tokenizer.
# NOTE(review): this is a raw string, so `\"` and `\\` keep their backslashes
# and the literal contains three backslash characters in total. Confirm this
# matches the charset the model was exported with before changing it.
vocab = r"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
tokenizer = Tokenizer(vocab)
def to_numpy(tensor: torch.Tensor):
    """Convert a torch tensor to a NumPy array held in CPU memory.

    Args:
        tensor: The tensor to convert. It may or may not require grad.

    Returns:
        The tensor's data as a NumPy array.
    """
    # detach() is valid whether or not the tensor requires grad, so no
    # branching on `requires_grad` is needed before calling numpy().
    return tensor.detach().cpu().numpy()
def get_transform(img_size):
    """Build the image preprocessing pipeline.

    Args:
        img_size: Target size forwarded to ``T.Resize``.

    Returns:
        A ``T.Compose`` that resizes with bicubic interpolation, converts the
        image to a tensor, and normalizes it with mean 0.5 and std 0.5.
    """
    return T.Compose(
        [
            T.Resize(img_size, T.InterpolationMode.BICUBIC),
            T.ToTensor(),
            T.Normalize(0.5, 0.5),
        ]
    )
def load_model(model_file):
    """Validate the ONNX model and open an inference session for it.

    Args:
        model_file: Path to the ONNX model file.

    Returns:
        A ``(transform, session)`` tuple: the preprocessing pipeline built from
        the module-level ``img_size`` and an ``onnxruntime.InferenceSession``
        for ``model_file``.
    """
    transform = get_transform(img_size)

    # Run the ONNX checker before creating a session so that a malformed
    # model is reported by the checker itself.
    onnx_model = onnx.load(model_file)
    onnx.checker.check_model(onnx_model)

    session = rt.InferenceSession(model_file)
    return transform, session
# Build the preprocessing pipeline and the ONNX Runtime session once at import
# time; infer() reads both as module-level globals.
transform, s = load_model(model_file=model_file)
def infer(img: Image.Image):
    """Run the OCR model on one image and return the decoded prediction.

    Args:
        img: The input image as a PIL image. Gradio passes ``None`` when the
            user submits without providing an image.

    Returns:
        The first decoded prediction from ``tokenizer.decode`` (presumably a
        string; confirm against the tokenizer implementation).

    Raises:
        gr.Error: If no image was provided.
    """
    if img is None:
        # Show a readable message in the UI instead of an AttributeError.
        raise gr.Error("Please provide an image.")

    # Preprocess and add a leading batch dimension of size 1.
    x = transform(img.convert("RGB")).unsqueeze(0)

    # Feed the batch to the session's first declared input.
    ort_inputs = {s.get_inputs()[0].name: to_numpy(x)}
    logits = s.run(None, ort_inputs)[0]

    # Softmax over the last axis, then let the tokenizer decode the result.
    probs = torch.tensor(logits).softmax(-1)
    preds, _ = tokenizer.decode(probs)
    return preds[0]
# Gradio UI: one PIL image input, one text output, backed by infer().
demo = gr.Interface(
    infer,
    gr.components.Image(type="pil"),
    gr.components.Textbox(),
    title="OCR for CAPTCHA",
    description="Solve captchas from images including letters and numbers, success rate is about 80-90%.",
    # Example image paths are given as relative paths.
    examples=[
        "1.png",
        "2.jpg",
        "3.jpg",
        "4.png",
        "5.png",
    ],
)

# Start the web server as soon as the module is executed.
demo.launch()