Spaces:
Sleeping
Sleeping
File size: 1,882 Bytes
50d8f01 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
import torch
import onnx
import onnxruntime as rt
from torchvision import transforms as T
from pathlib import Path
from PIL import Image
from huggingface_hub import login, hf_hub_download
import os
import gradio as gr
from utils.tokenizer_base import Tokenizer
# Authenticate with the Hugging Face Hub only when a token is configured.
# login() called with None falls back to an interactive prompt, which cannot
# be answered when the app runs unattended.
hf_token = os.getenv("HF_TOKEN")
if hf_token:
    login(hf_token)

# Directory that contains this script.
cwd = Path(__file__).parent.resolve()

# Fetch the ONNX model from the Hub (or reuse the cached copy). os.path.join
# leaves the downloaded path untouched when it is absolute and anchors it at
# cwd otherwise.
model_file = os.path.join(cwd, hf_hub_download("toandev/ocr-for-captcha", "model.onnx"))

# Target size handed to the resize transform; torchvision reads a pair as
# (height, width).
img_size = (32, 128)

# Characters the tokenizer is built from.
# NOTE(review): this is a raw string, so `\"` and `\\` each keep their
# backslashes and the backslash character appears three times in the
# charset. Confirm this matches the charset the model was exported with
# before changing it.
vocab = r"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
tokenizer = Tokenizer(vocab)
def to_numpy(tensor):
    """Convert a torch tensor to a NumPy array on the CPU.

    Tensors that track gradients are detached first, because ``.numpy()``
    cannot be called on a tensor that requires grad.
    """
    if tensor.requires_grad:
        tensor = tensor.detach()
    return tensor.cpu().numpy()
def get_transform(img_size):
    """Build the preprocessing pipeline applied to each input image.

    The pipeline resizes to ``img_size`` with bicubic interpolation,
    converts the image to a tensor, and normalizes with mean 0.5 and
    std 0.5.
    """
    return T.Compose(
        [
            T.Resize(img_size, T.InterpolationMode.BICUBIC),
            T.ToTensor(),
            T.Normalize(0.5, 0.5),
        ]
    )
def load_model(model_file):
    """Validate the ONNX model at ``model_file`` and open it for inference.

    Returns:
        A ``(transform, session)`` pair: the preprocessing pipeline built
        for the module-level ``img_size`` and an ONNX Runtime
        ``InferenceSession`` for the model file.
    """
    preprocess = get_transform(img_size)
    # Run the ONNX checker on the loaded model before creating a session.
    onnx.checker.check_model(onnx.load(model_file))
    session = rt.InferenceSession(model_file)
    return preprocess, session
# Load the preprocessing pipeline and the inference session once at import
# time so every request reuses them.
transform, s = load_model(model_file)
def infer(img: Image.Image):
    """Run the CAPTCHA OCR model on one image and return the decoded text.

    Args:
        img: PIL image supplied by the Gradio image input. Gradio passes
            ``None`` when the form is submitted without an image.

    Returns:
        The first prediction returned by ``tokenizer.decode``.

    Raises:
        gr.Error: If no image was provided.
    """
    if img is None:
        # Show a readable message in the UI instead of the AttributeError
        # that calling .convert() on None would raise.
        raise gr.Error("Please provide an image.")

    # Preprocess and add a leading axis of size 1 for the single image.
    x = transform(img.convert("RGB")).unsqueeze(0)
    # Feed the array under the name of the session's first declared input.
    ort_inputs = {s.get_inputs()[0].name: to_numpy(x)}
    logits = s.run(None, ort_inputs)[0]
    # Softmax over the last axis (presumably the vocabulary axis; confirm
    # against the exported model).
    probs = torch.tensor(logits).softmax(-1)
    # decode returns a pair; only the predictions are needed here.
    preds, _ = tokenizer.decode(probs)
    return preds[0]
# Web UI: one PIL image input, one text output, plus sample images that are
# referenced by relative path.
demo = gr.Interface(
    fn=infer,
    inputs=gr.components.Image(type="pil"),
    outputs=gr.components.Textbox(),
    title="OCR for CAPTCHA",
    description="Solve captchas from images including letters and numbers, success rate is about 80-90%.",
    examples=["1.png", "2.jpg", "3.jpg", "4.png", "5.png"],
)

# Start the Gradio server.
demo.launch()
|