File size: 1,882 Bytes
50d8f01
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import torch
import onnx
import onnxruntime as rt
from torchvision import transforms as T
from pathlib import Path
from PIL import Image
from huggingface_hub import login, hf_hub_download

import os
import gradio as gr

from utils.tokenizer_base import Tokenizer


# Authenticate with the Hugging Face Hub only when a token is configured.
# login() with no token falls back to an interactive prompt, which cannot be
# answered when the app runs unattended.
hf_token = os.getenv("HF_TOKEN")
if hf_token:
    login(hf_token)

cwd = Path(__file__).parent.resolve()
# hf_hub_download returns the local path of the fetched file. If that path is
# absolute, os.path.join discards `cwd` and returns it unchanged.
model_file = os.path.join(cwd, hf_hub_download("toandev/ocr-for-captcha", "model.onnx"))

# Target size handed to T.Resize by get_transform.
img_size = (32, 128)

# NOTE(review): this is a raw literal, so the backslashes in `\"` and `\\` are
# kept verbatim and the vocabulary contains three backslash characters.
# Confirm this matches the charset the model was trained with before changing it.
vocab = r"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
tokenizer = Tokenizer(vocab)


def to_numpy(tensor):
    """Convert a torch tensor to a NumPy array on the CPU.

    Tensors that track gradients are detached first, because ``numpy()``
    cannot be called on a tensor that requires grad.
    """
    if tensor.requires_grad:
        tensor = tensor.detach()
    return tensor.cpu().numpy()


def get_transform(img_size):
    """Build the image preprocessing pipeline.

    Args:
        img_size: Target size forwarded to ``T.Resize``.

    Returns:
        A ``T.Compose`` that resizes with bicubic interpolation, converts the
        image to a tensor, and normalizes with mean 0.5 and std 0.5.
    """
    resize = T.Resize(img_size, interpolation=T.InterpolationMode.BICUBIC)
    normalize = T.Normalize(mean=0.5, std=0.5)
    return T.Compose([resize, T.ToTensor(), normalize])


def load_model(model_file):
    """Validate the ONNX model at ``model_file`` and open an inference session.

    Args:
        model_file: Path of the ONNX model file.

    Returns:
        A ``(transform, session)`` pair: the preprocessing pipeline built by
        ``get_transform`` for the module-level ``img_size``, and an
        ``onnxruntime.InferenceSession`` created from the same file.
    """
    # Run the ONNX checker on the loaded model before creating the session.
    onnx.checker.check_model(onnx.load(model_file))

    session = rt.InferenceSession(model_file)
    preprocess = get_transform(img_size)
    return preprocess, session


# Build the preprocessing pipeline and the ONNX Runtime session once, at
# import time; infer() reuses both for every request.
transform, s = load_model(model_file)


def infer(img: Image.Image):
    """Recognise the text shown in a CAPTCHA image.

    Args:
        img: Image supplied by the Gradio image component. ``None`` (no image
            provided) is tolerated.

    Returns:
        The decoded string for the image, or an empty string when no image
        was given.
    """
    # Gradio passes None when the form is submitted without an image; return
    # an empty result instead of failing on `None.convert`.
    if img is None:
        return ""

    # Force three channels, apply the module-level preprocessing, and add a
    # leading dimension (presumably the batch axis the model expects).
    x = transform(img.convert("RGB")).unsqueeze(0)

    ort_inputs = {s.get_inputs()[0].name: to_numpy(x)}
    logits = s.run(None, ort_inputs)[0]
    probs = torch.tensor(logits).softmax(-1)
    # decode returns a pair; only the first element is needed here.
    preds, _ = tokenizer.decode(probs)

    return preds[0]


# Gradio UI: one image in, the recognised text out.
demo = gr.Interface(
    fn=infer,
    inputs=gr.components.Image(type="pil"),
    outputs=gr.components.Textbox(),
    title="OCR for CAPTCHA",
    description="Solve captchas from images including letters and numbers, success rate is about 80-90%.",
    examples=["1.png", "2.jpg", "3.jpg", "4.png", "5.png"],
)

# Start the web server for the interface.
demo.launch()