toandev committed on
Commit
bcb8d00
·
1 Parent(s): 809f485

Add image examples and refactor app for improved OCR functionality

Browse files
app.py CHANGED
@@ -4,7 +4,7 @@ import onnxruntime as rt
4
  from torchvision import transforms as T
5
  from pathlib import Path
6
  from PIL import Image
7
- from huggingface_hub import login, hf_hub_download
8
 
9
  import os
10
  import gradio as gr
@@ -12,24 +12,27 @@ import gradio as gr
12
  from utils.tokenizer_base import Tokenizer
13
 
14
 
15
- login(os.getenv("HF_TOKEN"))
16
-
17
  cwd = Path(__file__).parent.resolve()
18
  model_file = os.path.join(cwd, hf_hub_download("toandev/OCR-for-Captcha", "model.onnx"))
19
 
 
20
  img_size = (32, 128)
21
-
22
  vocab = r"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
 
 
23
  tokenizer = Tokenizer(vocab)
24
 
25
 
26
  def to_numpy(tensor):
 
27
  return (
28
  tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()
29
  )
30
 
31
 
32
  def get_transform(img_size):
 
33
  transforms = []
34
  transforms.extend(
35
  [
@@ -42,6 +45,7 @@ def get_transform(img_size):
42
 
43
 
44
  def load_model(model_file):
 
45
  transform = get_transform(img_size)
46
 
47
  onnx_model = onnx.load(model_file)
@@ -51,10 +55,12 @@ def load_model(model_file):
51
  return transform, s
52
 
53
 
 
54
  transform, s = load_model(model_file=model_file)
55
 
56
 
57
- def infer(img: Image.Image):
 
58
  x = transform(img.convert("RGB")).unsqueeze(0)
59
 
60
  ort_inputs = {s.get_inputs()[0].name: to_numpy(x)}
@@ -65,19 +71,20 @@ def infer(img: Image.Image):
65
  return preds[0]
66
 
67
 
68
- demo = gr.Interface(
69
- infer,
70
- gr.components.Image(type="pil"),
71
- gr.components.Textbox(),
72
  title="OCR for CAPTCHA",
73
  description="Solve captchas from images including letters and numbers, success rate is about 80-90%.",
74
  examples=[
75
- "1.png",
76
- "2.jpg",
77
- "3.jpg",
78
- "4.png",
79
- "5.png",
80
  ],
81
  )
82
 
83
- demo.launch()
 
 
4
  from torchvision import transforms as T
5
  from pathlib import Path
6
  from PIL import Image
7
+ from huggingface_hub import hf_hub_download
8
 
9
  import os
10
  import gradio as gr
 
12
  from utils.tokenizer_base import Tokenizer
13
 
14
 
15
+ # Download the model from Hugging Face Hub
 
16
  cwd = Path(__file__).parent.resolve()
17
  model_file = os.path.join(cwd, hf_hub_download("toandev/OCR-for-Captcha", "model.onnx"))
18
 
19
+ # Define the image size and vocabulary
20
  img_size = (32, 128)
 
21
  vocab = r"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
22
+
23
+ # Initialize the tokenizer
24
  tokenizer = Tokenizer(vocab)
25
 
26
 
27
  def to_numpy(tensor):
28
+ """Convert tensor to numpy."""
29
  return (
30
  tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()
31
  )
32
 
33
 
34
  def get_transform(img_size):
35
+ """Preprocess the input image."""
36
  transforms = []
37
  transforms.extend(
38
  [
 
45
 
46
 
47
  def load_model(model_file):
48
+ """Load the model and return the transform function."""
49
  transform = get_transform(img_size)
50
 
51
  onnx_model = onnx.load(model_file)
 
55
  return transform, s
56
 
57
 
58
+ # Load the model
59
  transform, s = load_model(model_file=model_file)
60
 
61
 
62
+ def process(img: Image.Image):
63
+ """Predict the text from the input image."""
64
  x = transform(img.convert("RGB")).unsqueeze(0)
65
 
66
  ort_inputs = {s.get_inputs()[0].name: to_numpy(x)}
 
71
  return preds[0]
72
 
73
 
74
+ iface = gr.Interface(
75
+ process,
76
+ gr.Image(type="pil", label="Input Image"),
77
+ gr.Textbox(label="Predicted Text"),
78
  title="OCR for CAPTCHA",
79
  description="Solve captchas from images including letters and numbers, success rate is about 80-90%.",
80
  examples=[
81
+ "examples/1.png",
82
+ "examples/2.jpg",
83
+ "examples/3.jpg",
84
+ "examples/4.png",
85
+ "examples/5.png",
86
  ],
87
  )
88
 
89
+ if __name__ == "__main__":
90
+ iface.launch()
1.png → examples/1.png RENAMED
File without changes
2.jpg → examples/2.jpg RENAMED
File without changes
3.jpg → examples/3.jpg RENAMED
File without changes
4.png → examples/4.png RENAMED
File without changes
5.png → examples/5.png RENAMED
File without changes