topdu committed on
Commit
1ceafe5
·
1 Parent(s): 4842f28

update app

Browse files
app.py CHANGED
@@ -1,4 +1,3 @@
1
- # -*- encoding: utf-8 -*-
2
  # @Author: OpenOCR
3
  # @Contact: 784990967@qq.com
4
  import os
@@ -12,27 +11,43 @@ import time
12
  from PIL import Image
13
  from tools.infer_e2e import OpenOCR, check_and_download_font, draw_ocr_box_txt
14
 
15
- drop_score = 0.01
16
- text_sys = OpenOCR(drop_score=drop_score)
 
 
 
 
 
 
17
  # warm up 5 times
18
  if True:
19
  img = np.random.uniform(0, 255, [640, 640, 3]).astype(np.uint8)
20
  for i in range(5):
21
  res = text_sys(img_numpy=img)
22
- font_path = './simfang.ttf'
23
- check_and_download_font(font_path)
24
 
 
 
25
 
26
  def main(input_image,
27
- rec_drop_score=0.01,
 
 
28
  mask_thresh=0.3,
29
  box_thresh=0.6,
30
  unclip_ratio=1.5,
31
  det_score_mode='slow'):
 
 
 
 
 
 
 
32
  img = input_image[:, :, ::-1]
33
  starttime = time.time()
34
  results, time_dict, mask = text_sys(img_numpy=img,
35
  return_mask=True,
 
36
  thresh=mask_thresh,
37
  box_thresh=box_thresh,
38
  unclip_ratio=unclip_ratio,
@@ -54,7 +69,6 @@ def main(input_image,
54
  mask = mask[0, 0, :, :] > mask_thresh
55
  return save_pred, elapse, draw_img, mask.astype('uint8') * 255
56
 
57
-
58
  def get_all_file_names_including_subdirs(dir_path):
59
  all_file_names = []
60
 
@@ -65,11 +79,11 @@ def get_all_file_names_including_subdirs(dir_path):
65
  file_names_only = [os.path.basename(file) for file in all_file_names]
66
  return file_names_only
67
 
68
-
69
  def list_image_paths(directory):
70
  image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff')
71
 
72
  image_paths = []
 
73
  for root, dirs, files in os.walk(directory):
74
  for file in files:
75
  if file.lower().endswith(image_extensions):
@@ -80,14 +94,12 @@ def list_image_paths(directory):
80
  image_paths = sorted(image_paths)
81
  return image_paths
82
 
83
-
84
  def find_file_in_current_dir_and_subdirs(file_name):
85
  for root, dirs, files in os.walk('.'):
86
  if file_name in files:
87
  relative_path = os.path.join(root, file_name)
88
  return relative_path
89
 
90
-
91
  e2e_img_example = list_image_paths('./OCR_e2e_img')
92
 
93
  if __name__ == '__main__':
@@ -96,7 +108,7 @@ if __name__ == '__main__':
96
  with gr.Blocks(css=css) as demo:
97
  gr.HTML("""
98
  <h1 style='text-align: center;'><a href="https://github.com/Topdu/OpenOCR">OpenOCR</a></h1>
99
- <p style='text-align: center;'>A general OCR system with accuracy and efficiency (created by <a href="https://github.com/Topdu/OpenOCR">OCR Team</a>, <a href="https://fvl.fudan.edu.cn">FVL Lab</a>)</p>""")
100
  with gr.Row():
101
  with gr.Column(scale=1):
102
  input_image = gr.Image(label='Input image',
@@ -107,8 +119,20 @@ if __name__ == '__main__':
107
  label='Examples')
108
  downstream = gr.Button('Run')
109
 
110
- with gr.Row():
111
- with gr.Column():
 
 
 
 
 
 
 
 
 
 
 
 
112
  rec_drop_score_slider = gr.Slider(
113
  0.0,
114
  1.0,
@@ -123,7 +147,7 @@ if __name__ == '__main__':
123
  step=0.01,
124
  label="Mask Threshold",
125
  info="Mask threshold for binarizing masks, defaults to 0.3, turn it down if there is text truncation.")
126
- with gr.Column():
127
  box_thresh_slider = gr.Slider(
128
  0.0,
129
  1.0,
@@ -139,12 +163,13 @@ if __name__ == '__main__':
139
  label="Unclip Ratio",
140
  info="Expansion factor for parsing text boxes, default value is 1.5. The larger the value, the larger the text box.")
141
 
142
- det_score_mode_dropdown = gr.Dropdown(
143
- ["slow", "fast"],
144
- value="slow",
145
- label="Det Score Mode",
146
- info="The confidence calculation mode of the text box, the default is slow. Slow mode is slower but more accurate. Fast mode is faster but less accurate."
147
- )
 
148
 
149
  with gr.Column(scale=1):
150
  img_mask = gr.Image(label='mask',
@@ -159,7 +184,7 @@ if __name__ == '__main__':
159
 
160
  downstream.click(fn=main,
161
  inputs=[
162
- input_image, rec_drop_score_slider,
163
  mask_thresh_slider, box_thresh_slider,
164
  unclip_ratio_slider, det_score_mode_dropdown
165
  ],
 
 
1
  # @Author: OpenOCR
2
  # @Contact: 784990967@qq.com
3
  import os
 
11
  from PIL import Image
12
  from tools.infer_e2e import OpenOCR, check_and_download_font, draw_ocr_box_txt
13
 
14
+ def initialize_ocr(model_type, drop_score):
15
+ return OpenOCR(mode=model_type, drop_score=drop_score)
16
+
17
+ # Default model type
18
+ model_type = 'mobile'
19
+ drop_score = 0.4
20
+ text_sys = initialize_ocr(model_type, drop_score)
21
+
22
  # warm up 5 times
23
  if True:
24
  img = np.random.uniform(0, 255, [640, 640, 3]).astype(np.uint8)
25
  for i in range(5):
26
  res = text_sys(img_numpy=img)
 
 
27
 
28
+ font_path = './simfang.ttf'
29
+ font_path = check_and_download_font(font_path)
30
 
31
  def main(input_image,
32
+ model_type_select,
33
+ det_input_size_textbox=960,
34
+ rec_drop_score=0.4,
35
  mask_thresh=0.3,
36
  box_thresh=0.6,
37
  unclip_ratio=1.5,
38
  det_score_mode='slow'):
39
+ global text_sys, model_type
40
+
41
+ # Update OCR model if the model type changes
42
+ if model_type_select != model_type:
43
+ model_type = model_type_select
44
+ text_sys = initialize_ocr(model_type, rec_drop_score)
45
+
46
  img = input_image[:, :, ::-1]
47
  starttime = time.time()
48
  results, time_dict, mask = text_sys(img_numpy=img,
49
  return_mask=True,
50
+ det_input_size=int(det_input_size_textbox),
51
  thresh=mask_thresh,
52
  box_thresh=box_thresh,
53
  unclip_ratio=unclip_ratio,
 
69
  mask = mask[0, 0, :, :] > mask_thresh
70
  return save_pred, elapse, draw_img, mask.astype('uint8') * 255
71
 
 
72
  def get_all_file_names_including_subdirs(dir_path):
73
  all_file_names = []
74
 
 
79
  file_names_only = [os.path.basename(file) for file in all_file_names]
80
  return file_names_only
81
 
 
82
  def list_image_paths(directory):
83
  image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff')
84
 
85
  image_paths = []
86
+
87
  for root, dirs, files in os.walk(directory):
88
  for file in files:
89
  if file.lower().endswith(image_extensions):
 
94
  image_paths = sorted(image_paths)
95
  return image_paths
96
 
 
97
  def find_file_in_current_dir_and_subdirs(file_name):
98
  for root, dirs, files in os.walk('.'):
99
  if file_name in files:
100
  relative_path = os.path.join(root, file_name)
101
  return relative_path
102
 
 
103
  e2e_img_example = list_image_paths('./OCR_e2e_img')
104
 
105
  if __name__ == '__main__':
 
108
  with gr.Blocks(css=css) as demo:
109
  gr.HTML("""
110
  <h1 style='text-align: center;'><a href="https://github.com/Topdu/OpenOCR">OpenOCR</a></h1>
111
+ <p style='text-align: center;'>A general OCR system with accuracy and efficiency (created by <a href="https://github.com/Topdu/OpenOCR">OCR Team</a>, <a href="https://fvl.fudan.edu.cn">FVL Lab</a>) <a href="https://github.com/Topdu/OpenOCR/tree/main?tab=readme-ov-file#quick-start">[Local Deployment]</a></p>""")
112
  with gr.Row():
113
  with gr.Column(scale=1):
114
  input_image = gr.Image(label='Input image',
 
119
  label='Examples')
120
  downstream = gr.Button('Run')
121
 
122
+ # 添加参数调节组件
123
+ with gr.Column():
124
+ with gr.Row():
125
+ det_input_size_textbox = gr.Number(
126
+ label='Detection Input Size',
127
+ value=960,
128
+ info='The longest side of the detection network input size, defaults to 960.')
129
+ det_score_mode_dropdown = gr.Dropdown(
130
+ ["slow", "fast"],
131
+ value="slow",
132
+ label="Det Score Mode",
133
+ info="The confidence calculation mode of the text box, the default is slow. Slow mode is slower but more accurate. Fast mode is faster but less accurate."
134
+ )
135
+ with gr.Row():
136
  rec_drop_score_slider = gr.Slider(
137
  0.0,
138
  1.0,
 
147
  step=0.01,
148
  label="Mask Threshold",
149
  info="Mask threshold for binarizing masks, defaults to 0.3, turn it down if there is text truncation.")
150
+ with gr.Row():
151
  box_thresh_slider = gr.Slider(
152
  0.0,
153
  1.0,
 
163
  label="Unclip Ratio",
164
  info="Expansion factor for parsing text boxes, default value is 1.5. The larger the value, the larger the text box.")
165
 
166
+ # 模型选择组件
167
+ model_type_dropdown = gr.Dropdown(
168
+ ['mobile', 'server'],
169
+ value='mobile',
170
+ label='Model Type',
171
+ info='Select the type of OCR model: high efficiency model mobile, high accuracy model server.'
172
+ )
173
 
174
  with gr.Column(scale=1):
175
  img_mask = gr.Image(label='mask',
 
184
 
185
  downstream.click(fn=main,
186
  inputs=[
187
+ input_image, model_type_dropdown, det_input_size_textbox, rec_drop_score_slider,
188
  mask_thresh_slider, box_thresh_slider,
189
  unclip_ratio_slider, det_score_mode_dropdown
190
  ],
opendet/preprocess/db_resize_for_test.py CHANGED
@@ -27,6 +27,8 @@ class DetResizeForTest(object):
27
 
28
  def __call__(self, data):
29
  img = data['image']
 
 
30
  src_h, src_w, _ = img.shape
31
  if sum([src_h, src_w]) < 64:
32
  img = self.image_padding(img)
 
27
 
28
  def __call__(self, data):
29
  img = data['image']
30
+ if 'max_sile_len' in data:
31
+ self.limit_side_len = data['max_sile_len']
32
  src_h, src_w, _ = img.shape
33
  if sum([src_h, src_w]) < 64:
34
  img = self.image_padding(img)
tools/infer_det.py CHANGED
@@ -392,6 +392,8 @@ class OpenDetector(object):
392
  img = f.read()
393
  data = {'image': img}
394
  data = self.transform(data, self.ops[:1])
 
 
395
  batch = self.transform(data, self.ops[1:])
396
 
397
  images = np.expand_dims(batch[0], axis=0)
 
392
  img = f.read()
393
  data = {'image': img}
394
  data = self.transform(data, self.ops[:1])
395
+ if kwargs.get('det_input_size', None) is not None:
396
+ data['max_sile_len'] = kwargs['det_input_size']
397
  batch = self.transform(data, self.ops[1:])
398
 
399
  images = np.expand_dims(batch[0], axis=0)