Spaces:

topdu
/

OpenOCR-Demo

Running

App Files Community

topdu committed 26 days ago

Commit

1ceafe5

1 Parent(s): 4842f28

update app

Browse files

Files changed (3) hide show

app.py +46 -21
opendet/preprocess/db_resize_for_test.py +2 -0
tools/infer_det.py +2 -0

app.py CHANGED Viewed

@@ -1,4 +1,3 @@
-# -*- encoding: utf-8 -*-
 # @Author: OpenOCR
 # @Contact: 784990967@qq.com
 import os
@@ -12,27 +11,43 @@ import time
 from PIL import Image
 from tools.infer_e2e import OpenOCR, check_and_download_font, draw_ocr_box_txt
-drop_score = 0.01
-text_sys = OpenOCR(drop_score=drop_score)
 # warm up 5 times
 if True:
     img = np.random.uniform(0, 255, [640, 640, 3]).astype(np.uint8)
     for i in range(5):
         res = text_sys(img_numpy=img)
-font_path = './simfang.ttf'
-check_and_download_font(font_path)
 def main(input_image,
-         rec_drop_score=0.01,
          mask_thresh=0.3,
          box_thresh=0.6,
          unclip_ratio=1.5,
          det_score_mode='slow'):
     img = input_image[:, :, ::-1]
     starttime = time.time()
     results, time_dict, mask = text_sys(img_numpy=img,
                                         return_mask=True,
                                         thresh=mask_thresh,
                                         box_thresh=box_thresh,
                                         unclip_ratio=unclip_ratio,
@@ -54,7 +69,6 @@ def main(input_image,
     mask = mask[0, 0, :, :] > mask_thresh
     return save_pred, elapse, draw_img, mask.astype('uint8') * 255
 def get_all_file_names_including_subdirs(dir_path):
     all_file_names = []
@@ -65,11 +79,11 @@ def get_all_file_names_including_subdirs(dir_path):
     file_names_only = [os.path.basename(file) for file in all_file_names]
     return file_names_only
 def list_image_paths(directory):
     image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff')
     image_paths = []
     for root, dirs, files in os.walk(directory):
         for file in files:
             if file.lower().endswith(image_extensions):
@@ -80,14 +94,12 @@ def list_image_paths(directory):
     image_paths = sorted(image_paths)
     return image_paths
 def find_file_in_current_dir_and_subdirs(file_name):
     for root, dirs, files in os.walk('.'):
         if file_name in files:
             relative_path = os.path.join(root, file_name)
             return relative_path
 e2e_img_example = list_image_paths('./OCR_e2e_img')
 if __name__ == '__main__':
@@ -96,7 +108,7 @@ if __name__ == '__main__':
     with gr.Blocks(css=css) as demo:
         gr.HTML("""
                 <h1 style='text-align: center;'><a href="https://github.com/Topdu/OpenOCR">OpenOCR</a></h1>
-                <p style='text-align: center;'>A general OCR system with accuracy and efficiency (created by <a href="https://github.com/Topdu/OpenOCR">OCR Team</a>, <a href="https://fvl.fudan.edu.cn">FVL Lab</a>)</p>""")
         with gr.Row():
             with gr.Column(scale=1):
                 input_image = gr.Image(label='Input image',
@@ -107,8 +119,20 @@ if __name__ == '__main__':
                                        label='Examples')
                 downstream = gr.Button('Run')
-                with gr.Row():
-                    with gr.Column():
                         rec_drop_score_slider = gr.Slider(
                             0.0,
                             1.0,
@@ -123,7 +147,7 @@ if __name__ == '__main__':
                             step=0.01,
                             label="Mask Threshold",
                             info="Mask threshold for binarizing masks, defaults to 0.3, turn it down if there is text truncation.")
-                    with gr.Column():
                         box_thresh_slider = gr.Slider(
                             0.0,
                             1.0,
@@ -139,12 +163,13 @@ if __name__ == '__main__':
                             label="Unclip Ratio",
                             info="Expansion factor for parsing text boxes, default value is 1.5. The larger the value, the larger the text box.")
-                det_score_mode_dropdown = gr.Dropdown(
-                    ["slow", "fast"],
-                    value="slow",
-                    label="Det Score Mode",
-                    info="The confidence calculation mode of the text box, the default is slow. Slow mode is slower but more accurate. Fast mode is faster but less accurate."
-                )
             with gr.Column(scale=1):
                 img_mask = gr.Image(label='mask',
@@ -159,7 +184,7 @@ if __name__ == '__main__':
             downstream.click(fn=main,
                              inputs=[
-                                 input_image, rec_drop_score_slider,
                                  mask_thresh_slider, box_thresh_slider,
                                  unclip_ratio_slider, det_score_mode_dropdown
                              ],

 # @Author: OpenOCR
 # @Contact: 784990967@qq.com
 import os
 from PIL import Image
 from tools.infer_e2e import OpenOCR, check_and_download_font, draw_ocr_box_txt
+def initialize_ocr(model_type, drop_score):
+    return OpenOCR(mode=model_type, drop_score=drop_score)
+# Default model type
+model_type = 'mobile'
+drop_score = 0.4
+text_sys = initialize_ocr(model_type, drop_score)
 # warm up 5 times
 if True:
     img = np.random.uniform(0, 255, [640, 640, 3]).astype(np.uint8)
     for i in range(5):
         res = text_sys(img_numpy=img)
+font_path = './simfang.ttf'
+font_path = check_and_download_font(font_path)
 def main(input_image,
+         model_type_select,
+         det_input_size_textbox=960,
+         rec_drop_score=0.4,
          mask_thresh=0.3,
          box_thresh=0.6,
          unclip_ratio=1.5,
          det_score_mode='slow'):
+    global text_sys, model_type
+    # Update OCR model if the model type changes
+    if model_type_select != model_type:
+        model_type = model_type_select
+        text_sys = initialize_ocr(model_type, rec_drop_score)
     img = input_image[:, :, ::-1]
     starttime = time.time()
     results, time_dict, mask = text_sys(img_numpy=img,
                                         return_mask=True,
+                                        det_input_size=int(det_input_size_textbox),
                                         thresh=mask_thresh,
                                         box_thresh=box_thresh,
                                         unclip_ratio=unclip_ratio,
     mask = mask[0, 0, :, :] > mask_thresh
     return save_pred, elapse, draw_img, mask.astype('uint8') * 255
 def get_all_file_names_including_subdirs(dir_path):
     all_file_names = []
     file_names_only = [os.path.basename(file) for file in all_file_names]
     return file_names_only
 def list_image_paths(directory):
     image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff')
     image_paths = []
     for root, dirs, files in os.walk(directory):
         for file in files:
             if file.lower().endswith(image_extensions):
     image_paths = sorted(image_paths)
     return image_paths
 def find_file_in_current_dir_and_subdirs(file_name):
     for root, dirs, files in os.walk('.'):
         if file_name in files:
             relative_path = os.path.join(root, file_name)
             return relative_path
 e2e_img_example = list_image_paths('./OCR_e2e_img')
 if __name__ == '__main__':
     with gr.Blocks(css=css) as demo:
         gr.HTML("""
                 <h1 style='text-align: center;'><a href="https://github.com/Topdu/OpenOCR">OpenOCR</a></h1>
+                <p style='text-align: center;'>A general OCR system with accuracy and efficiency (created by <a href="https://github.com/Topdu/OpenOCR">OCR Team</a>, <a href="https://fvl.fudan.edu.cn">FVL Lab</a>) <a href="https://github.com/Topdu/OpenOCR/tree/main?tab=readme-ov-file#quick-start">[Local Deployment]</a></p>""")
         with gr.Row():
             with gr.Column(scale=1):
                 input_image = gr.Image(label='Input image',
                                        label='Examples')
                 downstream = gr.Button('Run')
+                # Add parameter-tuning components
+                with gr.Column():
+                    with gr.Row():
+                        det_input_size_textbox = gr.Number(
+                            label='Detection Input Size',
+                            value=960,
+                            info='The longest side of the detection network input size, defaults to 960.')
+                        det_score_mode_dropdown = gr.Dropdown(
+                            ["slow", "fast"],
+                            value="slow",
+                            label="Det Score Mode",
+                            info="The confidence calculation mode of the text box, the default is slow. Slow mode is slower but more accurate. Fast mode is faster but less accurate."
+                        )
+                    with gr.Row():
                         rec_drop_score_slider = gr.Slider(
                             0.0,
                             1.0,
                             step=0.01,
                             label="Mask Threshold",
                             info="Mask threshold for binarizing masks, defaults to 0.3, turn it down if there is text truncation.")
+                    with gr.Row():
                         box_thresh_slider = gr.Slider(
                             0.0,
                             1.0,
                             label="Unclip Ratio",
                             info="Expansion factor for parsing text boxes, default value is 1.5. The larger the value, the larger the text box.")
+                    # Model selection component
+                    model_type_dropdown = gr.Dropdown(
+                        ['mobile', 'server'],
+                        value='mobile',
+                        label='Model Type',
+                        info='Select the type of OCR model: high efficiency model mobile, high accuracy model server.'
+                    )
             with gr.Column(scale=1):
                 img_mask = gr.Image(label='mask',
             downstream.click(fn=main,
                              inputs=[
+                                 input_image, model_type_dropdown, det_input_size_textbox, rec_drop_score_slider,
                                  mask_thresh_slider, box_thresh_slider,
                                  unclip_ratio_slider, det_score_mode_dropdown
                              ],

opendet/preprocess/db_resize_for_test.py CHANGED Viewed

@@ -27,6 +27,8 @@ class DetResizeForTest(object):
     def __call__(self, data):
         img = data['image']
         src_h, src_w, _ = img.shape
         if sum([src_h, src_w]) < 64:
             img = self.image_padding(img)

     def __call__(self, data):
         img = data['image']
+        if 'max_sile_len' in data:
+            self.limit_side_len = data['max_sile_len']
         src_h, src_w, _ = img.shape
         if sum([src_h, src_w]) < 64:
             img = self.image_padding(img)

tools/infer_det.py CHANGED Viewed

@@ -392,6 +392,8 @@ class OpenDetector(object):
                     img = f.read()
                     data = {'image': img}
                 data = self.transform(data, self.ops[:1])
             batch = self.transform(data, self.ops[1:])
             images = np.expand_dims(batch[0], axis=0)

                     img = f.read()
                     data = {'image': img}
                 data = self.transform(data, self.ops[:1])
+            if kwargs.get('det_input_size', None) is not None:
+                data['max_sile_len'] = kwargs['det_input_size']
             batch = self.transform(data, self.ops[1:])
             images = np.expand_dims(batch[0], axis=0)