Spaces:

breezedeus
/

Pix2Text-Demo

Running

App Files Files Community

breezedeus committed on Jan 27, 2024

Commit

5bedb5a

1 Parent(s): 33781f7

new gradio app for p2t v1.0

Browse files

Files changed (5) hide show

README.md +16 -4
app.py +245 -0
languages.yaml +84 -0
packages.txt +5 -0
requirements.txt +2 -0

README.md CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
-title: Pix2Text Demo
-emoji: 🏢
-colorFrom: blue
 colorTo: blue
 sdk: gradio
 sdk_version: 4.16.0
@@ -10,4 +10,16 @@ pinned: false
 license: mit
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Pix2Text
+emoji: 🅿❷🆃
+colorFrom: red
 colorTo: blue
 sdk: gradio
 sdk_version: 4.16.0
 license: mit
 ---
+# Pix2Text (P2T)
+[**CnOCR**](https://github.com/breezedeus/cnocr) is an **Optical Character Recognition (OCR)** toolkit for **Python 3**. It supports recognition of common characters in **English and numbers**, **Simplified Chinese**, **Traditional Chinese** (some models), and **vertical text** recognition. It comes with [**20+ well-trained models**](https://cnocr.readthedocs.io/zh/latest/models/) for different application scenarios and can be used directly after installation. Also, CnOCR provides simple training [commands](https://cnocr.readthedocs.io/zh/latest/train/) for users to train their own models. Welcome to join the WeChat contact group.
+<div align="center">
+  <img src="https://huggingface.co/datasets/breezedeus/cnocr-wx-qr-code/resolve/main/wx-qr-code.JPG" alt="WeChat Group" width="300px"/>
+</div>
+The author also maintains the **Planet of Knowledge** [**CnOCR/CnSTD Private Group**](https://t.zsxq.com/FEYZRJQ); you are welcome to join. The **Planet of Knowledge Private Group** will progressively release private CnOCR/CnSTD-related materials, including [**more detailed training tutorials**](https://articles.zsxq.com/id_u6b4u0wrf46e.html), **non-public models**, answers to problems encountered during use, etc. This group also releases the latest research materials related to OCR/STD. In addition, **the author provides free training services for unique data twice a month in the private group**.
+## Documentation
+See the [CnOCR online documentation](https://cnocr.readthedocs.io/) (in Chinese).

app.py ADDED Viewed

	@@ -0,0 +1,245 @@

+# coding: utf-8
+# Copyright (C) 2023, [Breezedeus](https://github.com/breezedeus).
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# Ref: https://huggingface.co/spaces/hysts/Manga-OCR/blob/main/app.py
+import os
+import json
+import functools
+import random
+import string
+import time
+import yaml
+import gradio as gr
+import numpy as np
+# from cnstd.utils import pil_to_numpy, imsave
+from pix2text import Pix2Text
+from pix2text.utils import set_logger, merge_line_texts
+logger = set_logger()
+LANGUAGES = yaml.safe_load(open('languages.yaml', 'r', encoding='utf-8'))['languages']
+def get_p2t_model(lan_list: list):
+    p2t = Pix2Text(languages=lan_list)
+    return p2t
+def latex_render(latex_str):
+    return f"$$\n{latex_str}\n$$"
+    # return latex_str
+def recognize(lang_list, rec_type, resized_shape, image_file):
+    lang_list = [LANGUAGES[l] for l in lang_list]
+    p2t = get_p2t_model(lang_list)
+    if rec_type == 'Formula & Text':
+        suffix = list(string.ascii_letters)
+        random.shuffle(suffix)
+        suffix = ''.join(suffix[:6])
+        out_det_fp = f'out-det-{time.time()}-{suffix}.jpg'
+        outs = p2t(
+            image_file, resized_shape=resized_shape, save_analysis_res=out_det_fp
+        )
+        # To get just the text contents, use:
+        only_text = merge_line_texts(outs, auto_line_break=True)
+        # return only_text, latex_render(only_text)
+        return only_text, out_det_fp
+    elif rec_type == 'Only Formula':
+        only_text = p2t.recognize_formula(image_file)
+        return latex_render(only_text), None
+    elif rec_type == 'Only Text':
+        only_text = p2t.recognize_text(image_file)
+        return only_text, None
+def main():
+    langs = list(LANGUAGES.keys())
+    langs.sort(key=lambda x: x.lower())
+    title = 'Demo'
+    # example_func = functools.partial(
+    #     recognize,
+    #     new_size=768,
+    #     box_score_thresh=0.3,
+    #     min_box_size=10,
+    # )
+    # examples = [
+    #     [
+    #         'ch_PP-OCRv3_det::onnx',
+    #         True,
+    #         'number-densenet_lite_136-fc',
+    #         False,
+    #         'docs/examples/card1-s.jpg',
+    #     ],
+    #     [
+    #         'ch_PP-OCRv3_det::onnx',
+    #         True,
+    #         'number-densenet_lite_136-fc',
+    #         False,
+    #         'docs/examples/card2-s.jpg',
+    #     ],
+    #     [
+    #         'ch_PP-OCRv3_det::onnx',
+    #         True,
+    #         'number-densenet_lite_136-fc',
+    #         False,
+    #         'docs/examples/cy1-s.jpg',
+    #     ],
+    #     [
+    #         'ch_PP-OCRv3_det::onnx',
+    #         False,
+    #         'densenet_lite_136-gru',
+    #         False,
+    #         'docs/examples/huochepiao.jpeg',
+    #     ],
+    #     [
+    #         'ch_PP-OCRv3_det::onnx',
+    #         False,
+    #         'densenet_lite_136-gru',
+    #         False,
+    #         'docs/examples/1_res.jpg',
+    #     ],
+    #     [
+    #         'db_shufflenet_v2::pytorch',
+    #         False,
+    #         'en_number_mobile_v2.0',
+    #         False,
+    #         'docs/examples/en_book1.jpeg',
+    #     ],
+    #     [
+    #         'db_shufflenet_v2::pytorch',
+    #         False,
+    #         'densenet_lite_136-gru',
+    #         True,
+    #         'docs/examples/beauty0.jpg',
+    #     ],
+    # ]
+    table_desc = """
+<div align="center">
+<img src="https://www.notion.so/image/https%3A%2F%2Fprod-files-secure.s3.us-west-2.amazonaws.com%2F9341931a-53f0-48e1-b026-0f1ad17b457c%2Fc41e0b1d-4869-4e39-93db-631569e6a38d%2FUntitled.png?table=block&id=3d0819ca-2e1a-46a7-b6f3-b4cf89cd045c" width="120px"/>
+[![Visitors](https://api.visitorbadge.io/api/visitors?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2Fbreezedeus%2FCnOCR-Demo&labelColor=%23697689&countColor=%23f5c791&style=flat&labelStyle=upper)](https://visitorbadge.io/status?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2Fbreezedeus%2FCnOCR-Demo)
+[![Discord](https://img.shields.io/discord/1200765964434821260?logo=discord&label=Discord)](https://discord.gg/H9FmDSMA)
+|                                 |                                         |
+| ------------------------------- | --------------------------------------- |
+| 🏄 **Free Web Service**             | [p2t.breezedeus.com](https://p2t.breezedeus.com) |
+| 📀 **Code**              | [Github](https://github.com/breezedeus/pix2text) |
+| 💬 **Discord**              | [P2T @ Discord](https://discord.gg/H9FmDSMA) |
+| 👨🏻‍💻 **Author**            | [Breezedeus](https://www.breezedeus.com) |
+If useful, please help to **star 🌟 [Pix2Text](https://github.com/breezedeus/pix2text)** 🙏
+</div>
+    """
+    with gr.Blocks() as demo:
+        gr.HTML(
+            f'<h1 style="text-align: center; margin-bottom: 1rem;"><a href="https://github.com/breezedeus/pix2text" target="_blank">Pix2Text V1.0</a> {title}</h1>'
+        )
+        with gr.Row(equal_height=False):
+            with gr.Column(min_width=200, variant='panel', scale=3):
+                gr.Markdown('### Settings')
+                lang_list = gr.Dropdown(
+                    label='Text Languages',
+                    choices=langs,
+                    value=['English', 'Chinese Simplified'],
+                    multiselect=True,
+                    info='Which languages to be recognized as Texts.',
+                )
+                rec_type = gr.Radio(
+                    choices=['Formula & Text', 'Only Formula', 'Only Text'],
+                    label='Image Type',
+                    value='Formula & Text',
+                    info='Which type of image to be recognized.',
+                )
+                resized_shape = gr.Slider(
+                    label='resized_shape',
+                    minimum=512,
+                    maximum=2048,
+                    value=608,
+                    step=32,
+                )
+                # with gr.Accordion('Choose Text Languages', open=False):
+                #     lang_list = gr.Checkboxgroup(
+                #         label='Text Languages',
+                #         choices=langs,
+                #         value=['English', 'Chinese Simplified'],
+                #     )
+            with gr.Column(scale=6, variant='compact'):
+                gr.Markdown('### Upload Image to be Recognized')
+                image_file = gr.Image(label='Image', type="pil", image_mode='RGB', show_label=False)
+                sub_btn = gr.Button("Submit", variant="primary")
+            with gr.Column(scale=2, variant='compact'):
+                gr.Markdown(table_desc)
+        with gr.Row(equal_height=False):
+            with gr.Column(scale=1, variant='compact'):
+                gr.Markdown('**Detection Result**')
+                det_result = gr.Image(
+                    label='Detection Result', scale=1, show_label=False
+                )
+            with gr.Column(scale=1, variant='compact'):
+                gr.Markdown('**Recognition Result**')
+                rec_result = gr.Textbox(
+                    label=f'Recognition Result',
+                    lines=5,
+                    value='',
+                    scale=1,
+                    show_label=False,
+                    show_copy_button=True,
+                )
+            # render_result = gr.Markdown(label=f'After Rendering', value='')
+            # rec_result.change(latex_render, rec_result, render_result)
+        sub_btn.click(
+            recognize,
+            inputs=[lang_list, rec_type, resized_shape, image_file,],
+            outputs=[rec_result, det_result],
+        )
+        # gr.Examples(
+        #     label='示例',
+        #     examples=examples,
+        #     inputs=[
+        #         det_model_name,
+        #         is_single_line,
+        #         rec_model_name,
+        #         use_angle_clf,
+        #         image_file,
+        #     ],
+        #     outputs=[out_image, naive_warn, out_texts],
+        #     fn=example_func,
+        #     cache_examples=os.getenv('CACHE_EXAMPLES') == '1',
+        # )
+    demo.queue(max_size=10)
+    demo.launch()
+if __name__ == '__main__':
+    main()

languages.yaml ADDED Viewed

	@@ -0,0 +1,84 @@

+languages:
+  Abaza: abq
+  Adyghe: ady
+  Afrikaans: af
+  Albanian: sq
+  Angika: ang
+  Arabic: ar
+  Assamese: as
+  Avar: ava
+  Azerbaijani: az
+  Belarusian: be
+  Bengali: bn
+  Bhojpuri: bho
+  Bihari: bh
+  Bosnian: bs
+  Bulgarian: bg
+  Chechen: che
+  Croatian: hr
+  Czech: cs
+  Danish: da
+  Dargwa: dar
+  Dutch: nl
+  English: en
+  Estonian: et
+  French: fr
+  German: de
+  Goan Konkani: gom
+  Hindi: hi
+  Hungarian: hu
+  Icelandic: is
+  Indonesian: id
+  Ingush: inh
+  Irish: ga
+  Italian: it
+  Japanese: ja
+  Kabardian: kbd
+  Kannada: kn
+  Korean: ko
+  Kurdish: ku
+  Lak: lbe
+  Latin: la
+  Latvian: lv
+  Lezghian: lez
+  Lithuanian: lt
+  Magahi: mah
+  Maithili: mai
+  Malay: ms
+  Maltese: mt
+  Maori: mi
+  Marathi: mr
+  Mongolian: mn
+  Nagpuri: sck
+  Nepali: ne
+  Newari: new
+  Norwegian: 'no'
+  Occitan: oc
+  Pali: pi
+  Persian (Farsi): fa
+  Polish: pl
+  Portuguese: pt
+  Romanian: ro
+  Russian: ru
+  Serbian (cyrillic): rs_cyrillic
+  Serbian (latin): rs_latin
+  Slovak: sk
+  Slovenian: sl
+  Spanish: es
+  Swahili: sw
+  Swedish: sv
+  Tabassaran: tab
+  Tagalog: tl
+  Tajik: tjk
+  Tamil: ta
+  Telugu: te
+  Thai: th
+  Chinese Simplified: ch_sim
+  Chinese Traditional: ch_tra
+  Turkish: tr
+  Ukrainian: uk
+  Urdu: ur
+  Uyghur: ug
+  Uzbek: uz
+  Vietnamese: vi
+  Welsh: cy

packages.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+python3-opencv
+libglib2.0-0
+libsm6
+libxext6
+libxrender-dev

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ pyyaml
2	+ pix2text