Spaces:
Running
Running
breezedeus
commited on
Commit
•
5bedb5a
1
Parent(s):
33781f7
new gradio app for p2t v1.0
Browse files- README.md +16 -4
- app.py +245 -0
- languages.yaml +84 -0
- packages.txt +5 -0
- requirements.txt +2 -0
README.md
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
---
|
2 |
-
title: Pix2Text
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
colorTo: blue
|
6 |
sdk: gradio
|
7 |
sdk_version: 4.16.0
|
@@ -10,4 +10,16 @@ pinned: false
|
|
10 |
license: mit
|
11 |
---
|
12 |
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
+
title: Pix2Text
|
3 |
+
emoji: 🅿❷🆃
|
4 |
+
colorFrom: red
|
5 |
colorTo: blue
|
6 |
sdk: gradio
|
7 |
sdk_version: 4.16.0
|
|
|
10 |
license: mit
|
11 |
---
|
12 |
|
13 |
+
# Pix2Text (P2T)
|
14 |
+
|
15 |
+
[**CnOCR**](https://github.com/breezedeus/cnocr) is an **Optical Character Recognition (OCR)** toolkit for **Python 3**. It recognizes common characters in **English and numbers**, **Simplified Chinese**, and **Traditional Chinese** (some models), and also supports **vertical text** recognition. It comes with [**20+ well-trained models**](https://cnocr.readthedocs.io/zh/latest/models/) for different application scenarios and can be used directly after installation. CnOCR also provides simple training [commands](https://cnocr.readthedocs.io/zh/latest/train/) for users to train their own models. You are welcome to join the WeChat contact group.
|
16 |
+
|
17 |
+
<div align="center">
|
18 |
+
<img src="https://huggingface.co/datasets/breezedeus/cnocr-wx-qr-code/resolve/main/wx-qr-code.JPG" alt="WeChat Group" width="300px"/>
|
19 |
+
</div>
|
20 |
+
|
21 |
+
The author also maintains the **Planet of Knowledge** [**CnOCR/CnSTD Private Group**](https://t.zsxq.com/FEYZRJQ), which you are welcome to join. The **Planet of Knowledge Private Group** will gradually release private materials related to CnOCR/CnSTD, including [**more detailed training tutorials**](https://articles.zsxq.com/id_u6b4u0wrf46e.html), **non-public models**, and answers to problems encountered during usage. This group also releases the latest research materials related to OCR/STD. In addition, **the author provides members of the private group with free training services for unique data twice a month**.
|
22 |
+
|
23 |
+
## Documentation
|
24 |
+
|
25 |
+
See the [CnOCR online documentation](https://cnocr.readthedocs.io/) (in Chinese).
|
app.py
ADDED
@@ -0,0 +1,245 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# coding: utf-8
|
2 |
+
# Copyright (C) 2023, [Breezedeus](https://github.com/breezedeus).
|
3 |
+
# Licensed to the Apache Software Foundation (ASF) under one
|
4 |
+
# or more contributor license agreements. See the NOTICE file
|
5 |
+
# distributed with this work for additional information
|
6 |
+
# regarding copyright ownership. The ASF licenses this file
|
7 |
+
# to you under the Apache License, Version 2.0 (the
|
8 |
+
# "License"); you may not use this file except in compliance
|
9 |
+
# with the License. You may obtain a copy of the License at
|
10 |
+
#
|
11 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
12 |
+
#
|
13 |
+
# Unless required by applicable law or agreed to in writing,
|
14 |
+
# software distributed under the License is distributed on an
|
15 |
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
16 |
+
# KIND, either express or implied. See the License for the
|
17 |
+
# specific language governing permissions and limitations
|
18 |
+
# under the License.
|
19 |
+
# Ref: https://huggingface.co/spaces/hysts/Manga-OCR/blob/main/app.py
|
20 |
+
|
21 |
+
import os
|
22 |
+
import json
|
23 |
+
import functools
|
24 |
+
import random
|
25 |
+
import string
|
26 |
+
import time
|
27 |
+
|
28 |
+
import yaml
|
29 |
+
|
30 |
+
import gradio as gr
|
31 |
+
import numpy as np
|
32 |
+
|
33 |
+
# from cnstd.utils import pil_to_numpy, imsave
|
34 |
+
|
35 |
+
from pix2text import Pix2Text
|
36 |
+
from pix2text.utils import set_logger, merge_line_texts
|
37 |
+
|
38 |
+
logger = set_logger()

# Mapping of language display name -> language code, read once at import time.
# The context manager closes the file handle deterministically instead of
# leaving an open handle behind for the garbage collector.
with open('languages.yaml', 'r', encoding='utf-8') as _languages_file:
    LANGUAGES = yaml.safe_load(_languages_file)['languages']
|
41 |
+
|
42 |
+
|
43 |
+
@functools.lru_cache(maxsize=4)
def _load_p2t_model(languages: tuple):
    """Build a ``Pix2Text`` instance for *languages*, memoized per language tuple.

    The cache is bounded so a long-running demo does not keep one model alive
    for every language combination that was ever requested.
    """
    return Pix2Text(languages=list(languages))


def get_p2t_model(lan_list: list):
    """Return a ``Pix2Text`` model configured for the given language codes.

    Args:
        lan_list: Language codes to recognize, e.g. ``['en', 'ch_sim']``.

    Returns:
        A ``Pix2Text`` instance. Calls that pass the same codes in the same
        order reuse one instance instead of rebuilding the model each time.
    """
    # Lists are unhashable, so the cache is keyed on an equivalent tuple.
    return _load_p2t_model(tuple(lan_list))
|
46 |
+
|
47 |
+
|
48 |
+
def latex_render(latex_str):
    """Wrap *latex_str* in ``$$`` display-math delimiters, each on its own line."""
    template = "$$\n{}\n$$"
    return template.format(latex_str)
|
51 |
+
|
52 |
+
|
53 |
+
def recognize(lang_list, rec_type, resized_shape, image_file):
    """Run Pix2Text on the submitted image and return values for the two outputs.

    Args:
        lang_list: Language display names selected in the UI (keys of
            ``LANGUAGES``).
        rec_type: One of ``'Formula & Text'``, ``'Only Formula'`` or
            ``'Only Text'``.
        resized_shape: Forwarded to Pix2Text as ``resized_shape``; only used in
            the ``'Formula & Text'`` mode.
        image_file: The image to recognize, or ``None`` if none was provided.

    Returns:
        A ``(text, detection_image_path)`` tuple. The path is ``None`` in the
        formula-only and text-only modes, which save no analysis image.

    Raises:
        gr.Error: If no image was provided or ``rec_type`` is not a known mode.
    """
    # Report a readable message instead of failing somewhere inside the model.
    if image_file is None:
        raise gr.Error('Please upload an image first.')

    lang_codes = [LANGUAGES[name] for name in lang_list]
    p2t = get_p2t_model(lang_codes)

    if rec_type == 'Formula & Text':
        # Timestamp plus a random 6-letter suffix gives each request its own
        # output file name for the saved analysis image.
        suffix = ''.join(random.sample(string.ascii_letters, 6))
        out_det_fp = f'out-det-{time.time()}-{suffix}.jpg'
        outs = p2t(
            image_file, resized_shape=resized_shape, save_analysis_res=out_det_fp
        )
        # Keep only the text contents of the recognized elements.
        only_text = merge_line_texts(outs, auto_line_break=True)
        return only_text, out_det_fp

    if rec_type == 'Only Formula':
        return latex_render(p2t.recognize_formula(image_file)), None

    if rec_type == 'Only Text':
        return p2t.recognize_text(image_file), None

    # Falling through would return a bare None, which cannot be split across
    # the two output components; fail explicitly instead.
    raise gr.Error(f'Unsupported image type: {rec_type!r}')
|
76 |
+
|
77 |
+
|
78 |
+
def main():
    """Build the Pix2Text Gradio demo UI and launch it.

    The first row holds three columns: recognition settings, the image upload
    with the submit button, and a project-information panel. The second row
    shows the detection image next to the recognized text. Clicking "Submit"
    calls ``recognize`` with the current settings and image.
    """
    # Language display names in case-insensitive alphabetical order.
    langs = sorted(LANGUAGES.keys(), key=lambda name: name.lower())

    title = 'Demo'

    # Markdown/HTML shown in the information panel of the first row.
    table_desc = """
<div align="center">
<img src="https://www.notion.so/image/https%3A%2F%2Fprod-files-secure.s3.us-west-2.amazonaws.com%2F9341931a-53f0-48e1-b026-0f1ad17b457c%2Fc41e0b1d-4869-4e39-93db-631569e6a38d%2FUntitled.png?table=block&id=3d0819ca-2e1a-46a7-b6f3-b4cf89cd045c" width="120px"/>

[![Visitors](https://api.visitorbadge.io/api/visitors?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2Fbreezedeus%2FCnOCR-Demo&labelColor=%23697689&countColor=%23f5c791&style=flat&labelStyle=upper)](https://visitorbadge.io/status?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2Fbreezedeus%2FCnOCR-Demo)

[![Discord](https://img.shields.io/discord/1200765964434821260?logo=discord&label=Discord)](https://discord.gg/H9FmDSMA)

| | |
| ------------------------------- | --------------------------------------- |
| 🏄 **Free Web Service** | [p2t.breezedeus.com](https://p2t.breezedeus.com) |
| 📀 **Code** | [Github](https://github.com/breezedeus/pix2text) |
| 💬 **Discord** | [P2T @ Discord](https://discord.gg/H9FmDSMA) |
| 👨🏻💻 **Author** | [Breezedeus](https://www.breezedeus.com) |

If useful, please help to **star 🌟 [Pix2Text](https://github.com/breezedeus/pix2text)** 🙏
</div>
"""

    with gr.Blocks() as demo:
        gr.HTML(
            f'<h1 style="text-align: center; margin-bottom: 1rem;"><a href="https://github.com/breezedeus/pix2text" target="_blank">Pix2Text V1.0</a> {title}</h1>'
        )
        with gr.Row(equal_height=False):
            # Left column: recognition settings.
            with gr.Column(min_width=200, variant='panel', scale=3):
                gr.Markdown('### Settings')
                lang_list = gr.Dropdown(
                    label='Text Languages',
                    choices=langs,
                    value=['English', 'Chinese Simplified'],
                    multiselect=True,
                    info='Which languages to be recognized as Texts.',
                )
                rec_type = gr.Radio(
                    choices=['Formula & Text', 'Only Formula', 'Only Text'],
                    label='Image Type',
                    value='Formula & Text',
                    info='Which type of image to be recognized.',
                )
                resized_shape = gr.Slider(
                    label='resized_shape',
                    minimum=512,
                    maximum=2048,
                    value=608,
                    step=32,
                )

            # Middle column: image upload and the submit button.
            with gr.Column(scale=6, variant='compact'):
                gr.Markdown('### Upload Image to be Recognized')
                image_file = gr.Image(
                    label='Image', type="pil", image_mode='RGB', show_label=False
                )
                sub_btn = gr.Button("Submit", variant="primary")

            # Right column: project links and badges.
            with gr.Column(scale=2, variant='compact'):
                gr.Markdown(table_desc)

        with gr.Row(equal_height=False):
            with gr.Column(scale=1, variant='compact'):
                gr.Markdown('**Detection Result**')
                det_result = gr.Image(
                    label='Detection Result', scale=1, show_label=False
                )
            with gr.Column(scale=1, variant='compact'):
                gr.Markdown('**Recognition Result**')
                rec_result = gr.Textbox(
                    label='Recognition Result',
                    lines=5,
                    value='',
                    scale=1,
                    show_label=False,
                    show_copy_button=True,
                )

        # The input order must match the parameter order of ``recognize``;
        # the output order must match its returned (text, image path) tuple.
        sub_btn.click(
            recognize,
            inputs=[lang_list, rec_type, resized_shape, image_file],
            outputs=[rec_result, det_result],
        )

    demo.queue(max_size=10)
    demo.launch()
|
242 |
+
|
243 |
+
|
244 |
+
# Build and launch the demo only when this file is executed as a script.
if __name__ == '__main__':
    main()
|
languages.yaml
ADDED
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
languages:
|
2 |
+
Abaza: abq
|
3 |
+
Adyghe: ady
|
4 |
+
Afrikaans: af
|
5 |
+
Albanian: sq
|
6 |
+
Angika: ang
|
7 |
+
Arabic: ar
|
8 |
+
Assamese: as
|
9 |
+
Avar: ava
|
10 |
+
Azerbaijani: az
|
11 |
+
Belarusian: be
|
12 |
+
Bengali: bn
|
13 |
+
Bhojpuri: bho
|
14 |
+
Bihari: bh
|
15 |
+
Bosnian: bs
|
16 |
+
Bulgarian: bg
|
17 |
+
Chechen: che
|
18 |
+
Croatian: hr
|
19 |
+
Czech: cs
|
20 |
+
Danish: da
|
21 |
+
Dargwa: dar
|
22 |
+
Dutch: nl
|
23 |
+
English: en
|
24 |
+
Estonian: et
|
25 |
+
French: fr
|
26 |
+
German: de
|
27 |
+
Goan Konkani: gom
|
28 |
+
Hindi: hi
|
29 |
+
Hungarian: hu
|
30 |
+
Icelandic: is
|
31 |
+
Indonesian: id
|
32 |
+
Ingush: inh
|
33 |
+
Irish: ga
|
34 |
+
Italian: it
|
35 |
+
Japanese: ja
|
36 |
+
Kabardian: kbd
|
37 |
+
Kannada: kn
|
38 |
+
Korean: ko
|
39 |
+
Kurdish: ku
|
40 |
+
Lak: lbe
|
41 |
+
Latin: la
|
42 |
+
Latvian: lv
|
43 |
+
Lezghian: lez
|
44 |
+
Lithuanian: lt
|
45 |
+
Magahi: mah
|
46 |
+
Maithili: mai
|
47 |
+
Malay: ms
|
48 |
+
Maltese: mt
|
49 |
+
Maori: mi
|
50 |
+
Marathi: mr
|
51 |
+
Mongolian: mn
|
52 |
+
Nagpuri: sck
|
53 |
+
Nepali: ne
|
54 |
+
Newari: new
|
55 |
+
Norwegian: 'no'
|
56 |
+
Occitan: oc
|
57 |
+
Pali: pi
|
58 |
+
Persian (Farsi): fa
|
59 |
+
Polish: pl
|
60 |
+
Portuguese: pt
|
61 |
+
Romanian: ro
|
62 |
+
Russian: ru
|
63 |
+
Serbian (cyrillic): rs_cyrillic
|
64 |
+
Serbian (latin): rs_latin
|
65 |
+
Slovak: sk
|
66 |
+
Slovenian: sl
|
67 |
+
Spanish: es
|
68 |
+
Swahili: sw
|
69 |
+
Swedish: sv
|
70 |
+
Tabassaran: tab
|
71 |
+
Tagalog: tl
|
72 |
+
Tajik: tjk
|
73 |
+
Tamil: ta
|
74 |
+
Telugu: te
|
75 |
+
Thai: th
|
76 |
+
Chinese Simplified: ch_sim
|
77 |
+
Chinese Traditional: ch_tra
|
78 |
+
Turkish: tr
|
79 |
+
Ukrainian: uk
|
80 |
+
Urdu: ur
|
81 |
+
Uyghur: ug
|
82 |
+
Uzbek: uz
|
83 |
+
Vietnamese: vi
|
84 |
+
Welsh: cy
|
packages.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
python3-opencv
|
2 |
+
libglib2.0-0
|
3 |
+
libsm6
|
4 |
+
libxext6
|
5 |
+
libxrender-dev
|
requirements.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
pyyaml
|
2 |
+
pix2text
|