Spaces:
Running
Running
# coding: utf-8 | |
# [Pix2Text](https://github.com/breezedeus/pix2text): an Open-Source Alternative to Mathpix. | |
# Copyright (C) 2022-2024, [Breezedeus](https://www.breezedeus.com). | |
import os | |
import json | |
import functools | |
import random | |
import shutil | |
import string | |
import tempfile | |
import time | |
import zipfile | |
from pathlib import Path | |
import yaml | |
import gradio as gr | |
import numpy as np | |
from huggingface_hub import hf_hub_download | |
# from cnstd.utils import pil_to_numpy, imsave | |
from pix2text import Pix2Text | |
from pix2text.utils import set_logger, merge_line_texts | |
logger = set_logger()

# Language table loaded from `languages.yaml`: its keys are offered as choices
# in the UI language dropdown and its values are what gets passed to Pix2Text
# as `languages`. Read via `Path.read_text` so the file handle is closed
# immediately instead of being left open for the life of the process.
LANGUAGES = yaml.safe_load(Path('languages.yaml').read_text(encoding='utf-8'))[
    'languages'
]

# Directory that receives per-request outputs (debug/analysis images, markdown
# output folders). Created at import time if it does not exist yet.
OUTPUT_RESULT_DIR = Path('./output-results')
OUTPUT_RESULT_DIR.mkdir(exist_ok=True)
def prepare_mfd_model():
    """Return the local path of the YOLOv7 MFD weights, fetching them if absent.

    If the weights file is not on disk yet, the zipped model is downloaded
    from the Hugging Face Hub (using the ``HF_TOKEN`` environment variable as
    the access token, when set) and unpacked into the current directory.

    Returns:
        The relative path at which the weights file is expected.
    """
    weights_path = './yolov7-model/mfd-yolov7-epoch224-20230613.pt'

    if not os.path.exists(weights_path):
        download_args = {
            'repo_id': 'breezedeus/paid-models',
            'subfolder': 'cnstd/1.2',
            'filename': 'yolov7-model-20230613.zip',
            'repo_type': "model",
            'cache_dir': './',
            'token': os.environ.get('HF_TOKEN'),
        }
        archive_path = hf_hub_download(**download_args)
        with zipfile.ZipFile(archive_path) as archive:
            archive.extractall('./')

    return weights_path
def get_p2t_model(lan_list: list, mfd_model_name: str, mfr_model_name: str):
    """Create a Pix2Text instance configured for the given languages and models.

    Args:
        lan_list: Languages placed under ``languages`` in the text_formula config.
        mfd_model_name: Model name used for the MFD component.
        mfr_model_name: Model name used for the MFR component.

    Returns:
        Whatever ``Pix2Text.from_config`` returns for the assembled config.
    """
    return Pix2Text.from_config(
        total_configs={
            'layout': {'scores_thresh': 0.45},
            'text_formula': {
                'languages': lan_list,
                # Initialization parameters for MFD (ONNX backend).
                'mfd': {'model_name': mfd_model_name, 'model_backend': 'onnx'},
                # Initialization parameters for MFR (ONNX backend).
                'formula': {'model_name': mfr_model_name, 'model_backend': 'onnx'},
            },
        }
    )
def latex_render(latex_str):
    """Wrap *latex_str* in ``$$`` delimiters, each delimiter on its own line."""
    return '$$\n{}\n$$'.format(latex_str)
def _random_suffix(length=6):
    """Return `length` distinct random ASCII letters, used to keep output names unique."""
    return ''.join(random.sample(string.ascii_letters, length))


def recognize(
    lang_list, mfd_model_name, mfr_model_name, rec_type, resized_shape, image_file
):
    """Run Pix2Text on one image and return the text plus a detection image path.

    Args:
        lang_list: Keys of ``LANGUAGES`` selected in the UI; each is mapped to
            its ``LANGUAGES`` value before being handed to the model.
        mfd_model_name: MFD model name forwarded to ``get_p2t_model``.
        mfr_model_name: MFR model name forwarded to ``get_p2t_model``.
        rec_type: Passed to the recognizer as ``file_type``. ``'page'`` and
            ``'text_formula'`` additionally produce a detection/analysis image.
        resized_shape: Forwarded unchanged to the recognizer.
        image_file: The image to recognize.

    Returns:
        A ``(text, detection_image_path)`` tuple. For ``'page'`` the text is
        the result of ``to_markdown`` and the path points at ``layout_res.jpg``
        inside the debug directory; for ``'text_formula'`` the path is the
        saved analysis image; for every other type the path is the static
        placeholder ``./docs/no-det-res.jpg``.

    Raises:
        gr.Error: If no image was supplied.
        KeyError: If an entry of ``lang_list`` is not a key of ``LANGUAGES``.
    """
    # Fail fast with a readable message before paying for model construction.
    if image_file is None:
        raise gr.Error('Please upload an image first.')

    lang_list = [LANGUAGES[lang] for lang in lang_list]
    p2t = get_p2t_model(lang_list, mfd_model_name, mfr_model_name)

    # Crude disk-usage cap: once the output directory holds more than 100
    # entries, the WHOLE directory is wiped and recreated (not just the
    # oldest entries).
    if len(os.listdir(OUTPUT_RESULT_DIR)) > 100:
        shutil.rmtree(OUTPUT_RESULT_DIR)
        OUTPUT_RESULT_DIR.mkdir(exist_ok=True)

    # Placeholder shown when the chosen type yields no detection image.
    out_det_fp = './docs/no-det-res.jpg'
    output_dir = None
    kwargs = dict(resized_shape=resized_shape, return_text=True, auto_line_break=True)

    if rec_type == 'page':
        fp_suffix = f'{time.time()}-{_random_suffix()}'
        output_dir = OUTPUT_RESULT_DIR / f'output-{fp_suffix}'
        debug_dir = OUTPUT_RESULT_DIR / f'out-debug-{fp_suffix}'
        kwargs['save_debug_res'] = debug_dir
        out_det_fp = debug_dir / 'layout_res.jpg'
    elif rec_type == 'text_formula':
        out_det_fp = str(
            OUTPUT_RESULT_DIR / f'out-det-{time.time()}-{_random_suffix()}.jpg'
        )
        kwargs['save_analysis_res'] = out_det_fp

    out = p2t.recognize(image_file, file_type=rec_type, **kwargs)

    # Only the page result needs converting; other types are returned as-is.
    out_text = out.to_markdown(output_dir) if rec_type == 'page' else out
    return out_text, out_det_fp
def example_func(lang_list, rec_type, resized_shape, image_file):
    """Run ``recognize`` for an example row, pinned to the 'mfd-pro'/'mfr-pro' models."""
    pro_models = {'mfd_model_name': 'mfd-pro', 'mfr_model_name': 'mfr-pro'}
    return recognize(
        lang_list,
        rec_type=rec_type,
        resized_shape=resized_shape,
        image_file=image_file,
        **pro_models,
    )
def main():
    """Build the Gradio Blocks UI for the Pix2Text demo and launch it.

    The page has a top row (settings, image upload + Submit, project links)
    and a second row (detection image, recognized text). The Submit button is
    wired to ``recognize`` and an examples gallery is wired to ``example_func``.
    """
    # Dropdown choices: the LANGUAGES keys, sorted case-insensitively.
    langs = list(LANGUAGES.keys())
    langs.sort(key=lambda x: x.lower())

    title = ': a Free Alternative to Mathpix'

    # Each row is [languages, file_type, resized_shape, image path], in the
    # same order as the `inputs` given to gr.Examples below.
    # NOTE(review): the 'formula' and 'text' rows use '-' for resized_shape,
    # presumably because the value is irrelevant for those types; confirm.
    examples = [
        [['English'], 'page', 768, 'docs/examples/page.png',],
        [['English'], 'text_formula', 768, 'docs/examples/mixed-en.jpg',],
        [
            ['English', 'Chinese Simplified'],
            'text_formula',
            768,
            'docs/examples/mixed-ch_sim.jpg',
        ],
        [
            ['English', 'Chinese Traditional'],
            'text_formula',
            768,
            'docs/examples/mixed-ch_tra.jpg',
        ],
        [
            ['English', 'Vietnamese'],
            'text_formula',
            608,
            'docs/examples/mixed-vietnamese.jpg',
        ],
        [['English'], 'formula', '-', 'docs/examples/formula1.png'],
        [['English'], 'formula', '-', 'docs/examples/formula2.jpg'],
        [['English'], 'formula', '-', 'docs/examples/hw-formula.png'],
        [['English', 'Chinese Simplified'], 'text', '-', 'docs/examples/pure-text.jpg',],
    ]

    # Markdown/HTML for the info column: logo, badges and a table of links.
    # NOTE(review): blank lines inside this string may be significant for
    # Markdown rendering; verify the rendered output.
    table_desc = """
<div align="center">
<img src="https://pix2text.readthedocs.io/zh-cn/stable/figs/p2t-logo.png" width="120px"/>
[![Visitors](https://api.visitorbadge.io/api/visitors?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2Fbreezedeus%2Fpix2text-demo&labelColor=%23697689&countColor=%23f5c791&style=flat&labelStyle=upper)](https://visitorbadge.io/status?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2Fbreezedeus%2Fpix2text-demo)
[![Discord](https://img.shields.io/discord/1200765964434821260?logo=discord&label=Discord)](https://discord.gg/GgD87WM8Tf)
| | |
| ------------------------------- | --------------------------------------- |
| 🏄 **Online Service** | [p2t.breezedeus.com](https://p2t.breezedeus.com) |
| 📖 **Doc** | [Online Doc](https://pix2text.readthedocs.io) |
| 📀 **Code** | [Github](https://github.com/breezedeus/pix2text) |
| 🤗 **Models** | [breezedeus/Models](https://huggingface.co/breezedeus) |
| 📄 **More Infos** | [Pix2Text Infos](https://www.breezedeus.com/article/pix2text) |
If useful, please help to **star 🌟 [Pix2Text](https://github.com/breezedeus/pix2text)** 🙏
</div>
"""

    with gr.Blocks() as demo:
        # Page heading: project link followed by the tagline in `title`.
        gr.HTML(
            f'<h1 style="text-align: center; margin-bottom: 1rem;"><a href="https://github.com/breezedeus/pix2text" target="_blank">Pix2Text V1.1.1</a>{title}</h1>'
        )
        # Top row: settings | image upload | project links.
        with gr.Row(equal_height=False):
            with gr.Column(min_width=200, variant='panel', scale=3):
                gr.Markdown('### Settings')
                lang_list = gr.Dropdown(
                    label='Text Languages',
                    choices=langs,
                    value=['English', 'Chinese Simplified'],
                    multiselect=True,
                    # info='Which languages to be recognized as Texts.',
                )
                mfd_model_name = gr.Dropdown(
                    label='MFD Models',
                    choices=['mfd', 'mfd-advanced', 'mfd-pro'],
                    value='mfd-pro',
                )
                mfr_model_name = gr.Dropdown(
                    label='MFR Models',
                    choices=['mfr', 'mfr-pro', 'mfr-plus'],
                    value='mfr-pro',
                )
                rec_type = gr.Dropdown(
                    label='file_type',
                    choices=['page', 'text_formula', 'formula', 'text'],
                    value='text_formula',
                    # info='Which type of image to be recognized.',
                )
                with gr.Accordion('More Options', open=False):
                    resized_shape = gr.Slider(
                        label='resized_shape',
                        minimum=512,
                        maximum=2048,
                        value=768,
                        step=32,
                    )
            with gr.Column(scale=6, variant='compact'):
                gr.Markdown('### Upload Image to be Recognized')
                # type="pil": the handler receives the upload as a PIL image.
                image_file = gr.Image(
                    label='Image', type="pil", image_mode='RGB', show_label=False
                )
                sub_btn = gr.Button("Submit", variant="primary")
            with gr.Column(scale=2, variant='compact'):
                gr.Markdown(table_desc)
        # Second row: detection image | recognized text.
        with gr.Row(equal_height=False):
            with gr.Column(scale=1, variant='compact'):
                gr.Markdown('**Detection Result**')
                det_result = gr.Image(
                    label='Detection Result', scale=1, show_label=False
                )
            with gr.Column(scale=1, variant='compact'):
                gr.Markdown(
                    '**Recognition Results (Paste them into the [P2T Online Service](https://p2t.breezedeus.com) to view rendered outcomes)**'
                )
                rec_result = gr.Textbox(
                    label=f'Recognition Result ',
                    lines=5,
                    value='',
                    scale=1,
                    show_label=False,
                    show_copy_button=True,
                )
                # render_result = gr.Markdown(label=f'After Rendering', value='')
                # rec_result.change(latex_render, rec_result, render_result)
        # Submit runs `recognize`; the input order matches its parameter order.
        sub_btn.click(
            recognize,
            inputs=[
                lang_list,
                mfd_model_name,
                mfr_model_name,
                rec_type,
                resized_shape,
                image_file,
            ],
            outputs=[rec_result, det_result],
        )
        # Examples go through `example_func`, which fixes the model choices.
        # Caching is enabled only when the CACHE_EXAMPLES env var equals '1'.
        gr.Examples(
            label='Examples',
            examples=examples,
            inputs=[lang_list, rec_type, resized_shape, image_file,],
            outputs=[rec_result, det_result],
            fn=example_func,
            cache_examples=os.getenv('CACHE_EXAMPLES') == '1',
        )

    # Enable request queuing with max_size=10, then start the server.
    demo.queue(max_size=10)
    demo.launch()
# Script entry point: launch the demo only when this file is run directly.
if __name__ == "__main__":
    main()