# Spaces: Build error
import pandas as pd | |
import PIL | |
from PIL import Image | |
from PIL import ImageDraw | |
import gradio as gr | |
import torch | |
import easyocr | |
import fitz # PyMuPDF | |
# Function to extract images from PDF
def pdf_to_images(pdf_path):
    """Render every page of a PDF to a PIL image.

    Args:
        pdf_path: Path of the PDF file, passed straight to ``fitz.open``.

    Returns:
        A list of RGB ``PIL.Image.Image`` objects, one per page, in page
        order. The list is empty when the document has no pages.
    """
    images = []
    # Using the document as a context manager guarantees it is closed (and
    # its file handle released) even if rendering one of the pages raises.
    with fitz.open(pdf_path) as doc:
        for page in doc:
            pix = page.get_pixmap()
            # frombytes copies the pixel buffer, so the image stays valid
            # after the document has been closed.
            img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            images.append(img)
    return images
def draw_boxes(image, bounds, color='yellow', width=2):
    """Outline each detected region on ``image`` and return the same image.

    The image is modified in place.

    Args:
        image: PIL image to draw on.
        bounds: Iterable of detections; the first element of each detection
            must be a sequence of exactly four corner points.
        color: Line colour handed to ``ImageDraw``.
        width: Line width in pixels.

    Returns:
        The ``image`` object that was passed in.
    """
    canvas = ImageDraw.Draw(image)
    for detection in bounds:
        first, second, third, fourth = detection[0]
        # Repeat the first corner at the end so the outline is closed, and
        # flatten the points into the x0, y0, x1, y1, ... form given to line().
        outline = [
            coord
            for corner in (first, second, third, fourth, first)
            for coord in corner
        ]
        canvas.line(outline, fill=color, width=width)
    return image
def inference(pdf, lang):
    """Run EasyOCR on every page of an uploaded PDF.

    Each page is rendered to an image, read by the OCR engine, and then
    annotated with the outlines of the detected regions.

    Args:
        pdf: Uploaded file object; only its ``name`` attribute (the path
            handed to ``pdf_to_images``) is used.
        lang: List of language codes passed to ``easyocr.Reader``.

    Returns:
        A list with one ``(result_image_path, dataframe)`` tuple per page.
        ``result_image_path`` is the path of the annotated JPEG and
        ``dataframe`` holds every column of the OCR output except the first
        (the first column is the box geometry consumed by ``draw_boxes``).
    """
    # Imported locally so this function does not depend on edits to the
    # file-level import block.
    import os
    import tempfile

    reader = easyocr.Reader(lang)
    images = pdf_to_images(pdf.name)

    # Write into a fresh directory per call: fixed names such as
    # 'page_1.jpg' in the working directory would be overwritten by the
    # next request and would pile up next to the application code.
    work_dir = tempfile.mkdtemp(prefix='easyocr_')

    # NOTE(review): the Gradio interface in this file declares two output
    # components; confirm that a list of per-page tuples is the shape it
    # expects before relying on multi-page results.
    results = []
    for page_number, img in enumerate(images, start=1):
        img_path = os.path.join(work_dir, f'page_{page_number}.jpg')
        img.save(img_path)
        bounds = reader.readtext(img_path)

        # draw_boxes annotates img in place, so saving it again afterwards
        # produces the annotated copy.
        draw_boxes(img, bounds)
        result_img_path = os.path.join(work_dir, f'result_{page_number}.jpg')
        img.save(result_img_path)

        # Drop column 0 (the box corner points) and keep the rest.
        results.append((result_img_path, pd.DataFrame(bounds).iloc[:, 1:]))
    return results
# Text shown around the demo.
title = 'EasyOCR'
description = 'Realtime EasyOCR.'
article = "<p style='text-align: center'><a href='https://www.jaided.ai/easyocr/'>OCR for written scripts.</a> | <a href='https://github.com/JaidedAI/EasyOCR'>Github Repo</a></p>"

# One example row: a value for the file input and one for the language input.
examples = [['example.pdf', ['en']]]

# Fixes the height and width of the input/output image elements.
css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"

# Language codes offered in the checkbox group.
choices = ["en", "hi"]

# Input components, in the order of inference's parameters (pdf, lang).
input_components = [
    gr.inputs.File(type='file', label='Input PDF'),
    gr.inputs.CheckboxGroup(
        choices,
        type="value",
        default=['en'],
        label='language',
    ),
]

# Output components declared for the interface.
output_components = [
    gr.outputs.File(type='file', label='Output Images'),
    gr.outputs.Dataframe(headers=['text', 'confidence']),
]

# Build the interface around inference and start it with debug=True.
gr.Interface(
    inference,
    input_components,
    output_components,
    title=title,
    description=description,
    article=article,
    examples=examples,
    css=css,
    enable_queue=True,
).launch(debug=True)