Spaces:
Sleeping
Sleeping
| from fastapi import FastAPI, File, UploadFile | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from fastapi.middleware.gzip import GZipMiddleware | |
| import numpy as np | |
| from PIL import Image | |
| from paddleocr import PaddleOCR | |
| from doctr.io import DocumentFile | |
| from doctr.models import ocr_predictor | |
| import io | |
# Application object with a fully permissive CORS policy: every origin,
# method and header is accepted.
# NOTE(review): wildcard origins are combined with allow_credentials=True;
# FastAPI's CORS guidance is to list origins explicitly when credentials are
# allowed -- confirm this combination is intended.
app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Both OCR engines are constructed a single time at import so that every
# request reuses the same instances.
ocr_model = ocr_predictor(pretrained=True)
paddle_ocr = PaddleOCR(lang="en", use_angle_cls=True)
def ocr_with_doctr(file):
    """Run the docTR predictor over a PDF and return its text.

    Args:
        file: The PDF input, handed unchanged to ``DocumentFile.from_pdf``.

    Returns:
        A string with one row per recognised line: the line's words joined
        by single spaces and terminated by a newline. Rows follow the
        page -> block -> line order of the prediction.
    """
    document = DocumentFile.from_pdf(file)
    prediction = ocr_model(document)
    rows = [
        " ".join(word.value for word in line.words) + "\n"
        for page in prediction.pages
        for block in page.blocks
        for line in block.lines
    ]
    return "".join(rows)
def ocr_with_paddle(img):
    """Run PaddleOCR on an image and return the recognised text.

    Args:
        img: The image input, handed unchanged to ``paddle_ocr.ocr``.

    Returns:
        Every recognised text fragment of the first result entry, each
        prefixed with a single space and concatenated in result order.
        An empty string when nothing was recognised.
    """
    result = paddle_ocr.ocr(img)
    # An image without detectable text can come back as an empty result or
    # with ``None`` as its first entry; indexing into that would raise, so
    # treat both as "no text".
    if not result or not result[0]:
        return ''
    # Each detection's second element starts with the recognised text.
    return ''.join(' ' + detection[1][0] for detection in result[0])
def generate_text_from_image(img):
    """Extract text from an image by delegating to ``ocr_with_paddle``.

    Args:
        img: The image input, passed through unchanged.

    Returns:
        Whatever ``ocr_with_paddle`` returns for ``img``.
    """
    extracted_text = ocr_with_paddle(img)
    return extracted_text
# NOTE(review): no route decorator is visible on this handler in this view;
# confirm it is registered on `app`.
async def perform_ocr(file: UploadFile = File(...)):
    """OCR an uploaded file and return the extracted text.

    Uploads whose name ends in ``.pdf`` (compared case-insensitively) are
    processed by ``ocr_with_doctr``; anything else is opened with Pillow,
    converted to a NumPy array and processed by
    ``generate_text_from_image``.

    Args:
        file: The uploaded document or image.

    Returns:
        A dict with the single key ``"ocr_text"`` holding the extracted text.
    """
    file_bytes = await file.read()
    # The filename may be missing and extensions are often upper-case
    # ("SCAN.PDF"), so normalise it before testing the suffix.
    filename = (file.filename or "").lower()
    if filename.endswith(".pdf"):
        text_output = ocr_with_doctr(io.BytesIO(file_bytes))
    else:
        img = np.array(Image.open(io.BytesIO(file_bytes)))
        text_output = generate_text_from_image(img)
    return {"ocr_text": text_output}
# NOTE(review): no route decorator is visible on this handler in this view;
# confirm it is registered on `app`.
async def test_call():
    """Return a fixed payload indicating that the service is running."""
    status = {"message": "Hi. I'm running"}
    return status