QDrantRAG9

Sleeping

dinhquangson committed on Jun 23

Commit

ff1b047

•

1 Parent(s): 5f082a2

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -158,7 +158,27 @@ async def download_database():
     # Return the zip file as a response for download
     return FileResponse(zip_path, media_type='application/zip', filename='database.zip')
 @app.get("/")
 def api_home():
     """Return the welcome payload served at the API root."""
     welcome = {'detail': 'Welcome to FastAPI Qdrant importer!'}
     return welcome

     # Return the zip file as a response for download
     return FileResponse(zip_path, media_type='application/zip', filename='database.zip')
+@app.post("/pdf2text/")
+async def create_upload_file(file: UploadFile = File(...)):
+    import pytesseract
+    from pdf2image import convert_from_path
+    file_savePath =  join(temp_path,file.filename)
+    with open(file_savePath,'wb') as f:
+        shutil.copyfileobj(file.file, f)
+    # convert PDF to image
+    images = convert_from_path(file_savePath)
+    text=""
+    # Extract text from images
+    for x in fruits:
+        ocr_text = pytesseract.image_to_string(image,lang='vie')
+        text=text+ocr_text+'\n'
+    return ocr_text
 @app.get("/")
 def api_home():
     """Return the welcome payload served at the API root."""
     welcome = {'detail': 'Welcome to FastAPI Qdrant importer!'}
     return welcome