dinhquangson committed on
Commit
ff1b047
1 Parent(s): 5f082a2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -0
app.py CHANGED
@@ -158,7 +158,27 @@ async def download_database():
158
 
159
  # Return the zip file as a response for download
160
  return FileResponse(zip_path, media_type='application/zip', filename='database.zip')
 
 
 
 
 
161
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
  @app.get("/")
163
  def api_home():
164
  return {'detail': 'Welcome to FastAPI Qdrant importer!'}
 
158
 
159
  # Return the zip file as a response for download
160
  return FileResponse(zip_path, media_type='application/zip', filename='database.zip')
161
+
162
@app.post("/pdf2text/")
async def create_upload_file(file: UploadFile = File(...)):
    """Run OCR over an uploaded PDF and return the recognised text.

    The upload is written under ``temp_path``, every page is rendered to an
    image with ``pdf2image``, and each page image is passed to Tesseract
    using the Vietnamese (``vie``) language pack.

    Args:
        file: The uploaded PDF document.

    Returns:
        The OCR text of all pages in page order, each page followed by a
        newline. An empty string is returned when the PDF has no pages.
    """
    # Imported lazily so the OCR dependencies are only needed by this route.
    from os.path import basename

    import pytesseract
    from pdf2image import convert_from_path

    # The filename comes from the client: keep only its final component so a
    # name such as "../../etc/passwd" cannot escape temp_path.
    safe_name = basename(file.filename or "") or "upload.pdf"
    file_savePath = join(temp_path, safe_name)

    with open(file_savePath, 'wb') as f:
        shutil.copyfileobj(file.file, f)

    # Convert each PDF page to an image.
    images = convert_from_path(file_savePath)

    # Extract text from every page image and return the text of ALL pages,
    # not just the last one processed.
    # NOTE(review): rendering and OCR are blocking calls inside an async
    # handler; consider offloading them to a worker thread if latency matters.
    text = "".join(
        pytesseract.image_to_string(image, lang='vie') + '\n'
        for image in images
    )

    return text
182
  @app.get("/")
183
  def api_home():
184
  return {'detail': 'Welcome to FastAPI Qdrant importer!'}