# sschet - Update main.py (commit de1bffc)
import glob
import io
import os
import shutil
import tempfile
import zipfile

import comtypes.client
import docx
from fastapi import FastAPI, File, UploadFile
from pydantic import BaseModel
from starlette.responses import FileResponse, Response
# ASGI application object; the route handlers in this module register on it.
app = FastAPI()

# Directory (relative to the process working directory) under which uploaded
# files are written.
UPLOAD_FOLDER = 'uploads/'

# exist_ok=True replaces the check-then-create sequence, which could raise
# FileExistsError if another process created the folder between the two calls.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
@app.post('/convert')
async def convert_pdfs(pdf_files: list[UploadFile] = File(...)):
    """Convert uploaded PDF files to DOCX with Word and return them as a zip.

    Each request works in its own temporary sub-directory of UPLOAD_FOLDER so
    that files left over from other requests are neither re-converted nor
    included in the returned archive. The directory is removed before the
    response is sent.

    Args:
        pdf_files: Uploaded files from the multipart form. Only entries whose
            file name ends in ``.pdf`` (case-insensitive) are converted.

    Returns:
        A JSON-serialisable ``{"detail": ...}`` dict when no file was sent,
        otherwise an ``application/zip`` attachment named
        ``converted_docx.zip`` containing one ``.docx`` per converted PDF.

    Raises:
        Any exception raised by the Word COM automation or by file I/O is
        propagated after Word has been shut down and the working directory
        has been cleaned up.
    """
    if not pdf_files:
        return {"detail": "No selected file"}

    # NOTE(review): the Word automation below is synchronous and runs inside
    # this coroutine, so it blocks the event loop while converting.
    work_dir = tempfile.mkdtemp(dir=UPLOAD_FOLDER)
    try:
        for pdf_file in pdf_files:
            # The file name is client-controlled: drop any directory part
            # (either separator style) so it cannot escape work_dir.
            raw_name = (pdf_file.filename or '').replace('\\', '/')
            safe_name = os.path.basename(raw_name)
            if not safe_name.lower().endswith('.pdf'):
                continue
            with open(os.path.join(work_dir, safe_name), "wb") as f:
                f.write(await pdf_file.read())

        pdf_names = sorted(
            name for name in os.listdir(work_dir)
            if name.lower().endswith('.pdf')
        )

        # Only start Word when there is something to convert.
        if pdf_names:
            word = comtypes.client.CreateObject('Word.Application')
            try:
                word.visible = 0
                for name in pdf_names:
                    # Word requires absolute paths.
                    in_file = os.path.abspath(os.path.join(work_dir, name))
                    out_file = os.path.splitext(in_file)[0] + '.docx'
                    wdoc = word.Documents.Open(in_file)
                    try:
                        # 16 is Word's wdFormatDocumentDefault (.docx).
                        wdoc.SaveAs2(out_file, FileFormat=16)
                    finally:
                        wdoc.Close()
            finally:
                # Always shut Word down, otherwise a failed conversion
                # leaves a hidden WINWORD process running.
                word.Quit()

        memory_file = io.BytesIO()
        with zipfile.ZipFile(memory_file, 'w', zipfile.ZIP_DEFLATED) as zf:
            for name in sorted(os.listdir(work_dir)):
                if name.endswith('.docx'):
                    zf.write(os.path.join(work_dir, name), name)
    finally:
        shutil.rmtree(work_dir, ignore_errors=True)

    # FileResponse expects a filesystem path, not a file object, so the
    # in-memory archive is sent as a plain Response body instead.
    return Response(
        content=memory_file.getvalue(),
        media_type='application/zip',
        headers={
            'Content-Disposition': 'attachment; filename="converted_docx.zip"',
        },
    )
if __name__ == '__main__':
    # Script entry point: serve the app with uvicorn on all interfaces,
    # port 7860. uvicorn is imported here so that merely importing this
    # module does not require it.
    from uvicorn import run as serve

    serve(app, host='0.0.0.0', port=7860)