# sschet's picture
# main
# b94ac18
# raw
# history blame
# 1.95 kB
import os
import glob
import re
import pandas as pd, numpy as np
import comtypes.client
import docx
from docx.document import Document
from docx.oxml.table import CT_Tbl
from docx.oxml.text.paragraph import CT_P
from docx.table import _Cell, Table
from docx.text.paragraph import Paragraph
from flask import Flask, request, send_file
from werkzeug.utils import secure_filename
import zipfile
import io
# Flask application object; the route(s) below register against it.
app = Flask(__name__)

# Folder (relative to the working directory) that holds uploaded PDFs and
# the DOCX files produced from them.
UPLOAD_FOLDER = 'uploads/'

# exist_ok=True replaces the exists()-then-makedirs() pair, which could raise
# FileExistsError if another process created the folder between the two calls.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
@app.route('/convert', methods=['POST'])
def convert_pdfs():
    """Convert uploaded PDFs to DOCX with Microsoft Word and return them zipped.

    Reads the uploads from the ``pdf_files`` field of the multipart request,
    saves them into a private per-request working directory, opens each one
    in Word through COM and saves it as ``.docx``, then packs the results
    into an in-memory ZIP archive.

    Returns:
        A Flask response carrying ``converted_docx.zip`` as an attachment, or
        a ``(message, 400)`` tuple when the ``pdf_files`` field is missing or
        contains no usable PDF upload.
    """
    # Local imports: these modules are not part of the file's import block.
    import shutil
    import tempfile

    if 'pdf_files' not in request.files:
        return 'No file part', 400

    # Keep only uploads that still have a ".pdf" name after sanitising.
    # This drops empty file inputs (filename == '') and non-PDF uploads,
    # which would otherwise be written to disk and never converted.
    uploads = []
    for pdf_file in request.files.getlist('pdf_files'):
        filename = secure_filename(pdf_file.filename or '')
        if filename.lower().endswith('.pdf'):
            uploads.append((filename, pdf_file))
    if not uploads:
        return 'No selected file', 400

    # Work in a directory that belongs to this request only, so files left
    # over from earlier requests (or from other clients) are neither
    # re-converted nor leaked into this response.
    os.makedirs(UPLOAD_FOLDER, exist_ok=True)
    work_dir = tempfile.mkdtemp(dir=UPLOAD_FOLDER)
    try:
        for filename, pdf_file in uploads:
            pdf_file.save(os.path.join(work_dir, filename))

        docx_paths = []
        # NOTE(review): COM has to be initialised on the calling thread; if
        # this view runs on a worker thread, comtypes.CoInitialize() may be
        # required before CreateObject — confirm against the deployment.
        word = comtypes.client.CreateObject('Word.Application')
        try:
            word.visible = 0
            for pdf_name in sorted(os.listdir(work_dir)):
                stem, ext = os.path.splitext(pdf_name)
                if ext.lower() != '.pdf':
                    continue
                # Absolute paths are built here because Word is a separate
                # process and is handed these paths directly.
                in_file = os.path.abspath(os.path.join(work_dir, pdf_name))
                out_file = os.path.abspath(
                    os.path.join(work_dir, stem + '.docx'))
                wdoc = word.Documents.Open(in_file)
                try:
                    # 16 is Word's wdFormatDocumentDefault (.docx).
                    wdoc.SaveAs2(out_file, FileFormat=16)
                finally:
                    # Close the document even if saving failed.
                    wdoc.Close()
                docx_paths.append(out_file)
        finally:
            # Always shut Word down; otherwise a failed conversion leaves an
            # invisible WINWORD process running.
            word.Quit()

        memory_file = io.BytesIO()
        with zipfile.ZipFile(memory_file, 'w', zipfile.ZIP_DEFLATED) as zf:
            for docx_path in docx_paths:
                # Store entries flat, by file name only.
                zf.write(docx_path, os.path.basename(docx_path))
    finally:
        # Best-effort cleanup; the archive already lives in memory.
        shutil.rmtree(work_dir, ignore_errors=True)

    memory_file.seek(0)
    try:
        # Flask >= 2.0 spelling of the download file name.
        return send_file(memory_file, download_name='converted_docx.zip',
                         as_attachment=True)
    except TypeError:
        # Older Flask releases only know the previous keyword.
        memory_file.seek(0)
        return send_file(memory_file,
                         attachment_filename='converted_docx.zip',
                         as_attachment=True)
# Script entry point: start Flask's built-in server only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    # host 0.0.0.0 binds every network interface, so the service is reachable
    # from other machines on port 5000.
    app.run(
        port=5000,
        host="0.0.0.0",
    )