"""Flask service that converts uploaded PDF files to DOCX using Microsoft Word.

POST one or more PDFs to ``/convert`` under the form field ``pdf_files``; the
response is a ZIP archive containing one ``.docx`` per accepted PDF.

The conversion drives ``Word.Application`` through COM (via ``comtypes``), so
the host must be a Windows machine with Microsoft Word installed.
"""
import glob
import io
import os
import re
import shutil
import tempfile
import zipfile

import comtypes.client
import docx
import numpy as np
import pandas as pd
from docx.document import Document
from docx.oxml.table import CT_Tbl
from docx.oxml.text.paragraph import CT_P
from docx.table import _Cell, Table
from docx.text.paragraph import Paragraph
from flask import Flask, request, send_file
from werkzeug.utils import secure_filename

app = Flask(__name__)

# Parent folder for the per-request scratch directories.
UPLOAD_FOLDER = 'uploads/'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Word enumeration values (WdSaveFormat / WdSaveOptions).
WD_FORMAT_DOCUMENT_DEFAULT = 16
WD_DO_NOT_SAVE_CHANGES = 0


def _unique_stem(stem, taken):
    """Return *stem*, suffixed with ``_N`` if needed, so it is not in *taken*.

    Comparison is case-insensitive because the target file system (Windows)
    is. The chosen stem is recorded in *taken*.
    """
    candidate = stem
    counter = 1
    while candidate.lower() in taken:
        candidate = '{}_{}'.format(stem, counter)
        counter += 1
    taken.add(candidate.lower())
    return candidate


def _save_uploads(pdf_files, dest_dir):
    """Save the uploaded PDFs into *dest_dir* and return the saved paths.

    Parts whose client-side filename does not end in ``.pdf`` (including the
    empty filename a browser sends for a blank file input) are skipped.
    Names are sanitised with ``secure_filename``; a name that sanitises to
    nothing falls back to ``document``, and duplicates get a numeric suffix
    instead of overwriting each other.
    """
    saved_paths = []
    taken = set()
    for upload in pdf_files:
        raw_name = upload.filename or ''
        if not raw_name.lower().endswith('.pdf'):
            continue
        stem = secure_filename(os.path.splitext(raw_name)[0]) or 'document'
        target = os.path.join(dest_dir, _unique_stem(stem, taken) + '.pdf')
        upload.save(target)
        saved_paths.append(target)
    return saved_paths


def _convert_with_word(pdf_paths):
    """Convert each PDF to a sibling ``.docx`` with Word; return the new paths.

    Word is always shut down, even when a conversion fails, so no hidden
    WINWORD.EXE process is left behind.
    """
    # COM must be initialised on the thread that uses it; Flask may run this
    # handler on a worker thread rather than the thread that imported comtypes.
    comtypes.CoInitialize()
    word = wdoc = None
    try:
        word = comtypes.client.CreateObject('Word.Application')
        word.Visible = False
        converted = []
        for pdf_path in pdf_paths:
            in_file = os.path.abspath(pdf_path)
            out_file = os.path.splitext(in_file)[0] + '.docx'
            wdoc = word.Documents.Open(in_file)
            wdoc.SaveAs2(out_file, FileFormat=WD_FORMAT_DOCUMENT_DEFAULT)
            wdoc.Close()
            wdoc = None
            converted.append(out_file)
        return converted
    finally:
        try:
            if word is not None:
                # Quit without saving: this also discards a document left open
                # by a failed conversion instead of blocking on a save prompt.
                word.Quit(WD_DO_NOT_SAVE_CHANGES)
        finally:
            # Drop the COM references before COM itself is torn down.
            wdoc = word = None
            comtypes.CoUninitialize()


def _zip_files(paths):
    """Return an in-memory ZIP (rewound to position 0) of *paths*, stored flat."""
    buffer = io.BytesIO()
    with zipfile.ZipFile(buffer, 'w', zipfile.ZIP_DEFLATED) as archive:
        for path in paths:
            archive.write(path, os.path.basename(path))
    buffer.seek(0)
    return buffer


def _send_zip(buffer, name):
    """Send *buffer* as a ZIP attachment called *name*.

    Flask 2.0 renamed ``attachment_filename`` to ``download_name`` and 2.2
    removed the old keyword, so try the new spelling first and fall back.
    """
    try:
        return send_file(buffer, mimetype='application/zip',
                         as_attachment=True, download_name=name)
    except TypeError:
        return send_file(buffer, mimetype='application/zip',
                         as_attachment=True, attachment_filename=name)


@app.route('/convert', methods=['POST'])
def convert_pdfs():
    """Convert the PDFs uploaded as ``pdf_files`` and return them zipped as DOCX.

    Returns:
        A ZIP attachment named ``converted_docx.zip``, or a 400 response when
        the ``pdf_files`` field is missing or contains no PDF.
    """
    if 'pdf_files' not in request.files:
        return 'No file part', 400

    # Each request works in its own scratch directory so that files from other
    # (earlier or concurrent) requests can never end up in this response.
    work_dir = tempfile.mkdtemp(dir=UPLOAD_FOLDER)
    try:
        pdf_paths = _save_uploads(request.files.getlist('pdf_files'), work_dir)
        if not pdf_paths:
            return 'No selected file', 400
        memory_file = _zip_files(_convert_with_word(pdf_paths))
    finally:
        # Best effort: a lingering file lock must not turn success into a 500.
        shutil.rmtree(work_dir, ignore_errors=True)

    return _send_zip(memory_file, 'converted_docx.zip')


if __name__ == '__main__':
    # NOTE: binds to every network interface.
    app.run(host='0.0.0.0', port=5000)