import glob
import io
import os
import re
import shutil
import tempfile
import zipfile

import comtypes.client
import docx
import numpy as np
import pandas as pd
from docx.document import Document
from docx.oxml.table import CT_Tbl
from docx.oxml.text.paragraph import CT_P
from docx.table import _Cell, Table
from docx.text.paragraph import Paragraph
from flask import Flask, request, send_file
from werkzeug.utils import secure_filename


# Flask application object; the route below is registered on it.
app = Flask(__name__)


# Directory (relative to the current working directory) that receives uploads.
UPLOAD_FOLDER = 'uploads/'

# exist_ok=True creates the folder when missing without the check-then-create
# race of a separate os.path.exists() test.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)


def _save_pdf_uploads(uploads, dest_dir):
    """Save every uploaded ``.pdf`` part into ``dest_dir`` and return the saved paths."""
    saved = []
    for upload in uploads:
        filename = secure_filename(upload.filename or '')
        # A form submitted without a chosen file still yields one part with an
        # empty filename; skip it, together with anything that is not a PDF.
        if not filename.lower().endswith('.pdf'):
            continue
        target = os.path.join(dest_dir, filename)
        upload.save(target)
        # Two parts with the same sanitised name overwrite each other on disk;
        # keep a single entry so the file is converted only once.
        if target not in saved:
            saved.append(target)
    return saved


def _convert_pdfs_to_docx(pdf_paths):
    """Convert each PDF to a sibling ``.docx`` via Word automation; return the new paths."""
    # NOTE(review): if requests are handled on worker threads, COM may need to
    # be initialised on that thread before CreateObject -- confirm for the
    # deployment in use.
    word = comtypes.client.CreateObject('Word.Application')
    docx_paths = []
    try:
        word.visible = 0
        for pdf_path in pdf_paths:
            in_file = os.path.abspath(pdf_path)
            out_file = os.path.splitext(in_file)[0] + '.docx'
            wdoc = word.Documents.Open(in_file)
            try:
                # 16 is Word's wdFormatDocumentDefault save format (.docx).
                wdoc.SaveAs2(out_file, FileFormat=16)
            finally:
                # Close the document even when saving fails.
                wdoc.Close()
            docx_paths.append(out_file)
    finally:
        # Always shut Word down so a failed conversion does not leave the
        # automation instance running.
        word.Quit()
    return docx_paths


def _zip_files(paths):
    """Return an in-memory ZIP (rewound to offset 0) holding ``paths`` by base name."""
    buffer = io.BytesIO()
    with zipfile.ZipFile(buffer, 'w', zipfile.ZIP_DEFLATED) as archive:
        for path in paths:
            archive.write(path, os.path.basename(path))
    buffer.seek(0)
    return buffer


@app.route('/convert', methods=['POST'])
def convert_pdfs():
    """Convert the uploaded PDFs to DOCX and return them as one ZIP download.

    Reads the ``pdf_files`` parts of the multipart request, converts every
    ``.pdf`` among them with Word, and responds with ``converted_docx.zip``.

    Returns:
        ``('No file part', 400)`` when the request has no ``pdf_files`` field,
        ``('No selected file', 400)`` when that field carries no PDF file,
        otherwise the ZIP archive as an attachment.

    Each request works in its own temporary directory under ``UPLOAD_FOLDER``,
    removed before returning, so files from one request never end up in the
    archive of another.
    """
    if 'pdf_files' not in request.files:
        return 'No file part', 400

    uploads = request.files.getlist('pdf_files')

    os.makedirs(UPLOAD_FOLDER, exist_ok=True)
    work_dir = tempfile.mkdtemp(dir=UPLOAD_FOLDER)
    try:
        pdf_paths = _save_pdf_uploads(uploads, work_dir)
        if not pdf_paths:
            return 'No selected file', 400
        docx_paths = _convert_pdfs_to_docx(pdf_paths)
        # The archive is built in memory, so the directory can be deleted
        # before the response is sent.
        memory_file = _zip_files(docx_paths)
    finally:
        shutil.rmtree(work_dir, ignore_errors=True)

    try:
        # ``download_name`` replaced ``attachment_filename`` in Flask 2.0; the
        # old keyword was removed in Flask 2.2.
        return send_file(
            memory_file,
            mimetype='application/zip',
            as_attachment=True,
            download_name='converted_docx.zip',
        )
    except TypeError:
        # Flask < 2.0 rejects the new keyword before touching the buffer.
        return send_file(
            memory_file,
            mimetype='application/zip',
            as_attachment=True,
            attachment_filename='converted_docx.zip',
        )


if __name__ == '__main__':
    # Run the app directly when executed as a script: all interfaces, port 5000.
    app.run(host='0.0.0.0', port=5000)
|