# Scraped page header (not part of the program):
#   committer avatar: sschet · commit message: "Update app.py" · commit: 3fc6766
# Import necessary libraries
import os
import glob
import re
import pandas as pd, numpy as np
import comtypes.client
import docx
from docx.document import Document
from docx.oxml.table import CT_Tbl
from docx.oxml.text.paragraph import CT_P
from docx.table import _Cell, Table
from docx.text.paragraph import Paragraph
import tempfile
import zipfile
from io import BytesIO
import streamlit as st
# Function to convert PDF to DOCX
def pdf_to_docx(pdf_file_paths, path_docx):
    """Convert PDF files to DOCX by opening and re-saving them in Word.

    The conversion is done by automating a ``Word.Application`` COM object
    through ``comtypes``, so a local Microsoft Word installation is required.

    Args:
        pdf_file_paths: Iterable of paths to the PDF files to convert.
        path_docx: Directory in which the converted ``.docx`` files are saved.

    Returns:
        List of absolute paths of the DOCX files that were written, in the
        same order as the inputs. Empty when no input paths are given.
    """
    pdf_file_paths = list(pdf_file_paths)
    if not pdf_file_paths:
        # Nothing to convert: avoid starting a Word process for no work.
        return []

    word = comtypes.client.CreateObject('Word.Application')
    docx_files = []
    try:
        word.visible = 0  # keep the Word window hidden while converting
        for pdf_file_path in pdf_file_paths:
            in_file = os.path.abspath(pdf_file_path)
            # splitext() drops the extension whatever its length, and join()
            # places the result inside path_docx instead of gluing the file
            # name onto the directory name.
            stem = os.path.splitext(os.path.basename(pdf_file_path))[0]
            out_file = os.path.abspath(os.path.join(path_docx, stem + '.docx'))

            wdoc = word.Documents.Open(in_file)
            try:
                # 16 is wdFormatDocumentDefault, i.e. the .docx format.
                wdoc.SaveAs2(out_file, FileFormat=16)
            finally:
                # Always release the document, even if saving failed.
                wdoc.Close()
            docx_files.append(out_file)
    finally:
        # Always shut Word down so a failure does not leave an orphaned
        # background process behind.
        word.Quit()
    return docx_files
# Streamlit app
st.title("PDF to DOCX Converter")
uploaded_files = st.file_uploader("Upload PDF files", type="pdf", accept_multiple_files=True)

# Streamlit reruns this script on every interaction, so the conversion is done
# only when the button is clicked rather than on every rerun with files present.
# The button is still shown only once at least one file has been uploaded.
if uploaded_files and st.button("Convert PDF to DOCX"):
    with tempfile.TemporaryDirectory() as temp_dir:
        # Persist the uploads to disk: Word can only open files by path.
        pdf_file_paths = []
        for uploaded_file in uploaded_files:
            # basename() keeps the file inside temp_dir even if the
            # client-supplied name contains directory components.
            pdf_file_path = os.path.join(temp_dir, os.path.basename(uploaded_file.name))
            with open(pdf_file_path, "wb") as f:
                f.write(uploaded_file.getbuffer())
            pdf_file_paths.append(pdf_file_path)

        converted_files = pdf_to_docx(pdf_file_paths, temp_dir)

        # Build the archive in memory while the converted files still exist.
        output = BytesIO()
        with zipfile.ZipFile(output, mode="w", compression=zipfile.ZIP_DEFLATED) as archive:
            for docx_file in converted_files:
                archive.write(docx_file, os.path.basename(docx_file))

    # Hand over an immutable bytes snapshot instead of a live buffer object.
    st.download_button(
        label="Download ZIP",
        data=output.getvalue(),
        file_name="converted_docx.zip",
        mime="application/zip",
    )