Spaces:
Runtime error
Runtime error
# Import necessary libraries | |
import os | |
import glob | |
import re | |
import pandas as pd, numpy as np | |
import comtypes.client | |
import docx | |
from docx.document import Document | |
from docx.oxml.table import CT_Tbl | |
from docx.oxml.text.paragraph import CT_P | |
from docx.table import _Cell, Table | |
from docx.text.paragraph import Paragraph | |
import tempfile | |
import zipfile | |
from io import BytesIO | |
import streamlit as st | |
# Function to convert PDF to DOCX | |
def pdf_to_docx(pdf_file_paths, path_docx):
    """Convert PDF files to DOCX by driving Microsoft Word through COM.

    Args:
        pdf_file_paths: Iterable of paths to the PDF files to convert.
        path_docx: Directory in which the DOCX files are written. A trailing
            path separator is optional.

    Returns:
        A list with the absolute path of every DOCX file that was written,
        in the same order as the input paths. An empty input yields an empty
        list and Word is never started.

    Raises:
        Whatever the COM layer raises if Word cannot be started or a
        document cannot be opened or saved. Word is shut down before the
        exception propagates.
    """
    pdf_file_paths = list(pdf_file_paths)
    if not pdf_file_paths:
        # Nothing to convert: do not pay for (or depend on) launching Word.
        return []

    word = comtypes.client.CreateObject('Word.Application')
    docx_files = []
    try:
        word.visible = 0
        for pdf_file_path in pdf_file_paths:
            in_file = os.path.abspath(pdf_file_path)
            # splitext drops the real extension instead of assuming that it
            # is exactly four characters long.
            stem = os.path.splitext(os.path.basename(pdf_file_path))[0]
            # os.path.join keeps the output inside path_docx whether or not
            # the caller supplied a trailing separator.
            out_file = os.path.abspath(os.path.join(path_docx, stem + '.docx'))
            wdoc = word.Documents.Open(in_file)
            try:
                # 16 is Word's wdFormatDocumentDefault (DOCX) save format.
                wdoc.SaveAs2(out_file, FileFormat=16)
            finally:
                # Always release the document, even when saving fails.
                wdoc.Close()
            docx_files.append(out_file)
    finally:
        # Never leave a hidden Word process running after an error.
        word.Quit()
    return docx_files
# Streamlit app | |
st.title("PDF to DOCX Converter")
uploaded_files = st.file_uploader("Upload PDF files", type="pdf", accept_multiple_files=True)

# Only convert once the user asks for it: Streamlit reruns this script on
# every interaction, so converting unconditionally would repeat the expensive
# conversion on each rerun. The button is still rendered only when files
# have been uploaded, because `and` short-circuits.
if uploaded_files and st.button("Convert PDF to DOCX"):
    with tempfile.TemporaryDirectory() as temp_dir:
        pdf_file_paths = []
        for uploaded_file in uploaded_files:
            # Keep only the final path component so that an upload name can
            # never point outside the temporary directory.
            safe_name = os.path.basename(uploaded_file.name)
            pdf_file_path = os.path.join(temp_dir, safe_name)
            with open(pdf_file_path, "wb") as f:
                f.write(uploaded_file.getbuffer())
            pdf_file_paths.append(pdf_file_path)

        # Pass the directory with a trailing separator so the DOCX files are
        # created inside temp_dir and are removed together with it.
        converted_files = pdf_to_docx(pdf_file_paths, os.path.join(temp_dir, ""))

        # Build the archive in memory while the converted files still exist.
        output = BytesIO()
        with zipfile.ZipFile(output, mode="w", compression=zipfile.ZIP_DEFLATED) as archive:
            for docx_file in converted_files:
                archive.write(docx_file, os.path.basename(docx_file))

    # The ZIP lives entirely in memory, so it outlives the temporary directory.
    st.download_button(
        label="Download ZIP",
        data=output.getvalue(),
        file_name="converted_docx.zip",
        mime="application/zip"
    )