"""Gradio app that extracts the text of uploaded PDF files."""

import os
from concurrent.futures import ThreadPoolExecutor

import gradio as gr
from PyPDF2 import PdfReader


def _resolve_path(pdf_file):
    """Return the filesystem path of an upload.

    Accepts either a path (``str`` / ``os.PathLike``) or any object that
    exposes the path through a ``.name`` attribute.
    """
    # Check for path types first: pathlib.Path also has a ``.name``, but it
    # holds only the basename, which would not be openable.
    if isinstance(pdf_file, (str, os.PathLike)):
        return os.fspath(pdf_file)
    return pdf_file.name


def convert_pdf_to_text(pdf_file):
    """Extract the text of one PDF and wrap it in a delimited section.

    Args:
        pdf_file: A path, or an object whose ``.name`` is the path of the
            PDF on disk.

    Returns:
        A string of the form
        ``"\\n---\\nfile name: <path>\\n content: \\n<text>\\n---\\n"``.

    Raises:
        ValueError: If the path does not end in ``.pdf`` (case-insensitive).
    """
    path = _resolve_path(pdf_file)
    print(path)

    # Compare case-insensitively so "REPORT.PDF" is accepted as well.
    if not path.lower().endswith(".pdf"):
        raise ValueError("Invalid file format. Please upload PDF files only.")

    with open(path, "rb") as file:
        pdf_reader = PdfReader(file)
        # ``or ""`` keeps the join safe if a page yields a falsy result
        # instead of a string.
        content = "".join(
            page.extract_text() or "" for page in pdf_reader.pages
        )

    return f"\n---\nfile name: {path}\n content: \n{content}\n---\n"


def pdf_to_text(pdf_files):
    """Convert several PDFs to text and join the per-file sections.

    Args:
        pdf_files: An iterable of uploads accepted by
            ``convert_pdf_to_text``. ``None`` or an empty collection
            yields an empty string.

    Returns:
        The per-file sections joined with newlines, in input order.

    Raises:
        ValueError: Propagated from ``convert_pdf_to_text`` for any
            non-PDF upload.
    """
    if not pdf_files:
        return ""

    # executor.map yields results in input order; materialise them inside
    # the ``with`` so any worker exception surfaces here.
    with ThreadPoolExecutor() as executor:
        sections = list(executor.map(convert_pdf_to_text, pdf_files))

    return "\n".join(sections)


# NOTE(review): ``gr.inputs`` is the legacy component namespace -- confirm it
# is still provided by the gradio version this app is pinned to.
iface = gr.Interface(
    fn=pdf_to_text,
    inputs=gr.inputs.File(
        type="file",
        label="Upload a PDF file",
        file_count="multiple",
    ),
    outputs="text",
    title="PDF to Text Converter",
    description="Upload PDF files and get their content in text format.",
)

if __name__ == "__main__":
    iface.launch()