"""Gradio app that converts text (or a summarized PDF abstract) to speech with Bark."""

from functools import lru_cache
from tempfile import NamedTemporaryFile

import gradio as gr
import numpy as np
import torch
from bark import SAMPLE_RATE, generate_audio, preload_models
from IPython.display import Audio
from PyPDF2 import PdfReader
from scipy.io.wavfile import write as write_wav
from transformers import pipeline

# Headings (compared case-insensitively) that delimit the abstract on page one.
_ABSTRACT_HEADING = "abstract"
_INTRO_HEADING = "introduction"

_SUMMARY_MODEL = "pszemraj/led-base-book-summary"

# Generation settings shared by both summarization passes.
_SUMMARY_KWARGS = {
    "no_repeat_ngram_size": 3,
    "encoder_no_repeat_ngram_size": 3,
    "repetition_penalty": 3.5,
    "num_beams": 4,
    "early_stopping": True,
}


@lru_cache(maxsize=1)
def _load_summarizer():
    """Build the summarization pipeline once and reuse it across requests."""
    return pipeline(
        "summarization",
        _SUMMARY_MODEL,
        device=0 if torch.cuda.is_available() else -1,
    )


def _extract_abstract(page_text):
    """Return the text between an "Abstract" heading and the "1 ... Introduction" heading.

    A line opens the abstract when, stripped and lower-cased, it is exactly
    ``"abstract"``. A line closes it when it contains both ``"1"`` and
    ``"introduction"``. Heading lines themselves are not included. Lines are
    joined with single spaces so that words at line ends do not run together.
    Returns an empty string when no abstract heading is found.
    """
    collected = []
    in_abstract = False
    for raw_line in page_text.splitlines():
        stripped = raw_line.strip()
        lowered = stripped.lower()
        if lowered == _ABSTRACT_HEADING:
            in_abstract = True
            continue
        if "1" in lowered and _INTRO_HEADING in lowered:
            in_abstract = False
            continue
        if in_abstract and stripped:
            collected.append(stripped)
    return " ".join(collected)


def _write_wav_tempfile(audio_array):
    """Write ``audio_array`` as 16-bit PCM to a new temporary WAV file and return its path.

    The file is created with ``delete=False``; the caller owns the file and is
    responsible for removing it.
    """
    # Clip before scaling so out-of-range samples saturate instead of wrapping
    # around when cast to int16.
    pcm = (np.clip(audio_array, -1.0, 1.0) * 32767).astype(np.int16)
    with NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav_file:
        wav_file_path = temp_wav_file.name
    # Write after the handle is closed (re-opening an open temp file by name
    # is not portable), and at Bark's own sample rate so playback speed and
    # pitch are correct.
    write_wav(wav_file_path, SAMPLE_RATE, pcm)
    return wav_file_path


def summarize_abstract_from_pdf(pdf_file_path):
    """Summarize the abstract found on the first page of a PDF.

    Args:
        pdf_file_path: Path to the PDF file to read.

    Returns:
        The summary text produced by running the summarizer twice: once on the
        extracted abstract, then again on that summary with a shorter
        ``max_length``.

    Raises:
        ValueError: If the PDF has no pages, or no abstract section could be
            located on the first page.
    """
    # Only the first page is inspected.
    with open(pdf_file_path, "rb") as pdf_file:
        reader = PdfReader(pdf_file)
        if len(reader.pages) == 0:
            raise ValueError("The PDF contains no pages.")
        first_page_text = reader.pages[0].extract_text() or ""

    abstract_text = _extract_abstract(first_page_text)
    if not abstract_text:
        raise ValueError(
            "Could not find an abstract on the first page of the PDF."
        )

    summarizer = _load_summarizer()

    # First pass: condense the abstract.
    first_pass = summarizer(
        abstract_text,
        min_length=16,
        max_length=150,
        **_SUMMARY_KWARGS,
    )
    # Second pass: summarize the summary to get a much shorter text.
    second_pass = summarizer(
        first_pass[0]["summary_text"],
        min_length=16,
        max_length=25,
        **_SUMMARY_KWARGS,
    )
    return second_pass[0]["summary_text"]


def generate_audio_func(pdf_file):
    """Summarize the abstract of an uploaded PDF and synthesize it as speech.

    Args:
        pdf_file: Uploaded-file object; its ``.name`` attribute is used as the
            path of the PDF on disk.

    Returns:
        Path to a temporary WAV file containing the generated audio.

    Raises:
        ValueError: Propagated from ``summarize_abstract_from_pdf`` when no
            abstract can be extracted.
    """
    model_name = "suno/bark-small"
    # NOTE(review): this string is passed as preload_models' first positional
    # argument; Bark's preload_models appears to take boolean flags there, so
    # this likely does not select the small model. Confirm against the Bark
    # API before changing (behavior kept as in the original).
    preload_models(model_name)

    pdf_file_path = pdf_file.name
    text_prompt = summarize_abstract_from_pdf(pdf_file_path)
    audio_array = generate_audio(text_prompt)
    return _write_wav_tempfile(audio_array)


def generate_audio_func2(text):
    """Synthesize ``text`` as speech.

    Args:
        text: The text prompt to speak.

    Returns:
        Path to a temporary WAV file containing the generated audio.
    """
    audio_array = generate_audio(text)
    return _write_wav_tempfile(audio_array)


# Define app name and app description
app_name = "PDF to Audio Converter"
app_description = "Convert text from a PDF file to audio. Upload a PDF file. We accept only PDF files"

# Create the Gradio app
# NOTE(review): input_component is defined but not passed to the interface
# below, which is wired to the plain-text function even though the
# description talks about uploading a PDF. Also, Gradio file-type filters are
# normally written with a leading dot (".pdf") - confirm before wiring it in.
input_component = gr.File(file_types=["pdf"])
output_component = gr.Audio()

demo = gr.Interface(
    fn=generate_audio_func2,
    inputs="text",
    outputs=output_component,
    title=app_name,
    description=app_description,
)
demo.launch()