import gradio as gr
from transformers import pipeline
from tempfile import NamedTemporaryFile
from PyPDF2 import PdfReader
from IPython.display import Audio
import numpy as np
from bark import SAMPLE_RATE, generate_audio, preload_models
from scipy.io.wavfile import write as write_wav
import torch

def _extract_abstract_text(page_text):
    """Return the abstract body found in *page_text* as a single line.

    Collection starts after a line that consists solely of the word
    "abstract" (case-insensitive) and stops at a line that contains both
    "1" and "introduction". Returns an empty string when no such heading
    is present.
    """
    collected = []
    in_abstract = False
    for raw_line in page_text.splitlines():
        line = raw_line.strip()
        lowered = line.lower()
        if lowered == "abstract":
            in_abstract = True
            # Skip the heading itself instead of deleting the word
            # "Abstract" from the body afterwards.
            continue
        if "1" in lowered and "introduction" in lowered:
            in_abstract = False
        if in_abstract and line:
            collected.append(line)
    # Join with spaces so words at line boundaries are not fused together.
    return " ".join(collected)


def summarize_abstract_from_pdf(pdf_file_path):
    """Summarize the abstract found on the first page of a PDF.

    Only the first page is read. The abstract text is summarized twice with
    the ``pszemraj/led-base-book-summary`` model: a first pass with
    ``max_length=150`` and a second pass over that summary with
    ``max_length=25``.

    If no "Abstract" heading is recognized, the whole first-page text is
    summarized instead so the model is never called with empty input.

    Args:
        pdf_file_path: Path of the PDF file to read.

    Returns:
        The ``summary_text`` of the second summarization pass.

    Raises:
        ValueError: If no text could be extracted from the first page.
    """
    with open(pdf_file_path, "rb") as pdf_file:
        reader = PdfReader(pdf_file)
        first_page_text = reader.pages[0].extract_text() or ""

    abstract_text = _extract_abstract_text(first_page_text)
    if not abstract_text:
        # No recognizable heading: fall back to the whole page, with
        # whitespace collapsed.
        abstract_text = " ".join(first_page_text.split())
    if not abstract_text:
        raise ValueError(
            f"No text could be extracted from the first page of {pdf_file_path!r}"
        )

    summarizer = pipeline(
        "summarization",
        "pszemraj/led-base-book-summary",
        device=0 if torch.cuda.is_available() else -1,
    )

    # Generation settings shared by both passes; only max_length differs.
    generation_kwargs = {
        "min_length": 16,
        "no_repeat_ngram_size": 3,
        "encoder_no_repeat_ngram_size": 3,
        "repetition_penalty": 3.5,
        "num_beams": 4,
        "early_stopping": True,
    }

    first_pass = summarizer(abstract_text, max_length=150, **generation_kwargs)
    # The summary is summarized again to obtain a much shorter text.
    second_pass = summarizer(
        first_pass[0]["summary_text"],
        max_length=25,
        **generation_kwargs,
    )

    return second_pass[0]["summary_text"]

def generate_audio_func(pdf_file):
    """Summarize a PDF's abstract and synthesize the summary as speech.

    Args:
        pdf_file: Either a path string or an object whose ``name`` attribute
            is the path of the uploaded PDF.

    Returns:
        Path of a temporary ``.wav`` file holding the generated audio. The
        file is created with ``delete=False``, so it is not removed
        automatically.
    """
    model_name = "suno/bark-small"
    # NOTE(review): upstream Bark's preload_models appears to take boolean
    # flags (text_use_gpu, text_use_small, ...) rather than a model name, so
    # this string is presumably just a truthy first flag - confirm, and use
    # the *_use_small flags if the small checkpoints are intended.
    preload_models(model_name)

    # Accept both a plain path and a file-like wrapper exposing ``.name``.
    pdf_file_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name

    text_prompt = summarize_abstract_from_pdf(pdf_file_path)
    audio_array = generate_audio(text_prompt)

    # Assumes generate_audio returns float samples in [-1, 1]; clip before
    # scaling so out-of-range samples cannot wrap around in int16.
    pcm_samples = (np.clip(audio_array, -1.0, 1.0) * 32767).astype(np.int16)

    # Reserve a temp file name, then write after the handle is closed so the
    # path is not opened twice at the same time.
    with NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav_file:
        wav_file_path = temp_wav_file.name
    # Use the sample rate exported by Bark rather than a hard-coded 22050,
    # otherwise playback speed and pitch are wrong.
    write_wav(wav_file_path, SAMPLE_RATE, pcm_samples)
    return wav_file_path


def generate_audio_func2(text):
    """Synthesize *text* as speech and save it to a temporary WAV file.

    Args:
        text: The text prompt passed to Bark's ``generate_audio``.

    Returns:
        Path of a temporary ``.wav`` file holding the generated audio. The
        file is created with ``delete=False``, so it is not removed
        automatically.
    """
    audio_array = generate_audio(text)

    # Assumes generate_audio returns float samples in [-1, 1]; clip before
    # scaling so out-of-range samples cannot wrap around in int16.
    pcm_samples = (np.clip(audio_array, -1.0, 1.0) * 32767).astype(np.int16)

    # Reserve a temp file name, then write after the handle is closed so the
    # path is not opened twice at the same time.
    with NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav_file:
        wav_file_path = temp_wav_file.name
    # Use the sample rate exported by Bark rather than a hard-coded 22050,
    # otherwise playback speed and pitch are wrong.
    write_wav(wav_file_path, SAMPLE_RATE, pcm_samples)
    return wav_file_path


# App title and description shown at the top of the Gradio page
app_name = "PDF to Audio Converter"
app_description = "Convert text from a PDF file to audio. Upload a PDF file. We accept only PDF files"

# Gradio components.
# Upload filters are given as file extensions with a leading dot (or a
# category name such as "image"), so a bare "pdf" would not match PDF files.
input_component = gr.File(file_types=[".pdf"])
output_component = gr.Audio()

# NOTE(review): the title and description advertise PDF upload, but the
# interface below is wired to the plain-text function and a text input;
# input_component and generate_audio_func are currently unused. Confirm which
# flow is intended before changing the wiring or the description.
demo = gr.Interface(
    fn=generate_audio_func2,
    inputs="text",
    outputs=output_component,
    title=app_name,
    description=app_description,
)

demo.launch()