Spaces:

ales
/

ai-audio-books

Running

App Files Files Community

bl4dylion committed on Oct 9, 2024

Commit

c2260d3

1 Parent(s): e9aad3f

add file uploader, errors and refresh

Browse files

Files changed (1) hide show

app.py +71 -8

app.py CHANGED Viewed

@@ -8,13 +8,13 @@ import gradio as gr
 import pandas as pd
 from dotenv import load_dotenv
 from openai import OpenAI
 # Load variables from a .env file (if present) into the process environment.
 load_dotenv()
 # API key taken from the AIML_API_KEY environment variable; os.getenv yields None when it is unset.
 api_key = os.getenv("AIML_API_KEY")
 CHARACTER_CLASSIFICATION_PROMPT = """
 **Task:**
@@ -200,17 +200,50 @@ class AudiobookBuilder:
         return response
-def respond(text):
     # Pre-change handler shown by the diff: takes only the textbox contents.
     builder = AudiobookBuilder()
     # Each step feeds the next: annotate the text, collect the characters found in
     # the annotation, classify them, then map each character to a voice.
     annotated_text = builder.annotate_text(text)
     unique_characters = builder.get_unique_characters(annotated_text)
     character_to_gender = builder.classify_characters(text, unique_characters)
     character_to_voice = builder.map_characters_to_voices(character_to_gender)
     # NOTE(review): generate_audio presumably writes "audiobook.mp3", which is
     # loaded on the next line - confirm against AudiobookBuilder.
     builder.generate_audio(annotated_text, character_to_voice)
     # sr=None asks librosa to keep the file's native sampling rate.
     audio, sr = librosa.load("audiobook.mp3", sr=None)
-    return (sr, audio)
 # Pre-change UI layout shown by the diff: one text input, one audio output.
 with gr.Blocks(title="Audiobooks Generation") as ui:
@@ -218,16 +251,46 @@ with gr.Blocks(title="Audiobooks Generation") as ui:
     # Input row: multi-line textbox for the book text.
     with gr.Row(variant="panel"):
         text_input = gr.Textbox(label="Enter the book text", lines=20)
     # Output row: player for the generated audio.
     with gr.Row(variant="panel"):
-        audio_output = gr.Audio(label="Generated audio")
     submit_button = gr.Button("Submit")
     # Clicking Submit passes the textbox value to respond and shows its result in the audio player.
     submit_button.click(
         fn=respond,
         inputs=[text_input],
-        outputs=[audio_output],
     )
 # Start the Gradio app.
 ui.launch()

 import pandas as pd
 from dotenv import load_dotenv
 from openai import OpenAI
+from langchain_community.document_loaders import PyPDFLoader
 # Load variables from a .env file (if present) into the process environment.
 load_dotenv()
 # API key taken from the AIML_API_KEY environment variable; os.getenv yields None when it is unset.
 api_key = os.getenv("AIML_API_KEY")
+FILE_SIZE_MAX = 0.5 # maximum accepted upload size in megabytes (MB)
 CHARACTER_CLASSIFICATION_PROMPT = """
 **Task:**
         return response
def parse_pdf(file_path):
    """Extract the text of a PDF file as a single string.

    The file is loaded with ``PyPDFLoader``; the ``page_content`` of every
    document it returns is joined with a newline between consecutive entries.
    """
    pages = PyPDFLoader(file_path).load()
    page_texts = []
    for page in pages:
        page_texts.append(page.page_content)
    return "\n".join(page_texts)
def _error_result(message):
    """Build the ``respond`` outputs for a failure: no audio plus a visible error."""
    # The error textbox is created with visible=False, so setting only its
    # value would never reach the user; the update must also make it visible.
    return None, gr.update(value=message, visible=True)


def respond(text, uploaded_file):
    """Generate an audiobook from typed text or from an uploaded .txt/.pdf file.

    When ``uploaded_file`` is provided, its contents replace ``text``. The
    file must not exceed ``FILE_SIZE_MAX`` megabytes and must have a ``.txt``
    or ``.pdf`` extension (matched case-insensitively).

    Args:
        text: Book text from the textbox; replaced by the file contents when
            a file is uploaded.
        uploaded_file: Value of the file-upload component, or ``None`` when
            nothing was uploaded.

    Returns:
        A 2-tuple for the audio and error-message components. On success it
        is ``((sr, audio), update)`` where ``update`` clears and hides the
        error field. On failure it is ``(None, update)`` where ``update``
        sets the error text and makes the field visible.
    """
    if uploaded_file is not None:
        # NOTE(review): depending on the Gradio version the upload arrives as
        # an object exposing the temp path via ``.name`` or as a plain path
        # string - accept both.
        file_path = getattr(uploaded_file, "name", uploaded_file)

        # FILE_SIZE_MAX is expressed in MB; getsize reports bytes.
        if os.path.getsize(file_path) > FILE_SIZE_MAX * 1024 * 1024:
            return _error_result(
                f"Error: The uploaded file exceeds the size limit of {FILE_SIZE_MAX} MB."
            )

        # Compare extensions case-insensitively so "BOOK.PDF" is accepted too.
        lowered_path = file_path.lower()
        if lowered_path.endswith(".txt"):
            try:
                with open(file_path, "r", encoding="utf-8") as file:
                    text = file.read()
            except UnicodeDecodeError:
                return _error_result(
                    "Error: The uploaded .txt file is not valid UTF-8 text."
                )
        elif lowered_path.endswith(".pdf"):
            text = parse_pdf(file_path)
        else:
            return _error_result(
                "Error: Unsupported file type. Please upload a .txt or .pdf file."
            )

    # Do not start the generation pipeline when there is nothing to read.
    if not text or not text.strip():
        return _error_result(
            "Error: No text provided. Please enter the book text or upload a file."
        )

    # Each step feeds the next: annotate, collect characters, classify them,
    # assign voices, then synthesize.
    builder = AudiobookBuilder()
    annotated_text = builder.annotate_text(text)
    unique_characters = builder.get_unique_characters(annotated_text)
    character_to_gender = builder.classify_characters(text, unique_characters)
    character_to_voice = builder.map_characters_to_voices(character_to_gender)
    builder.generate_audio(annotated_text, character_to_voice)

    # NOTE(review): the audio is read back from a fixed file name, so
    # concurrent requests presumably overwrite each other - confirm against
    # AudiobookBuilder.generate_audio.
    # sr=None keeps the file's native sampling rate.
    audio, sr = librosa.load("audiobook.mp3", sr=None)

    # Clear and hide any error left over from a previous submission.
    return (sr, audio), gr.update(value="", visible=False)
def refresh():
    """Return cleared values for the audio output, error message and uploaded file."""
    cleared_audio = cleared_error = cleared_file = None
    return cleared_audio, cleared_error, cleared_file
 with gr.Blocks(title="Audiobooks Generation") as ui:
     with gr.Row(variant="panel"):
         text_input = gr.Textbox(label="Enter the book text", lines=20)
+        # Add a file upload field for .txt and .pdf files
+        file_input = gr.File(label="Upload a text file or PDF", file_types=['.txt', '.pdf'])
     with gr.Row(variant="panel"):
+        audio_output = gr.Audio(label="Generated audio", type="numpy")
+        error_output = gr.Textbox(label="Error Messages", interactive=False, visible=False)  # Initially hidden
     submit_button = gr.Button("Submit")
     submit_button.click(
         fn=respond,
+        inputs=[text_input, file_input],  # Include the uploaded file as an input
+        outputs=[audio_output, error_output],  # Include the audio output and error message output
+    )
+    refresh_button = gr.Button("Refresh")
+    refresh_button.click(
+        fn=refresh,
+        inputs=[],
+        outputs=[audio_output, error_output, file_input]  # Reset audio output, error message, and uploaded file
+    )
+    # Hide error message dynamically when input is received
+    text_input.change(
+        fn=lambda: gr.update(visible=False),  # Hide the error field
         inputs=[text_input],
+        outputs=error_output
     )
+    file_input.change(
+        fn=lambda: gr.update(visible=False),  # Hide the error field
+        inputs=[file_input],
+        outputs=error_output
+    )
+    # To clear error field when refreshing
+    refresh_button.click(
+        fn=lambda: gr.update(visible=False),  # Hide the error field
+        inputs=[],
+        outputs=error_output,
+    )
 ui.launch()