# Spaces: Running
# app.py | |
import gradio as gr | |
from extract_text_from_pdf import PDFTextExtractor | |
from generate_transcript import TranscriptProcessor | |
from generate_audio import TTSGenerator | |
import pickle | |
import os | |
import tempfile | |
import shutil | |
def create_temp_session_directory():
    """Create a new, uniquely named temporary directory and return its path.

    The directory is made with ``tempfile.mkdtemp`` and is not removed
    automatically; whoever calls this owns the cleanup.
    """
    session_dir = tempfile.mkdtemp()
    return session_dir
# Combined function to perform all steps sequentially
def process_pdf_to_podcast(pdf_file):
    """Run steps 1-3 of the pipeline on an uploaded PDF.

    The PDF is copied into a private temporary session directory, its text is
    extracted and cleaned, a transcript is generated from that text, and the
    transcript is then rewritten for text-to-speech.

    Args:
        pdf_file: Path of the uploaded PDF (handed straight to
            ``shutil.copy``), or ``None`` when nothing was uploaded.

    Returns:
        A 4-tuple ``(status, text_preview, transcript_preview,
        tts_ready_preview)``: a status message, the first 500 characters of
        the cleaned text, the unpickled transcript, and the unpickled
        TTS-ready transcript.

    Raises:
        gr.Error: If no file was uploaded.
    """
    # Fail with a readable message instead of a TypeError from shutil.copy.
    if pdf_file is None:
        raise gr.Error("Please upload a PDF file before running the steps.")

    text_model = "llama3-70b-8192"
    session_dir = create_temp_session_directory()
    try:
        # Define paths within the session directory
        pdf_path = os.path.join(session_dir, "uploaded_pdf.pdf")
        clean_text_path = os.path.join(session_dir, "clean_text.txt")
        transcript_path = os.path.join(session_dir, "data.pkl")
        tts_ready_path = os.path.join(session_dir, "podcast_ready_data.pkl")

        # Step 1: Extract Text from PDF
        shutil.copy(pdf_file, pdf_path)
        extractor = PDFTextExtractor(pdf_path, clean_text_path)
        clean_text_path = extractor.clean_and_save_text()

        # Only the first 500 characters are read for the preview.
        with open(clean_text_path, "r", encoding="utf-8") as file:
            text_preview = file.read(500)

        # Step 2: Generate Transcript
        processor = TranscriptProcessor(
            clean_text_path, transcript_path, tts_ready_path, text_model
        )
        transcript_path = processor.generate_transcript()

        # These pickles were written by this pipeline moments ago, not
        # supplied by the user.
        with open(transcript_path, "rb") as f:
            transcript_preview = pickle.load(f)

        # Step 3: Rewrite Transcript for TTS
        tts_ready_path = processor.rewrite_transcript()

        # Load the rewritten transcript for preview and editing
        with open(tts_ready_path, "rb") as f:
            tts_ready_preview = pickle.load(f)

        return (
            "Steps 1-3 completed. Preview and adjust the rewritten transcript if needed.",
            text_preview,
            transcript_preview,
            tts_ready_preview,
        )
    finally:
        # The session path is never returned or stored, so nothing else can
        # reach these files; remove them instead of leaking one directory
        # (including a copy of the upload) per click.
        shutil.rmtree(session_dir, ignore_errors=True)
# Final Step: Generate Audio after optional adjustments
def generate_audio_from_modified_text(tts_ready_text, tts_ready_path=None):
    """Pickle the (possibly edited) TTS-ready transcript and synthesize audio.

    Args:
        tts_ready_text: Transcript to synthesize, as edited in the UI.
        tts_ready_path: Optional path where the pickled transcript is
            written. When omitted, ``podcast_ready_data.pkl`` is created
            inside a fresh temporary session directory.

    Returns:
        A ``(status, audio_path)`` tuple, where ``audio_path`` is the value
        returned by ``TTSGenerator.generate_audio()``.
    """
    if tts_ready_path is None:
        # This function used to read a ``tts_ready_path`` name that only
        # exists as a local of process_pdf_to_podcast, so every call raised
        # NameError. Use a private location of its own instead.
        tts_ready_path = os.path.join(
            create_temp_session_directory(), "podcast_ready_data.pkl"
        )

    # Save any modified TTS-ready transcript
    with open(tts_ready_path, "wb") as f:
        pickle.dump(tts_ready_text, f)

    # Generate audio from the TTS-ready transcript
    tts_gen = TTSGenerator(tts_ready_path)
    audio_path = tts_gen.generate_audio()
    return f"Step 4 complete. Audio saved to {audio_path}.", audio_path
# Gradio Interface
# NOTE(review): the source's indentation was lost; the nesting below (three
# widgets inside the Row, launch outside the Blocks context) is inferred from
# the comments and should be confirmed.
with gr.Blocks() as app:
    gr.Markdown("# PDF to Podcast Conversion Application")

    # Upload widget, trigger button and status box for steps 1-3.
    with gr.Row():
        pdf_input = gr.File(label="Upload PDF")
        run_all_button = gr.Button("Run All Steps (1-3)")
        output_status = gr.Textbox(label="Status")

    # Read-only preview of the text extracted in step 1.
    extracted_text_preview = gr.Textbox(
        label="Extracted Text Preview (First 500 Characters)",
        interactive=False,
    )

    # Read-only preview of the transcript generated in step 2.
    transcript_preview = gr.Textbox(
        label="Generated Transcript Preview",
        interactive=False,
    )

    # Step 3 output; editable so the user can adjust it before synthesis.
    tts_ready_preview = gr.Textbox(
        label="Editable Rewritten Transcript for TTS",
        interactive=True,
    )

    # Trigger and player for the final audio step.
    generate_audio_button = gr.Button("Generate Audio from Edited Transcript")
    final_audio_output = gr.Audio(label="Generated Podcast Audio")

    # Steps 1-3: fill the status box and the three previews.
    run_all_button.click(
        fn=process_pdf_to_podcast,
        inputs=[pdf_input],
        outputs=[
            output_status,
            extracted_text_preview,
            transcript_preview,
            tts_ready_preview,
        ],
    )

    # Final step: synthesize audio from the (possibly edited) transcript.
    generate_audio_button.click(
        fn=generate_audio_from_modified_text,
        inputs=[tts_ready_preview],
        outputs=[output_status, final_audio_output],
    )

app.launch()