"""Gradio app that generates a two-speaker podcast with edge-tts voices."""

import asyncio
import json
import os
import re
import shutil
import uuid

import edge_tts
import gradio as gr
from dotenv import load_dotenv
from gradio_client import Client
from pydub import AudioSegment

load_dotenv()

# Locale prefixes of the voices offered in the voice dropdowns.
ENGLISH_LOCALE_PREFIXES = ("en-US", "en-GB", "en-AU", "en-CA", "en-IN")

# Markdown code-fence marker (three backticks) that may wrap the JSON reply.
_FENCE = "`" * 3


def sanitize_filename(filename):
    """Convert a string to a safe filename.

    Spaces become underscores, every character outside ``[a-zA-Z0-9_-]`` is
    dropped, and the result is lower-cased and truncated to 50 characters.
    The result may be empty when *filename* has no permitted characters.
    """
    safe_filename = re.sub(r'[^a-zA-Z0-9_-]', '', filename.replace(' ', '_'))
    return safe_filename.lower()[:50]


async def get_voices():
    """Return the English edge-tts voices as display strings.

    Each entry has the form ``"<ShortName> (<Gender>, <Locale>)"``; only
    voices whose locale starts with one of ``ENGLISH_LOCALE_PREFIXES`` are
    included.
    """
    voices = await edge_tts.list_voices()
    return [
        f"{voice['ShortName']} ({voice['Gender']}, {voice['Locale']})"
        for voice in voices
        if voice["Locale"].startswith(ENGLISH_LOCALE_PREFIXES)
    ]


def extract_voice_name(voice_string):
    """Return the voice short name from a string built by ``get_voices``."""
    return voice_string.split(" (")[0]


async def generate_audio(text, voice, filename):
    """Synthesize *text* with the given display-form *voice* into *filename*."""
    communicate = edge_tts.Communicate(text, extract_voice_name(voice))
    await communicate.save(filename)


async def _synthesize_segment(text, voice, path):
    """Render *text* to *path*, load it as an AudioSegment, delete the file."""
    await generate_audio(text, voice, path)
    segment = AudioSegment.from_file(path)
    os.remove(path)
    return segment


async def create_podcast_versions(data, speaker1_name, speaker2_name,
                                  speaker1_voice, speaker2_voice, title):
    """Build the three podcast audio files from a parsed conversation.

    Args:
        data: Mapping with a ``"conversation"`` list; each entry may carry a
            ``"speaker1text"`` and/or a ``"speaker2text"`` value.
        speaker1_name, speaker2_name: Names used in the output file names.
        speaker1_voice, speaker2_voice: Voices in ``get_voices`` display form.
        title: Podcast title, used as the file-name prefix.

    Returns:
        ``(speaker1_path, speaker2_path, combined_path, temp_dir)``. Each
        single-speaker track contains silence of matching length while the
        other speaker talks. ``temp_dir`` is left for the caller to remove.

    Raises:
        Any exception from synthesis, decoding or export; the temporary
        directory is removed before the exception propagates.
    """
    session_id = str(uuid.uuid4())
    temp_dir = f'temp_{session_id}'
    # A title made only of special characters sanitizes to "", so fall back
    # to a fixed prefix instead of producing names like "_andrew_only.mp3".
    safe_title = sanitize_filename(title) or "podcast"

    # Speaker names come from user input: sanitize them so path separators
    # and other special characters never reach the file system.
    speaker1_slug = sanitize_filename(speaker1_name) or "speaker1"
    speaker2_slug = sanitize_filename(speaker2_name) or "speaker2"
    if speaker1_slug == speaker2_slug:
        # Identical names would make the second export overwrite the first.
        speaker1_slug = f"{speaker1_slug}_1"
        speaker2_slug = f"{speaker2_slug}_2"

    os.makedirs(temp_dir, exist_ok=True)

    try:
        speaker1_version = AudioSegment.empty()
        speaker2_version = AudioSegment.empty()
        combined_version = AudioSegment.empty()

        for i, entry in enumerate(data['conversation']):
            if 'speaker1text' in entry:
                audio = await _synthesize_segment(
                    entry['speaker1text'],
                    speaker1_voice,
                    f'{temp_dir}/speaker1_{i}.mp3',
                )
                speaker1_version += audio
                # Pad the other track so both stay time-aligned.
                speaker2_version += AudioSegment.silent(duration=len(audio))
                combined_version += audio

            if 'speaker2text' in entry:
                audio = await _synthesize_segment(
                    entry['speaker2text'],
                    speaker2_voice,
                    f'{temp_dir}/speaker2_{i}.mp3',
                )
                speaker2_version += audio
                speaker1_version += AudioSegment.silent(duration=len(audio))
                combined_version += audio

        speaker1_path = f"{safe_title}_{speaker1_slug}_only.mp3"
        speaker2_path = f"{safe_title}_{speaker2_slug}_only.mp3"
        combined_path = f"{safe_title}_combined.mp3"

        speaker1_version.export(speaker1_path, format="mp3")
        speaker2_version.export(speaker2_path, format="mp3")
        combined_version.export(combined_path, format="mp3")

        return speaker1_path, speaker2_path, combined_path, temp_dir
    except Exception:
        # Best-effort cleanup; never let it mask the original error.
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise


def _build_prompt(title, channel_name, speaker1_name, speaker2_name):
    """Return the chat prompt: API_MESSAGE followed by the JSON template."""
    return (
        f"{os.getenv('API_MESSAGE')} "
        f'{{ "title": "{title}", "channel": "{channel_name}", '
        f'"speaker1": "{speaker1_name}", "speaker2": "{speaker2_name}", '
        '"conversation": [ { "speaker1text": "" }, { "speaker2text": "" } ] } '
        "give 42 sentences for both. \n"
    )


def _parse_podcast_json(result):
    """Parse the model reply as JSON, unwrapping a fenced block if needed.

    Raises:
        ValueError: when the reply is not JSON and has no fenced block
            (``json.JSONDecodeError`` is a ``ValueError`` subclass, so an
            unparsable fenced body is reported the same way).
    """
    try:
        return json.loads(result)
    except json.JSONDecodeError:
        pass

    json_start = result.find(_FENCE) + len(_FENCE)
    json_end = result.rfind(_FENCE)
    # find() returns -1 when there is no fence, which makes json_start == 2.
    if json_start > 2 and json_end > json_start:
        json_str = result[json_start:json_end].strip()
        # Drop the optional language tag. Stripping the tag itself (rather
        # than skipping to the next newline) also handles a fenced block
        # written on a single line.
        if json_str.startswith('json'):
            json_str = json_str[4:].strip()
        return json.loads(json_str)

    raise ValueError("Could not parse JSON from response")


def generate_podcast(title, channel_name, speaker1_name, speaker2_name,
                     speaker1_voice, speaker2_voice):
    """Gradio callback: generate the conversation and render it to audio.

    Reads ``API_URL``, ``API_MESSAGE`` and ``API_REQUEST`` from the
    environment, asks the ``/chat`` endpoint for a conversation, parses the
    reply and synthesizes the audio files.

    Returns:
        ``[speaker1_path, speaker2_path, combined_path, podcast_data]`` on
        success, or ``[None, None, None, {"error": "Error: ..."}]`` on any
        failure. The error is wrapped in a dict because the last output is a
        ``gr.JSON`` component, which tries to parse plain strings as JSON.
    """
    try:
        if not all([title, channel_name, speaker1_name, speaker2_name,
                    speaker1_voice, speaker2_voice]):
            raise ValueError("All fields must be filled out")

        api_url = os.getenv('API_URL')
        if not api_url:
            raise ValueError("API_URL environment variable is not set")

        client = Client(api_url)
        # NOTE(review): param_3..param_6 are positional settings of the
        # remote endpoint; their meaning is not visible here -- confirm
        # against the endpoint's API page before changing them.
        result = client.predict(
            message=_build_prompt(
                title, channel_name, speaker1_name, speaker2_name
            ),
            request=os.getenv('API_REQUEST'),
            param_3=0.5,
            param_4=8100,
            param_5=0.5,
            param_6=0,
            api_name="/chat",
        )

        podcast_data = _parse_podcast_json(result)

        speaker1_path, speaker2_path, combined_path, temp_dir = asyncio.run(
            create_podcast_versions(
                podcast_data,
                speaker1_name,
                speaker2_name,
                speaker1_voice,
                speaker2_voice,
                title,
            )
        )

        shutil.rmtree(temp_dir, ignore_errors=True)

        return [speaker1_path, speaker2_path, combined_path, podcast_data]
    except Exception as e:
        # Top-level UI boundary: report the failure in the JSON panel.
        return [None, None, None, {"error": f"Error: {str(e)}"}]


def _default_voice(voices, preferred):
    """Return the first voice containing *preferred*, else the first voice.

    Returns ``None`` when *voices* is empty so the dropdown starts blank
    instead of raising ``IndexError``.
    """
    for voice in voices:
        if preferred in voice:
            return voice
    return voices[0] if voices else None


with gr.Blocks(theme=gr.themes.Soft()) as interface:
    # Fetched once, when the UI is built.
    available_voices = asyncio.run(get_voices())

    gr.Markdown("# Easy Podcast")
    gr.Markdown(
        "Generate a podcast conversation between two speakers on any topic. "
        "Choose voices and customize speaker details to create your perfect "
        "podcast.\n"
        "To use elevelabs voices or cloned voices contact me at "
        "aheedsajid@gmail.com\n"
        "Support me USDT (TRC-20) (TAe7hsSVWtMEYz3G5V1UiUdYPQVqm28bKx)"
    )

    with gr.Row():
        with gr.Column():
            title = gr.Textbox(
                label="Podcast Topic",
                placeholder="e.g., The Future of AI",
                show_label=True,
            )
            channel_name = gr.Textbox(
                label="Channel Name",
                placeholder="e.g., TechTalks",
                value="WeePakistan",
                show_label=True,
            )
        with gr.Column():
            speaker1_name = gr.Textbox(
                label="First Speaker Name",
                placeholder="e.g., John",
                value="Andrew",
                show_label=True,
            )
            speaker2_name = gr.Textbox(
                label="Second Speaker Name",
                placeholder="e.g., Sarah",
                value="Priya",
                show_label=True,
            )

    with gr.Row():
        with gr.Column():
            speaker1_voice = gr.Dropdown(
                choices=available_voices,
                value=_default_voice(available_voices, "Christopher"),
                label="First Speaker Voice",
                info="Select voice for the first speaker",
            )
        with gr.Column():
            speaker2_voice = gr.Dropdown(
                choices=available_voices,
                value=_default_voice(available_voices, "Neerja"),
                label="Second Speaker Voice",
                info="Select voice for the second speaker",
            )

    generate_btn = gr.Button("Generate Podcast", variant="primary")

    with gr.Row():
        speaker1_audio = gr.Audio(label="First Speaker Audio")
        speaker2_audio = gr.Audio(label="Second Speaker Audio")
        combined_audio = gr.Audio(label="Combined Audio")

    conversation_json = gr.JSON(label="Generated Conversation")

    generate_btn.click(
        fn=generate_podcast,
        inputs=[
            title,
            channel_name,
            speaker1_name,
            speaker2_name,
            speaker1_voice,
            speaker2_voice,
        ],
        outputs=[
            speaker1_audio,
            speaker2_audio,
            combined_audio,
            conversation_json,
        ],
    )

if __name__ == "__main__":
    interface.launch()