Spaces:
Sleeping
Sleeping
import gradio as gr | |
from yt_dlp import YoutubeDL | |
from moviepy.editor import AudioFileClip, VideoFileClip | |
import speech_recognition as sr | |
from deep_translator import GoogleTranslator | |
from fpdf import FPDF | |
from docx import Document | |
import gradio as gr | |
import os | |
def download_audio(video_url):
    """Download the audio track of a video and convert it to WAV.

    yt-dlp fetches the best available audio stream and its FFmpeg
    post-processor writes ``audio.mp3`` in the working directory. moviepy
    then re-encodes that file to ``audio.wav`` and the intermediate MP3 is
    deleted.

    Args:
        video_url: URL of the video whose audio should be fetched.

    Returns:
        ``'audio.wav'`` on success, otherwise a string starting with
        ``"Error downloading audio: "`` that describes the failure.
    """
    audio_file = 'audio.mp3'
    try:
        ydl_opts = {
            'format': 'bestaudio/best',
            'outtmpl': 'audio.%(ext)s',
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'mp3',
                'preferredquality': '192',
            }],
        }
        with YoutubeDL(ydl_opts) as ydl:
            ydl.download([video_url])

        audio = AudioFileClip(audio_file)
        try:
            audio.write_audiofile('audio.wav')
        finally:
            # Always release the underlying reader: leaving it open leaks the
            # reader and keeps the MP3 in use while it is being deleted.
            audio.close()

        # Clean up the temporary MP3 now that the WAV exists.
        os.remove(audio_file)
        return 'audio.wav'
    except Exception as e:
        return f"Error downloading audio: {str(e)}"
def split_audio(audio_file, chunk_length=60):
    """Split an audio file into consecutive WAV chunks.

    Each chunk is written to ``audio_chunk_<start>.wav`` in the working
    directory, where ``<start>`` is the chunk's offset in seconds.

    Args:
        audio_file: Path of the audio file to split.
        chunk_length: Maximum length of each chunk in seconds; must be
            positive.

    Returns:
        A list of chunk file names in playback order, or a string starting
        with ``"Error splitting audio: "`` if anything fails.
    """
    try:
        if chunk_length <= 0:
            raise ValueError("chunk_length must be positive")

        audio = AudioFileClip(audio_file)
        try:
            # Use the exact duration: truncating it to an int would drop the
            # fractional tail and produce no chunks for clips under 1 second.
            total = audio.duration
            chunks = []
            start = 0
            while start < total:
                end = min(start + chunk_length, total)
                chunk_file = f'audio_chunk_{start}.wav'
                audio.subclip(start, end).write_audiofile(chunk_file)
                chunks.append(chunk_file)
                start += chunk_length
            return chunks
        finally:
            # Release the reader even when writing a chunk fails.
            audio.close()
    except Exception as e:
        return f"Error splitting audio: {str(e)}"
def generate_subtitles(audio_file):
    """Transcribe an audio file with Google speech recognition.

    The audio is split into chunks via ``split_audio``; each chunk is
    transcribed separately and the recognised texts are joined with spaces.
    Chunks that cannot be recognised, or for which the API request fails,
    are reported on stdout and skipped.

    Args:
        audio_file: Path of the audio file to transcribe.

    Returns:
        The transcript (possibly empty), the error string produced by
        ``split_audio``, or a string starting with
        ``"Error generating subtitles: "`` on any other failure.
    """
    try:
        recognizer = sr.Recognizer()
        audio_chunks = split_audio(audio_file)
        # split_audio returns a list on success and a message string on error.
        if isinstance(audio_chunks, str):
            return audio_chunks

        pieces = []
        try:
            for chunk_file in audio_chunks:
                with sr.AudioFile(chunk_file) as source:
                    audio_content = recognizer.record(source)
                try:
                    pieces.append(recognizer.recognize_google(audio_content))
                except sr.RequestError as e:
                    print(f"API unavailable: {str(e)}")
                except sr.UnknownValueError:
                    print(f"Unable to recognize speech in chunk {chunk_file}")
        finally:
            # Remove every chunk, including those never reached because an
            # unexpected error aborted the loop.
            for chunk_file in audio_chunks:
                if os.path.exists(chunk_file):
                    os.remove(chunk_file)

        return " ".join(pieces).strip()
    except Exception as e:
        return f"Error generating subtitles: {str(e)}"
def _split_for_translation(text, limit):
    """Break *text* into pieces of at most *limit* characters on whitespace."""
    pieces = []
    current = ""
    for word in text.split():
        # A single token longer than the limit has to be cut mid-word.
        while len(word) > limit:
            if current:
                pieces.append(current)
                current = ""
            pieces.append(word[:limit])
            word = word[limit:]
        if not current:
            current = word
        elif len(current) + 1 + len(word) <= limit:
            current = f"{current} {word}"
        else:
            pieces.append(current)
            current = word
    if current:
        pieces.append(current)
    return pieces


def translate_subtitles(subtitles, target_language):
    """Translate subtitle text into ``target_language`` via Google Translate.

    Text that is too long for a single request is split on whitespace into
    smaller pieces, each piece is translated separately, and the results
    are joined with spaces.

    Args:
        subtitles: Text to translate; its language is auto-detected.
        target_language: Language name or code accepted by
            ``GoogleTranslator``.

    Returns:
        The translated text (``""`` for blank input), or a string starting
        with ``"Error translating subtitles: "`` on failure.
    """
    # Google Translate accepts about 5000 characters per request; stay
    # safely below that so long transcripts do not fail outright.
    max_chars = 4500
    try:
        if isinstance(subtitles, str) and not subtitles.strip():
            return ""

        translator = GoogleTranslator(source='auto', target=target_language)
        if len(subtitles) <= max_chars:
            pieces = [subtitles]
        else:
            pieces = _split_for_translation(subtitles, max_chars)

        # A piece with no translation becomes "" so callers always get a str.
        translated = [translator.translate(piece) or "" for piece in pieces]
        return " ".join(translated)
    except Exception as e:
        return f"Error translating subtitles: {str(e)}"
def save_subtitles_as_pdf(subtitles, filename='subtitles.pdf'):
    """Write the subtitle text to a single-flow PDF document.

    Args:
        subtitles: Text to place in the document.
        filename: Destination path of the PDF.

    Returns:
        ``filename`` on success, otherwise a string starting with
        ``"Error saving PDF: "``.
    """
    # NOTE(review): the built-in "Arial" font presumably covers only Latin-1
    # text, so non-Latin translations may end up in the error branch — confirm.
    try:
        document = FPDF()
        document.add_page()
        document.set_font("Arial", size=12)
        # Width 0 lets the cell span to the right margin; 10 is the line height.
        document.multi_cell(0, 10, subtitles)
        document.output(filename)
    except Exception as exc:
        return "Error saving PDF: " + str(exc)
    return filename
def save_subtitles_as_doc(subtitles, filename='subtitles.docx'):
    """Write the subtitle text to a Word document as one paragraph.

    Args:
        subtitles: Text to place in the document.
        filename: Destination path of the ``.docx`` file.

    Returns:
        ``filename`` on success, otherwise a string starting with
        ``"Error saving DOC: "``.
    """
    try:
        word_file = Document()
        word_file.add_paragraph(subtitles)
        word_file.save(filename)
    except Exception as exc:
        return "Error saving DOC: " + str(exc)
    return filename
def download_video(video_url):
    """Download a video with yt-dlp and expose it as ``video.mp4``.

    Leftover ``video.*`` files from earlier runs are removed first, because
    an existing output file can be picked up in place of the new download.
    After the download the converted ``video.mp4`` is preferred; if only a
    file with another extension exists it is renamed to ``video.mp4``
    (a rename only, the container is not re-encoded here).

    Args:
        video_url: URL of the video to fetch.

    Returns:
        ``'video.mp4'`` on success, ``"Error: No video file found."`` if the
        download produced no file, or a string starting with
        ``"Error downloading video: "`` on any other failure.
    """
    mp4_file = 'video.mp4'
    try:
        # Start from a clean slate so stale output cannot be returned.
        for stale in os.listdir():
            if stale.startswith('video.') and os.path.isfile(stale):
                os.remove(stale)

        ydl_opts = {
            'format': 'bestvideo+bestaudio/best',
            'outtmpl': 'video.%(ext)s',
            'postprocessors': [{
                'key': 'FFmpegVideoConvertor',
                # yt-dlp spells this option key 'preferedformat'.
                'preferedformat': 'mp4',
            }],
        }
        with YoutubeDL(ydl_opts) as ydl:
            ydl.download([video_url])

        # Sort so the choice is deterministic; os.listdir order is arbitrary.
        video_files = sorted(
            f for f in os.listdir()
            if f.startswith('video.') and os.path.isfile(f)
        )
        if not video_files:
            return "Error: No video file found."
        if mp4_file in video_files:
            # Never rename another file over the converted MP4.
            return mp4_file
        os.replace(video_files[0], mp4_file)
        return mp4_file
    except Exception as e:
        return f"Error downloading video: {str(e)}"
def process_video(video_url, target_language):
    """Run the full pipeline: audio, transcript, translation, exports, video.

    Each stage reports failure by returning a message with a stage-specific
    prefix. Only those exact prefixes are treated as errors, so a transcript
    or translation that merely contains the word "Error" no longer aborts
    the pipeline.

    Args:
        video_url: URL of the video to process.
        target_language: Language to translate the transcript into.

    Returns:
        A 5-tuple ``(text, pdf_file, doc_file, audio_file, video_file)``.
        On failure ``text`` is the error message and the other four items
        are ``None``.
    """
    def is_error(result, *prefixes):
        # Stage helpers signal failure with a message starting with a prefix.
        return isinstance(result, str) and result.startswith(prefixes)

    def failure(message):
        return message, None, None, None, None

    audio_file = download_audio(video_url)
    if is_error(audio_file, "Error downloading audio:"):
        return failure(audio_file)

    subtitles = generate_subtitles(audio_file)
    if is_error(subtitles, "Error generating subtitles:", "Error splitting audio:"):
        return failure(subtitles)

    translated_subtitles = translate_subtitles(subtitles, target_language)
    if translated_subtitles is None:
        # Guard against a translator that returns nothing instead of raising.
        return failure("Error translating subtitles: no translation returned")
    if is_error(translated_subtitles, "Error translating subtitles:"):
        return failure(translated_subtitles)

    pdf_file = save_subtitles_as_pdf(translated_subtitles)
    if is_error(pdf_file, "Error saving PDF:"):
        return failure(pdf_file)

    doc_file = save_subtitles_as_doc(translated_subtitles)
    if is_error(doc_file, "Error saving DOC:"):
        return failure(doc_file)

    video_file = download_video(video_url)
    if is_error(video_file, "Error downloading video:", "Error: No video file found."):
        return failure(video_file)

    return translated_subtitles, pdf_file, doc_file, audio_file, video_file
# Ask deep-translator for its supported languages as a mapping; the
# mapping's values become the options offered in the language dropdown.
supported_languages = GoogleTranslator().get_supported_languages(as_dict=True)
language_choices = [*supported_languages.values()]
def gradio_process(video_url, target_language):
    """Gradio callback: run ``process_video`` and return its five outputs.

    The result is unpacked so that anything other than exactly five values
    fails here rather than inside Gradio.
    """
    outputs = process_video(video_url, target_language)
    text, pdf_path, doc_path, audio_path, video_path = outputs
    return text, pdf_path, doc_path, audio_path, video_path
# Build the web UI (a URL box and a language dropdown in; the translated
# text and four downloadable files out), then start serving it.
demo = gr.Interface(
    fn=gradio_process,
    inputs=[
        gr.Textbox(label="Video URL"),
        gr.Dropdown(label="Target Language", choices=language_choices),
    ],
    outputs=[
        gr.Textbox(label="Translated Subtitles"),
        gr.File(label="Download PDF"),
        gr.File(label="Download DOC"),
        gr.File(label="Download AUDIO"),
        gr.File(label="Download VIDEO"),
    ],
)
demo.launch()