"""Gradio demo: fetch the audio of a YouTube video and transcribe it with Whisper."""

import pathlib
import time

import gradio as gr
import youtube_dl
import whisper

# Options handed to youtube_dl: pick the best audio-only format when one
# exists, otherwise the best combined format.
ydl_opts = {
    'format': 'bestaudio/best',
    'forcefilename': True,
}
ydl = youtube_dl.YoutubeDL(ydl_opts)

# Every selectable Whisper checkpoint is loaded once at start-up, so a
# request only pays for transcription, not for model loading.
models = {
    "tiny": whisper.load_model("tiny"),
    "base": whisper.load_model("base"),
    "small": whisper.load_model("small"),
    "medium": whisper.load_model("medium"),
}

# Example rows shown in the UI: [model name, video URL, language code].
examples = [
    ["tiny", "https://www.youtube.com/watch?v=-tJYN-eG1zk", "en"],
    ["base", "https://www.youtube.com/watch?v=kMNPv_HXffQ", "es"],
]

# Language codes offered in the UI. Each entry must be a code Whisper
# accepts; an unknown code makes `transcribe` raise
# "Unsupported language". The previous list contained "ge", "ch" and "nr",
# which are not Whisper codes: German is "de" (already listed), Chinese is
# "zh" and Norwegian is "no".
# NOTE(review): "sw" is Swahili in Whisper's table; if Swedish was intended,
# the code should be "sv" -- confirm with the author.
LANGUAGE_CHOICES = [
    "en", "es", "fr", "pt", "it", "ko", "zh", "ja", "de", "no", "sw", "ar",
]


def get_lyrics(model_v: str, video_url: str, language: str) -> tuple:
    """Download a video's audio (if not already on disk) and transcribe it.

    Args:
        model_v: Key into ``models`` selecting the Whisper checkpoint.
        video_url: URL passed to youtube_dl.
        language: Language code forwarded to Whisper's ``transcribe``.

    Returns:
        A 3-tuple ``(title, file_path, text)``: the ``"title"`` entry of the
        extracted video info, the local path of the media file, and the
        ``"text"`` entry of the Whisper transcription result.
    """
    t_init = time.time()

    # Resolve metadata only; the download itself is deferred so that a file
    # already present on disk can be reused.
    info = ydl.extract_info(video_url, download=False)
    print(time.time() - t_init)

    file_path = ydl.prepare_filename(info)
    print(time.time() - t_init)

    if not pathlib.Path(file_path).exists():
        ydl.process_info(info)
    print(time.time() - t_init)

    # fp16 is explicitly disabled -- presumably because inference runs on
    # CPU; confirm before changing.
    transcription = models[model_v].transcribe(
        file_path, language=language, fp16=False
    )
    return info["title"], file_path, transcription["text"]


iface = gr.Interface(
    fn=get_lyrics,
    inputs=[
        gr.Dropdown(choices=list(models.keys()), value="base"),
        "text",
        gr.Dropdown(choices=LANGUAGE_CHOICES, value="en"),
    ],
    examples=examples,
    outputs=[
        gr.Text(label="Título"),
        gr.Audio(label="Audio"),
        gr.Textbox(label="Letra"),
    ],
    cache_examples=True,
)

iface.launch()