# https://huggingface.co/spaces/aadnk/whisper-webui/blob/main/app.py
import gradio as gr
import os
import re
import unicodedata
import pathlib
import asyncio
import ffmpeg

import whisper
from whisper.utils import write_srt

# Maximum number of characters of the uploaded file's stem kept in output names.
MAX_FILE_PREFIX_LENGTH = 17

# Loaded once at import time and shared by every request.
model = whisper.load_model("base")

demo = gr.Blocks(cache_examples=False)


def slugify(value, allow_unicode=False):
    """
    Taken from https://github.com/django/django/blob/master/django/utils/text.py

    Convert to ASCII if 'allow_unicode' is False. Convert spaces or repeated
    dashes to single dashes. Remove characters that aren't alphanumerics,
    underscores, or hyphens. Convert to lowercase. Also strip leading and
    trailing whitespace, dashes, and underscores.
    """
    value = str(value)
    if allow_unicode:
        value = unicodedata.normalize('NFKC', value)
    else:
        value = (
            unicodedata.normalize('NFKD', value)
            .encode('ascii', 'ignore')
            .decode('ascii')
        )
    value = re.sub(r'[^\w\s-]', '', value.lower())
    return re.sub(r'[-\s]+', '-', value).strip('-_')


async def transcribe(file):
    """Transcribe a media file and write a transcript and subtitles to disk.

    Two files are created in the current working directory, both named after
    a slug built from the (truncated) input file name:
    ``<slug>-transcript.txt`` with the plain text and ``<slug>-subs.srt``
    with the timed segments.

    Args:
        file: Path of the media file to transcribe, or a falsy value when
            nothing was provided.

    Returns:
        ``[<slug>-subs.srt, <slug>-transcript.txt]`` (in that order), or
        ``None`` when ``file`` is empty.
    """
    print(type(file))

    # Clicking the button without providing a file would otherwise hand an
    # empty value to whisper.load_audio and fail inside the decoder.
    if not file:
        return None

    audio = whisper.load_audio(file)
    # Alternative decoding options, kept for reference:
    # transcribe_options = dict(beam_size=5, best_of=5, without_timestamps=False)
    # result = model.transcribe(file, **transcribe_options)
    result = model.transcribe(audio)

    # Keep the output names short: truncate the stem, then slugify the
    # "stem + suffix" string (slugify drops the dot of the extension).
    file_path = pathlib.Path(file)
    source_name = file_path.stem[:MAX_FILE_PREFIX_LENGTH] + file_path.suffix
    file_prefix = slugify(source_name, allow_unicode=True)

    transcript_path = file_prefix + "-transcript.txt"
    subtitles_path = file_prefix + "-subs.srt"

    # Plain-text transcript.
    with open(transcript_path, 'w', encoding="utf-8") as f:
        f.write(result['text'])

    # Subtitles.
    with open(subtitles_path, 'w', encoding="utf-8") as srt:
        write_srt(result["segments"], file=srt)

    return [subtitles_path, transcript_path]


async def transcribe_video(video):
    """Transcribe the audio track of a video file.

    This handler used to be a stub that only printed the argument type and
    returned ``None``, so the "Transcrire vidéo" button never produced any
    output. It now delegates to ``transcribe``.

    NOTE(review): this relies on whisper.load_audio decoding through ffmpeg
    and therefore accepting video containers as well as audio files; confirm
    against the installed openai-whisper version.

    Args:
        video: Path of the video file, or a falsy value when nothing was
            provided.

    Returns:
        Whatever ``transcribe`` returns for the same path.
    """
    print(type(video))
    return await transcribe(video)


with demo:
    gr.Markdown("Choisir le type d'entrée: fichier audio ou fichier vidéo")
    with gr.Tab("audio"):
        audio_file = gr.Audio(type="filepath")
        audio_button = gr.Button("Transcrire audio")
    with gr.Tab("vidéo"):
        video_file = gr.Video(type="filepath")
        video_button = gr.Button("Transcrire vidéo")

    # Shared output component: both buttons publish their files here.
    transcript = gr.File(label="transcript")

    audio_button.click(transcribe, inputs=audio_file, outputs=transcript)
    video_button.click(transcribe_video, inputs=video_file, outputs=transcript)

demo.launch()