Spaces:
Runtime error
Runtime error
File size: 2,562 Bytes
cce4292 4b1216b 3015aa0 4b1216b f82cf24 4b1216b f82cf24 4b1216b 9fc7150 89d17ad 45ee3c1 39f97dc b7cf202 76e73dd 4b1216b 1dd7cfc 4b1216b 76e73dd 4b1216b af31188 4b1216b 965b802 4b1216b 965b802 4b1216b f2822fd f82cf24 4b1216b f82cf24 00a54a7 9c821ad 00a54a7 4b1216b 00a54a7 622ff96 b845a82 622ff96 b845a82 622ff96 4b1216b 622ff96 00a54a7 622ff96 47cbf23 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
# https://huggingface.co/spaces/aadnk/whisper-webui/blob/main/app.py
import gradio as gr
import os
import re
import unicodedata
import pathlib
import asyncio
import ffmpeg
import whisper
from whisper.utils import write_srt
# Maximum number of characters of the uploaded file's stem that are kept
# when building the names of the generated output files.
MAX_FILE_PREFIX_LENGTH = 17

# Load the Whisper "base" checkpoint once at import time; the transcription
# handlers below reuse this single module-level instance.
model = whisper.load_model("base")

# Root Blocks container for the UI.  `cache_examples` is an option of
# gr.Interface / gr.Examples, not of gr.Blocks: it had no effect here and is
# rejected as an unexpected keyword by Gradio releases that no longer accept
# arbitrary **kwargs, so it is not passed.
demo = gr.Blocks()
def slugify(value, allow_unicode=False):
    """
    Turn an arbitrary value into a lowercase, dash-separated slug.

    Adapted from Django's ``django.utils.text.slugify``
    (https://github.com/django/django/blob/master/django/utils/text.py).

    The value is converted to ``str`` first. When ``allow_unicode`` is false
    the text is reduced to ASCII (characters without an ASCII decomposition
    are dropped); otherwise it is only NFKC-normalized. Anything that is not
    a word character, whitespace or a hyphen is then removed, runs of
    whitespace/hyphens collapse into a single hyphen, and leading/trailing
    hyphens and underscores are stripped.
    """
    text = str(value)
    if not allow_unicode:
        # Decompose accented characters, then discard the non-ASCII remainder.
        decomposed = unicodedata.normalize('NFKD', text)
        text = decomposed.encode('ascii', 'ignore').decode('ascii')
    else:
        text = unicodedata.normalize('NFKC', text)

    lowered = text.lower()
    without_punctuation = re.sub(r'[^\w\s-]', '', lowered)
    dashed = re.sub(r'[-\s]+', '-', without_punctuation)
    return dashed.strip('-_')
async def transcribe(file):
    """Transcribe a media file with Whisper and write the results to disk.

    Args:
        file: Path of the file to transcribe. May be ``None`` or empty when
            there is nothing to transcribe (presumably what the Gradio input
            delivers when no file was selected).

    Returns:
        ``None`` when ``file`` is ``None`` or empty. Otherwise a list with the
        paths of the two generated files, in this order: the ``-subs.srt``
        subtitle file and the ``-transcript.txt`` plain-text transcript. Both
        are written, UTF-8 encoded, relative to the current working directory.
    """
    # Nothing to transcribe: return early instead of failing inside
    # whisper.load_audio.
    if not file:
        return None

    audio = whisper.load_audio(file)
    # Default decoding options are used.
    result = model.transcribe(audio)

    source = pathlib.Path(file)
    # Keep only the first MAX_FILE_PREFIX_LENGTH characters of the stem; the
    # suffix is appended before slugifying, so the extension letters end up
    # in the prefix (slugify drops the dot).
    source_name = source.stem[:MAX_FILE_PREFIX_LENGTH] + source.suffix
    file_prefix = slugify(source_name, allow_unicode=True)

    transcript_path = file_prefix + "-transcript.txt"
    subtitles_path = file_prefix + "-subs.srt"

    # Plain-text transcript.
    with open(transcript_path, 'w', encoding="utf-8") as transcript_file:
        transcript_file.write(result['text'])

    # Subtitles built from the timed segments.
    with open(subtitles_path, 'w', encoding="utf-8") as srt_file:
        write_srt(result["segments"], file=srt_file)

    return [subtitles_path, transcript_path]
async def transcribe_video(video):
    """Transcribe the audio track of a video file.

    Delegates to ``transcribe``: that function loads its input with
    ``whisper.load_audio``, which (per the Whisper documentation) decodes
    through ffmpeg, so a video path is expected to work the same way as an
    audio path.

    Args:
        video: Path of the video file, or ``None``/empty when there is
            nothing to transcribe.

    Returns:
        ``None`` when ``video`` is ``None`` or empty; otherwise whatever
        ``transcribe`` returns for that path (the list of generated files).
    """
    if not video:
        return None
    return await transcribe(video)
# Build the UI: one tab per input type, and a single shared file output that
# both buttons write to.
with demo:
    gr.Markdown("Choisir le type d'entrée: fichier audio ou fichier vidéo")

    with gr.Tab("audio"):
        # type="filepath" makes the handler receive a path rather than raw
        # audio data.
        audio_file = gr.Audio(type="filepath")
        audio_button = gr.Button("Transcrire audio")

    with gr.Tab("vidéo"):
        # gr.Video defines no `type` parameter (it already passes a file path
        # to the handler); supplying one raises TypeError on Gradio releases
        # that reject unknown keyword arguments, so it is omitted.
        video_file = gr.Video()
        video_button = gr.Button("Transcrire vidéo")

    # Shared output component for the files returned by either handler.
    transcript = gr.File(label="transcript")

    # Event listeners are registered inside the Blocks context.
    audio_button.click(transcribe, inputs=audio_file, outputs=transcript)
    video_button.click(transcribe_video, inputs=video_file, outputs=transcript)

demo.launch()
|