# https://huggingface.co/spaces/aadnk/whisper-webui/blob/main/app.py

import gradio as gr
import os
import re
import unicodedata
import pathlib
import asyncio

import whisper
from whisper.utils import write_srt

# Maximum number of characters kept from the uploaded file's stem when
# transcribe() builds the names of its output files.
MAX_FILE_PREFIX_LENGTH = 17

# Load the Whisper "base" model once at import time; transcribe() reuses it.
model = whisper.load_model("base")

# Top-level Gradio Blocks container; the UI is populated in the
# `with demo:` section at the bottom of the file.
demo = gr.Blocks()

def slugify(value, allow_unicode=False):
    """
    Turn an arbitrary value into a lowercase, dash-separated slug.
    Adapted from https://github.com/django/django/blob/master/django/utils/text.py

    The value is stringified first. With 'allow_unicode' False, it is
    decomposed (NFKD) and every non-ASCII character is dropped; with
    'allow_unicode' True, it is only normalized (NFKC) and Unicode word
    characters are kept. The text is then lowercased, stripped of everything
    that is not a word character, whitespace, or hyphen, and each run of
    whitespace and/or hyphens is collapsed to a single '-'. Leading and
    trailing dashes and underscores are removed from the result.
    """
    text = str(value)
    if not allow_unicode:
        # NFKD splits accented letters into base letter + combining mark, so
        # the ASCII round-trip keeps the base letter and discards the mark.
        decomposed = unicodedata.normalize('NFKD', text)
        text = decomposed.encode('ascii', 'ignore').decode('ascii')
    else:
        text = unicodedata.normalize('NFKC', text)

    lowered = text.lower()
    cleaned = re.sub(r'[^\w\s-]', '', lowered)
    collapsed = re.sub(r'[-\s]+', '-', cleaned)
    return collapsed.strip('-_')

async def transcribe(file):
    """
    Transcribe an uploaded audio file with the module-level Whisper model.

    Writes two files into the current working directory, both named after a
    truncated, slugified form of the upload's file name:
    "<prefix>-transcript.txt" (the plain text) and "<prefix>-subs.srt"
    (the subtitles).

    'file' is the uploaded file object; only its 'name' attribute (a path on
    disk) is read. Returns a list holding the subtitle path followed by the
    transcript path.
    """
    # whisper.load_audio and model.transcribe are ordinary blocking functions,
    # not coroutines, so their results cannot be awaited directly (doing so
    # raises TypeError). Run them in the default executor instead, which also
    # keeps the event loop responsive while they work.
    loop = asyncio.get_running_loop()
    audio = await loop.run_in_executor(None, whisper.load_audio, file.name)
    result = await loop.run_in_executor(None, model.transcribe, audio)

    file_path = pathlib.Path(file.name)
    # Only the stem is truncated, so the extension always makes it into the
    # prefix; slugify then drops the dot (e.g. "talk.mp3" -> "talkmp3").
    source_name = file_path.stem[:MAX_FILE_PREFIX_LENGTH] + file_path.suffix
    file_prefix = slugify(source_name, allow_unicode=True)

    transcript_path = file_prefix + "-transcript.txt"
    subtitles_path = file_prefix + "-subs.srt"

    # Plain-text transcript.
    with open(transcript_path, 'w', encoding="utf-8") as f:
        f.write(result['text'])

    # SRT subtitles built from the timed segments.
    with open(subtitles_path, 'w', encoding="utf-8") as srt:
        write_srt(result["segments"], file=srt)

    return [subtitles_path, transcript_path]
    
    
# Build the UI: a file upload, a file output for the generated files, and a
# button that runs transcribe() on the upload.
with demo:
    # Input: the uploaded file handed to transcribe().
    audio_file = gr.File()
    # Output: receives the list of paths returned by transcribe().
    transcript = gr.File(label="transcript")
    # The button label is French for "Transcribe".
    b1 = gr.Button("Transcrire")
    b1.click(transcribe, inputs=audio_file, outputs=transcript)
# Start the Gradio server when this module is executed.
demo.launch()