Spaces:
Running
Running
File size: 2,181 Bytes
6c226f9 8e787d3 6c226f9 d790c0b 88183ad 6c226f9 a5bfe25 9d6fa91 66efbc3 6c226f9 3c0cd8e bab1585 6c226f9 3c0cd8e 6c226f9 bab1585 6c226f9 bab1585 6c226f9 bab1585 3c0cd8e 53b8fc6 bab1585 3c0cd8e bab1585 3c0cd8e bab1585 609dcbe 6c226f9 a5bfe25 bab1585 6c226f9 bab1585 6c226f9 7097513 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
import torch
import gradio as gr
import yt_dlp as youtube_dl
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_read
import tempfile
import os
# Hugging Face Hub checkpoint loaded by the speech-recognition pipeline.
MODEL_NAME = "openai/whisper-large-v3"
# Batch size handed to the pipeline on every call.
BATCH_SIZE = 8
# NOTE(review): not referenced in the visible code — presumably an upload
# size cap in megabytes; confirm before relying on or removing it.
FILE_LIMIT_MB = 1000

# Run on the first CUDA device when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# The pipeline is built once at import time and shared by every request.
pipe = pipeline(
    model=MODEL_NAME,
    task="automatic-speech-recognition",
    device=device,
    chunk_length_s=30,
)
def transcribe(inputs, task):
    """Run the module-level speech-recognition pipeline on one audio input.

    Args:
        inputs: Audio input forwarded unchanged to ``pipe``. ``None`` means
            the user submitted the form without providing any audio.
        task: Value forwarded to the model as ``generate_kwargs["task"]``.

    Returns:
        The ``"text"`` entry of the pipeline result.

    Raises:
        gr.Error: If ``inputs`` is ``None``.
    """
    if inputs is None:
        # User-facing message (Catalan): no audio supplied, please upload or
        # record something before submitting.
        raise gr.Error(
            "Cap fitxer d'àudio introduit! Si us plau pengeu un fitxer "
            "o enregistreu un àudio abans d'enviar la vostra sol·licitud"
        )

    # NOTE(review): timestamps are requested but only the text is returned —
    # presumably required for long-form audio; confirm against the
    # Transformers pipeline documentation before dropping the flag.
    result = pipe(
        inputs,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": task},
        return_timestamps=True,
    )
    return result["text"]
# Root Blocks container that the tabbed interface is attached to below.
demo = gr.Blocks()

# User-facing description (Catalan) passed to the interfaces; it embeds a
# Markdown link to the model page built from MODEL_NAME.
description_string = (
    "Transcripció automatica de micròfon o de fitxers d'audio.\n Aquest demostrador está desenvolupat per"
    " comprovar els models de reconeixement de parla pels móbils. Per ara utilitza el checkpoint "
    f"[{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) i la llibreria de 🤗 Transformers per la transcripció."
)
# Interface for transcribing an uploaded audio file.
# NOTE(review): `gr.inputs.*`, `layout`, `theme` and `optional` belong to an
# older Gradio API; kept as-is because the pinned Gradio version is not
# visible from this file.
file_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        # The audio is handed to `transcribe` as a file path.
        gr.inputs.Audio(source="upload", type="filepath", optional=True, label="Audio file"),
        gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
    ],
    outputs="text",
    layout="horizontal",
    theme="huggingface",
    title="Transcripció automàtica d'àudio",
    description=description_string,
    allow_flagging="never",
)
# Interface for transcribing audio recorded from the microphone.
mf_transcribe = gr.Interface(
    title="Whisper Large V3: Transcribe Audio",
    description=description_string,
    fn=transcribe,
    inputs=[
        # The recording is handed to `transcribe` as a file path.
        gr.inputs.Audio(
            source="microphone",
            type="filepath",
            optional=True,
        ),
        gr.inputs.Radio(
            ["transcribe", "translate"],
            label="Task",
            default="transcribe",
        ),
    ],
    outputs="text",
    theme="huggingface",
    layout="horizontal",
    allow_flagging="never",
)
# Attach both interfaces to the root Blocks container, one tab each
# (labels in Catalan: "Audio file" and "Microphone").
with demo:
    gr.TabbedInterface([file_transcribe, mf_transcribe], ["Fitxer d'Àudio", "Micròfon"])

# Start the app with request queuing enabled.
demo.launch(enable_queue=True)
|