|
|
|
|
|
import gradio as gr |
|
from transformers import pipeline |
|
import numpy as np |
|
|
|
# Build the speech-recognition pipeline once, at import time, so every
# request handled by `transcribe` reuses the same loaded model.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model="bartelds/gos-gpu6-cp1_adp0_192m_no_test_1e-5_cp-12000",
)
|
|
|
def transcribe(audio):
    """Transcribe one audio input from the Gradio interface to text.

    Args:
        audio: A ``(sampling_rate, samples)`` tuple as delivered by the
            ``gr.Audio`` input, or ``None`` when nothing was uploaded.

    Returns:
        The ``"text"`` field of the pipeline result, or an empty string when
        there is no audio to transcribe.
    """
    # Submitting the form without a file yields None; nothing to transcribe.
    if audio is None:
        return ""

    sr, y = audio
    y = np.asarray(y)
    if y.size == 0:
        return ""

    # Multi-channel uploads arrive as a 2-D (samples, channels) array (per the
    # Gradio docs); the pipeline expects a single 1-D waveform, so average
    # the channels down to mono.
    if y.ndim > 1:
        y = y.mean(axis=1)

    # astype() copies, so the in-place scaling below never touches the
    # caller's array.
    y = y.astype(np.float32)

    # Peak-normalise to [-1, 1]. Skip this for all-zero (silent) input, where
    # dividing by the peak would fill the waveform with NaNs.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    return transcriber({"sampling_rate": sr, "raw": y})["text"]
|
|
|
|
|
# Web UI: a single audio-upload input wired to `transcribe`, with the
# transcription shown as plain text.
demo = gr.Interface(
    fn=transcribe,
    # NOTE(review): `source=` is the Gradio 3.x keyword; Gradio 4+ renamed it
    # to `sources=[...]` — confirm against the pinned Gradio version.
    inputs=gr.Audio(source="upload"),
    outputs="text",
    title="Speech-to-text for Gronings",
    description="Upload an audio file (in 16 kHz) with Gronings speech to obtain its transcription. Example files are in our [gos-demo](https://huggingface.co/datasets/bartelds/gos-demo) dataset.",
)

# Start the Gradio server as soon as the script runs.
demo.launch()
|
|