# Spaces: Runtime error
import gradio as gr | |
import argparse | |
import soundfile as sf | |
import numpy as np | |
import tempfile | |
from pathlib import Path | |
import os | |
import subprocess | |
import sys | |
import re | |
# from transformers import AutoProcessor, AutoModelForPreTraining | |
# processor = AutoProcessor.from_pretrained("patrickvonplaten/mms-1b") | |
# model = AutoModelForPreTraining.from_pretrained("patrickvonplaten/mms-1b") | |
def process(audio, model, lang, format):
    """Transcribe audio files by running ``infer.py`` in a subprocess.

    A temporary manifest directory (``dev.tsv``, ``dev.uid``, ``dev.ltr`` and
    ``dev.wrd``) is written for the given files, ``infer.py`` is invoked with
    Viterbi decoding, and the hypotheses it writes to ``hypo.word`` are read
    back, printed to stdout and returned.

    Args:
        audio: A single audio file path, or an iterable of audio file paths.
        model: Checkpoint path, passed as ``common_eval.path``.
        lang: Language code; ``"{lang}:dev"`` is passed as
            ``dataset.gen_subset``.
        format: Value passed as ``common_eval.post_process``.

    Returns:
        A list with one cleaned hypothesis string per line of ``hypo.word``.
        An empty list is returned, without running anything, when no audio
        is given.

    Raises:
        subprocess.CalledProcessError: If ``infer.py`` exits with a non-zero
            status.
    """
    # A lone path must be wrapped, otherwise it would be iterated
    # character by character.
    if isinstance(audio, (str, os.PathLike)):
        audio = [audio]
    # Materialise once so the inputs can be counted and indexed later;
    # the loop variables below never rebind this list.
    audio_paths = [str(path) for path in audio]
    if not audio_paths:
        return []
    count = len(audio_paths)

    with tempfile.TemporaryDirectory() as tmpdir:
        print(">>> preparing tmp manifest dir ...", file=sys.stderr)
        tmpdir = Path(tmpdir)

        with open(tmpdir / "dev.tsv", "w") as fw:
            fw.write("/\n")
            for path in audio_paths:
                # Close the sound file as soon as its length is known.
                with sf.SoundFile(path) as sound:
                    nsample = sound.frames
                fw.write(f"{path}\t{nsample}\n")
        # One line per utterance in each companion file.
        with open(tmpdir / "dev.uid", "w") as fw:
            fw.writelines(f"{path}\n" for path in audio_paths)
        with open(tmpdir / "dev.ltr", "w") as fw:
            fw.write("d u m m y | d u m m y\n" * count)
        with open(tmpdir / "dev.wrd", "w") as fw:
            fw.write("dummy dummy\n" * count)

        # Argument-list form: no shell is involved, so the values of model,
        # lang and format cannot be reinterpreted as shell syntax.
        env = dict(
            os.environ,
            PYTHONPATH=".",
            PREFIX="INFER",
            HYDRA_FULL_ERROR="1",
        )
        cmd = [
            "python",
            "infer.py",
            "-m",
            "decoding.type=viterbi",
            "dataset.max_tokens=4000000",
            "distributed_training.distributed_world_size=1",
            f"common_eval.path='{model}'",
            f"task.data={tmpdir}",
            f"dataset.gen_subset={lang}:dev",
            f"common_eval.post_process={format}",
            f"decoding.results_path={tmpdir}",
        ]
        print(">>> loading model & running inference ...", file=sys.stderr)
        # check=True reports a failed run directly instead of failing later
        # on a missing hypo.word file.
        subprocess.run(cmd, env=env, stdout=subprocess.DEVNULL, check=True)

        hypotheses = []
        with open(tmpdir / "hypo.word") as fr:
            for ii, line in enumerate(fr):
                # Drop a trailing parenthesised, whitespace-free token
                # (presumably an utterance id appended by infer.py -- confirm).
                hypo = re.sub(r"\(\S+\)$", "", line).strip()
                print(f'===============\nInput: {audio_paths[ii]}\nOutput: {hypo}')
                hypotheses.append(hypo)
        return hypotheses
def transcribe(audio):
    """Transcribe one recording with the fixed English model and return the text.

    Args:
        audio: File path of the recording. May be ``None`` (presumably when
            the interface fires without a recording -- confirm against the
            Gradio version in use).

    Returns:
        The hypotheses produced by ``process`` joined with newlines, or an
        empty string when there is no input or ``process`` yields nothing.
    """
    # Nothing recorded: skip inference instead of failing on a missing file.
    if audio is None:
        return ""

    model = "base_300m.pt"
    lang = "eng"
    post_process = "letter"

    # np.ravel turns the single path into a one-element sequence.
    hypotheses = process(np.ravel(audio), model, lang, post_process)
    # Return the text so the output component has something to display.
    return "\n".join(hypotheses) if hypotheses else ""
# Build the web UI (microphone recording in, textbox out) and start serving.
# NOTE(review): gr.inputs looks like a legacy Gradio namespace -- confirm it
# exists in the Gradio version this app is pinned to.
microphone_input = gr.inputs.Audio(source="microphone", type="filepath")
demo = gr.Interface(
    fn=transcribe,
    inputs=[microphone_input],
    outputs=["textbox"],
    title='MetaAI (Facebook Research) MMS (Massively Multilingual Speech) ASR',
    live=True,
)
demo.launch()