import os import unicodedata from datasets import load_dataset, Audio from transformers import pipeline import gradio as gr import torch ############### HF ########################### HF_TOKEN = os.getenv("HF_TOKEN") hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN, "Urdu-ASR-flags") ############## DagsHub ################################ Model = "kingabzpro/wav2vec2-large-xls-r-300m-Urdu" # This is not working because Huggingface has completely changed the git server. # from dagshub.streaming import install_hooks # install_hooks() ############## Inference ############################## device = 0 if torch.cuda.is_available() else "cpu" def asr(audio): asr = pipeline("automatic-speech-recognition", model=Model) prediction = asr(audio, chunk_length_s=30, device=device) return unicodedata.normalize("NFC",prediction["text"]) ################### Gradio Web APP ################################ title = "Urdu Automatic Speech Recognition" description = """
Fine-tuning XLS-R for Multi-Lingual ASR with 🤗 Transformers