Spaces:

Hunzla
/

whisperaudio

Runtime error

File size: 2,154 Bytes

b0b1ade
4f9e8c5
 
1a63ef2
c576782
1a63ef2
4f9e8c5
1a63ef2
 
 
 
 
 
 
 
4f9e8c5
 
 
 
7c7805e
 
 
 
b2f9adc
 
7c7805e
b2f9adc
7c7805e
 
 
ec27395
b2f9adc
adcf092
7c7805e
1a63ef2
4f9e8c5
87f11c1
2beaf00
4f9e8c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b0b1ade
 
7c7805e
43aa289
b0b1ade
7d2b240
 
b0b1ade
 
ef798a3

import json
from difflib import SequenceMatcher

from transformers import pipeline

# Speech-recognition pipeline, built once at import time and reused by
# transcribe_the_command for every request.
asr_pipe = pipeline(
    "automatic-speech-recognition",
    model="Abdullah17/whisper-small-urdu",
)

# Command table loaded from tasks.json; it is indexed by the request's id
# when a recording is processed.
with open("tasks.json", encoding="utf-8") as json_file:
    urdu_data = json.load(json_file)
# List of commands
# commands = [
#     "نمائندے ایجنٹ نمائندہ",
#     "  سم  ایکٹیویٹ ",
#     " سم  بلاک بند ",
#     "موبائل پیکیجز انٹرنیٹ پیکیج",
#     " چالان جمع چلان",
#     " گانا "
# ]
# replies = [
# 1,2,
# ]
# Function to find the most similar command
def find_most_similar_command(statement, command_list):
    """Return the command closest to *statement* and the index of its group.

    Args:
        statement: Text to match against the known commands.
        command_list: Iterable of groups, each an iterable of command strings.

    Returns:
        A ``(best_match, reply)`` tuple. ``best_match`` is the command with
        the highest ``SequenceMatcher`` ratio and ``reply`` is the zero-based
        index of the group that contains it. Both are ``None`` when no command
        scores above zero, which includes an empty ``command_list``.
    """
    best_match = None
    # Bound up front: without this, an empty list or all-zero scores left
    # ``reply`` undefined and the return statement raised UnboundLocalError.
    reply = None
    highest_similarity = 0
    for group_index, sub_list in enumerate(command_list):
        for command in sub_list:
            similarity = SequenceMatcher(None, statement, command).ratio()
            # Diagnostic trace of every comparison, keyed by group index.
            print(group_index, "similarity", similarity)
            # Strict ">" keeps the first command seen when scores tie.
            if similarity > highest_similarity:
                highest_similarity = similarity
                best_match = command
                reply = group_index

    return best_match, reply
def transcribe_the_command(audio, id):
    """Transcribe a recording and return the index of the closest command group.

    Args:
        audio: ``(sample_rate, audio_data)`` pair; it is unpacked in that
            order and written to a WAV file for the ASR pipeline.
        id: Key used to select the candidate commands from ``urdu_data``.
            The name shadows the builtin but is kept so existing callers
            keep working.

    Returns:
        The group index reported by ``find_most_similar_command`` for the
        transcript.

    Raises:
        ValueError: If ``audio`` is ``None`` (nothing was recorded).
    """
    import os
    import tempfile

    import soundfile as sf

    # NOTE(review): the id presumably arrives as text from the UI; confirm
    # that the keys/indices of tasks.json match that type.
    commands = urdu_data[id]
    print(commands)

    if audio is None:
        # Fail with a readable message instead of an unpacking TypeError.
        raise ValueError("No audio was provided; record a command first.")
    sample_rate, audio_data = audio

    # Use a private temporary directory rather than a fixed file in the
    # working directory: concurrent requests no longer overwrite each other's
    # recording, and the file is removed once transcription is done.
    with tempfile.TemporaryDirectory() as tmp_dir:
        file_name = os.path.join(tmp_dir, "recorded_audio.wav")
        sf.write(file_name, audio_data, sample_rate)
        print(file_name)
        transcript = asr_pipe(file_name)["text"]

    most_similar_command, reply = find_most_similar_command(transcript, commands)
    print(f"Given Statement: {transcript}")
    print(f"Most Similar Command: {most_similar_command}\n")
    print(reply)

    return reply
# get_text_from_voice("urdu.wav")
import gradio as gr

# The ``gr.inputs`` namespace was deprecated in Gradio 3 and removed in
# Gradio 4, so the inputs are built from the top-level component classes.
# Gradio 4 also renamed Audio's ``source`` argument to ``sources`` (a list),
# hence the switch on the installed major version.
if int(gr.__version__.split(".")[0]) >= 4:
    _audio_input = gr.Audio(label="Recorded Audio", sources=["microphone"])
else:
    _audio_input = gr.Audio(label="Recorded Audio", source="microphone")

# Web UI: microphone recording plus a free-text id in, matched command index out.
iface = gr.Interface(
    fn=transcribe_the_command,
    inputs=[_audio_input, gr.Textbox(label="id")],
    outputs="text",
    title="Whisper Small Urdu Command",
    description="Realtime demo for Urdu speech recognition using a fine-tuned Whisper small model and outputting the estimated command on the basis of speech transcript.",
)

iface.launch()