from transformers import pipeline
# Speech-recognition pipeline, created once at module load and reused by
# transcribe_the_command for every request.
asr_pipe = pipeline(
    "automatic-speech-recognition",
    model="Abdullah17/whisper-small-urdu",
)
from difflib import SequenceMatcher

# Reference phrases the transcript is compared against.
# NOTE: find_most_similar_command chooses a reply from the POSITION of the
# matched phrase in this list, so keep the ordering and the grouping intact.
# Leading/trailing spaces are part of the strings and affect similarity scores.
commands = [
    # SIM-related phrases
    " سم بلاک۔",
    "  سم کی حالت",
    "  سم بلاک ایکٹیویٹ کروانا",
    " سم کا شمارہ بند ",
    " سم کی حالت کو تصدیق",
    "  سم بلاک کی انکوائری",
    " سم کا شمارہ غلط",
    # Package-related phrases
    "موبائل پر مختلف پیکیجز کی تفصیلات انٹرنیٹ پیکیج",
    "بجٹ کے مطابق ایک سستا پیکیج",
    # Challan (bill payment) phrases
    " چالان جمع کرنا",
    "اپنے چالان کی رقم جمع",
]

# Confirmation questions shown to the user, addressed by index.
replies = [
    "کیا آپ پیکیجز کی معلومات حاصل کرنا چاہتے ہیں؟",  # 0: packages
    "کیا آپ سم انکوائری کرنا چاہتے ہیں؟",  # 1: SIM inquiry
    "کیا آپ نمائندے سے بات کرنا چاہتے ہیں؟",  # 2: talk to a representative
    "کیا آپ چالان جمع کروانا چاہتے ہیں؟",  # 3: challan submission
]
# Function to find the most similar command
def find_most_similar_command(statement, command_list):
    """Return the command closest to *statement* and the reply that goes with it.

    Every entry of *command_list* is scored against *statement* with
    ``difflib.SequenceMatcher.ratio()``; the first entry with the strictly
    highest score wins. The reply is then picked from the module-level
    ``replies`` list according to the POSITION of the winning entry, so
    *command_list* is expected to be laid out like the module-level
    ``commands`` list.

    Args:
        statement: Text to classify (the speech transcript).
        command_list: Candidate command phrases, in ``commands`` order.

    Returns:
        A ``(best_match, reply)`` tuple. Both items are ``None`` when no
        candidate scores above zero (for example an empty *command_list*).
    """
    best_match = None
    best_position = None
    highest_similarity = 0
    for position, command in enumerate(command_list):
        similarity = SequenceMatcher(None, statement, command).ratio()
        print(similarity)  # per-candidate score, kept as console diagnostics
        if similarity > highest_similarity:
            highest_similarity = similarity
            best_match = command
            best_position = position

    # Nothing matched at all: previously this path crashed with
    # UnboundLocalError because ``reply`` was never assigned.
    if best_position is None:
        return None, None

    if best_position < 7:
        reply_index = 1  # SIM phrases -> SIM inquiry question
    elif best_position == 8:
        # NOTE(review): a package phrase mapped to the "representative"
        # question; kept as in the original mapping - confirm intent.
        reply_index = 2
    elif best_position in (9, 10):
        # Both challan phrases -> challan question. Position 10 used to fall
        # through to the packages question.
        reply_index = 3
    else:
        reply_index = 0  # position 7 and anything beyond the known list

    return best_match, replies[reply_index]
def transcribe_the_command(audio):
    """Transcribe a recording and return the reply for the closest command.

    Args:
        audio: ``(sample_rate, audio_data)`` pair for the recording, or
            ``None`` when nothing was recorded.

    Returns:
        The reply chosen by ``find_most_similar_command`` for the transcript,
        or an empty string when *audio* is ``None``.
    """
    # No recording submitted: nothing to transcribe, so avoid the unpacking
    # error below and return an empty result.
    if audio is None:
        return ""

    import os
    import tempfile

    import soundfile as sf

    sample_rate, audio_data = audio

    # Write the clip to a per-call temporary directory so simultaneous
    # requests cannot overwrite each other's file, and so the file is removed
    # once the transcript has been produced.
    with tempfile.TemporaryDirectory() as tmp_dir:
        file_name = os.path.join(tmp_dir, "recorded_audio.wav")
        sf.write(file_name, audio_data, sample_rate)
        print(file_name)
        transcript = asr_pipe(file_name)["text"]

    most_similar_command, reply = find_most_similar_command(transcript, commands)
    print(f"Given Statement: {transcript}")
    print(f"Most Similar Command: {most_similar_command}\n")
    print(reply)

    return reply
# get_text_from_voice("urdu.wav")
import gradio as gr


# Web UI: microphone recording in, reply text out.
iface = gr.Interface(
    title="Whisper Small Urdu Command",
    description=(
        "Realtime demo for Urdu speech recognition using a fine-tuned "
        "Whisper small model and outputting the estimated command on the "
        "basis of speech transcript."
    ),
    fn=transcribe_the_command,
    inputs=gr.inputs.Audio(source="microphone", label="Recorded Audio"),
    outputs="text",
)

# Start serving the interface.
iface.launch()