"""Voice-driven food-ordering demo.

A Gradio page records the user's speech, Google's web speech API (through the
SpeechRecognition package) transcribes it, a keyword matcher builds the reply,
and gTTS/playsound speak the reply on the machine running the app.
"""

import logging
import os
import tempfile

import gradio as gr
import speech_recognition as sr
from gtts import gTTS
from playsound import playsound  # Plays the synthesized MP3 reply
from transformers import pipeline

logger = logging.getLogger(__name__)

# Shared recognizer used for every speech-to-text request.
recognizer = sr.Recognizer()

# Hugging Face zero-shot classification pipeline meant for intent recognition.
# NOTE(review): nothing visible in this file calls `nlp`, yet it is built at
# import time. It is kept so the module's public names stay unchanged;
# consider creating it lazily once it is actually used.
nlp = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# The food menu: category name -> items that can be ordered.
menu = {
    'Pizza': ['Cheese', 'Pepperoni', 'Vegetarian'],
    'Beverages': ['Coke', 'Pepsi', 'Water'],
}

# Reply used whenever the audio could not be turned into text.
_NOT_UNDERSTOOD = "Sorry, I could not understand."


def _join_options(options):
    """Return *options* as an English list, e.g. ``"A, B, or C"``."""
    if len(options) <= 1:
        return "".join(options)
    if len(options) == 2:
        return f"{options[0]} or {options[1]}"
    return f"{', '.join(options[:-1])}, or {options[-1]}"


def process_order(order):
    """Return the bot's reply to the transcribed *order* text.

    Matching is case-insensitive and substring based, checked in this order:

    1. The word "pizza" -> ask which pizza type from the menu is wanted.
    2. A pizza type from the menu -> confirm that pizza (this is the answer
       to the question asked in step 1).
    3. A beverage from the menu -> confirm that beverage.

    Anything else gets a "please try again" reply.
    """
    text = order.lower()

    if 'pizza' in text:
        return f"What type of pizza would you like? {_join_options(menu['Pizza'])}?"

    for pizza_type in menu['Pizza']:
        if pizza_type.lower() in text:
            return f"One {pizza_type} pizza added to your order."

    for beverage in menu['Beverages']:
        if beverage.lower() in text:
            return f"One {beverage} added to your order."

    return "Sorry, we didn't catch that. Please try again."


def _load_audio(audio):
    """Convert *audio* into the ``sr.AudioData`` the recognizer requires.

    Accepts an existing ``sr.AudioData`` or a path to a WAV/AIFF/FLAC file
    (what ``gr.Audio(type="filepath")`` passes to its callback).

    Raises:
        TypeError: if *audio* is neither of the supported kinds.
    """
    if isinstance(audio, sr.AudioData):
        return audio
    if isinstance(audio, (str, os.PathLike)):
        with sr.AudioFile(os.fspath(audio)) as source:
            return recognizer.record(source)
    raise TypeError(f"Unsupported audio input: {type(audio).__name__}")


def _speak(response):
    """Synthesize *response* with gTTS and play it; failures are only logged.

    Playback is best-effort: a missing audio device or a TTS network error
    must not hide the textual reply from the user.
    """
    mp3_path = None
    try:
        # A unique temp file avoids clobbering a reply that is still playing
        # and does not require the working directory to be writable.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as handle:
            mp3_path = handle.name
        gTTS(text=response, lang='en').save(mp3_path)
        playsound(mp3_path)
    except Exception:
        logger.warning("Could not play the spoken response", exc_info=True)
    finally:
        if mp3_path is not None:
            try:
                os.remove(mp3_path)
            except OSError:
                logger.debug("Could not remove %s", mp3_path, exc_info=True)


def recognize_speech(audio):
    """Transcribe *audio*, answer the order, and speak the answer.

    Args:
        audio: Path to the recorded audio file (as supplied by the Gradio
            audio component), an ``sr.AudioData`` instance, or ``None`` when
            the component was cleared.

    Returns:
        The bot's textual reply, or a "could not understand" message when no
        text could be obtained from the audio.
    """
    # The change event also fires when the recording is cleared.
    if audio is None:
        return _NOT_UNDERSTOOD

    try:
        text = recognizer.recognize_google(_load_audio(audio))
    except sr.UnknownValueError:
        # The service answered but the speech was unintelligible.
        return _NOT_UNDERSTOOD
    except Exception:
        # Handler boundary: unreadable file, network/API failure, bad input.
        logger.exception("Speech recognition failed")
        return _NOT_UNDERSTOOD

    response = process_order(text)
    _speak(response)
    return response


def create_gradio_interface():
    """Build and return the Gradio Blocks app for the voice bot."""
    with gr.Blocks() as demo:
        gr.Markdown("## AI Voice Bot for Food Ordering")

        # Audio input: delivered to the callback as a file path so it can be
        # loaded with sr.AudioFile.
        audio_input = gr.Audio(type="filepath", label="Speak to the bot")

        # Shows the bot's reply after recognition.
        output_text = gr.Textbox(label="Bot Response")

        # Run recognition whenever the audio value changes.
        audio_input.change(fn=recognize_speech, inputs=audio_input, outputs=output_text)

    return demo


# Create and launch the Gradio app
if __name__ == "__main__":
    app = create_gradio_interface()
    app.launch(share=True)