"""AI voice bot for food ordering.

A Gradio page records the user's speech, Google speech recognition turns it
into text, a keyword matcher driven by ``menu`` picks a reply, and the reply is
both spoken through pyttsx3 and shown in a textbox.
"""

import logging

import gradio as gr
import pyttsx3
import speech_recognition as sr
from transformers import pipeline

logger = logging.getLogger(__name__)

# Initialize recognizer for speech recognition
recognizer = sr.Recognizer()

# Initialize text-to-speech engine
engine = pyttsx3.init()

# Initialize Hugging Face NLP pipeline for intent recognition.
# NOTE(review): nothing in this file calls `nlp` yet; it is kept as a
# module-level name so existing importers keep working.
nlp = pipeline("zero-shot-classification")

# Define the food menu (single source of truth for what can be ordered)
menu = {
    'Pizza': ['Cheese', 'Pepperoni', 'Vegetarian'],
    'Beverages': ['Coke', 'Pepsi', 'Water'],
}

# Reply used whenever the audio could not be turned into text.
_NOT_UNDERSTOOD = "Sorry, I could not understand."


def _join_options(options):
    """Format *options* as an English list, e.g. ``"A, B, or C"``."""
    options = list(options)
    if len(options) <= 2:
        return " or ".join(options)
    return ", ".join(options[:-1]) + ", or " + options[-1]


def process_order(order):
    """Return the bot's reply for the transcribed *order* text.

    Matching is a case-insensitive substring search against ``menu``:

    * a specific pizza type ("pepperoni") confirms that pizza;
    * the bare word "pizza" asks which type the customer wants;
    * a beverage name confirms that beverage;
    * anything else (including empty or ``None`` input) asks the user to retry.
    """
    text = (order or "").lower()

    # Specific pizza types go first so the answer to the bot's own
    # "what type of pizza" question is understood.
    for pizza_type in menu['Pizza']:
        if pizza_type.lower() in text:
            return f"One {pizza_type} pizza added to your order."

    if 'pizza' in text:
        return f"What type of pizza would you like? {_join_options(menu['Pizza'])}?"

    for beverage in menu['Beverages']:
        if beverage.lower() in text:
            return f"One {beverage} added to your order."

    return "Sorry, we didn't catch that. Please try again."


def _to_audio_data(audio):
    """Convert a Gradio ``type="numpy"`` payload into ``sr.AudioData``.

    Accepts ``None`` (returns ``None``), an existing ``sr.AudioData`` (returned
    unchanged), or a ``(sample_rate, samples)`` tuple. Multi-channel audio is
    averaged down to mono and the result is encoded as 16-bit little-endian PCM.
    """
    if audio is None:
        return None
    if isinstance(audio, sr.AudioData):
        return audio

    sample_rate, samples = audio
    if samples.size == 0:
        return None

    # Decide on scaling before down-mixing: mean() always yields floats.
    is_float = samples.dtype.kind == "f"
    if samples.ndim > 1:
        samples = samples.mean(axis=1)
    if is_float:
        # assumes float samples are normalised to [-1.0, 1.0] - TODO confirm
        samples = samples.clip(-1.0, 1.0) * 32767
    # assumes integer samples are already 16-bit PCM, as Gradio documents
    pcm = samples.astype("<i2")
    return sr.AudioData(pcm.tobytes(), int(sample_rate), 2)


def _speak(message):
    """Speak *message* through the TTS engine without ever failing the request."""
    try:
        engine.say(message)
        engine.runAndWait()
    except Exception:
        # Best-effort: a TTS failure must not discard the text reply.
        logger.exception("Text-to-speech playback failed")


def recognize_speech(audio):
    """Transcribe *audio*, speak the reply, and return the reply text.

    Args:
        audio: ``(sample_rate, samples)`` as delivered by a Gradio audio
            component with ``type="numpy"``, an ``sr.AudioData``, or ``None``
            when the recording was cleared.

    Returns:
        The reply from ``process_order``, or a fixed apology when the audio is
        missing, unintelligible, or recognition fails.
    """
    try:
        audio_data = _to_audio_data(audio)
        if audio_data is None:
            return _NOT_UNDERSTOOD
        text = recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        # The recogniser ran but could not make out any words.
        return _NOT_UNDERSTOOD
    except Exception:
        # UI callback boundary: log the real cause (network error, malformed
        # payload, ...) rather than crash the handler.
        logger.exception("Speech recognition failed")
        return _NOT_UNDERSTOOD

    response = process_order(text)
    _speak(response)  # TTS response
    return response


def _microphone_input():
    """Create the microphone audio component across Gradio major versions."""
    label = "Speak to the bot"
    try:
        return gr.Audio(source="microphone", type="numpy", label=label)
    except TypeError:
        # Gradio 4 replaced the `source` keyword with the list-valued `sources`.
        return gr.Audio(sources=["microphone"], type="numpy", label=label)


def create_gradio_interface():
    """Build and return the Gradio Blocks app for the voice bot."""
    with gr.Blocks() as demo:
        gr.Markdown("## AI Voice Bot for Food Ordering")

        # Speech-to-Text: User speaks into microphone
        audio_input = _microphone_input()

        # Display the bot's response after recognition
        output_text = gr.Textbox(label="Bot Response")

        # Run recognition whenever the recorded audio changes
        audio_input.change(fn=recognize_speech, inputs=audio_input, outputs=output_text)

    return demo


# Create and launch the Gradio app
if __name__ == "__main__":
    app = create_gradio_interface()
    # share=True asks Gradio to create a public share link for the app.
    app.launch(share=True)