import os

import gradio as gr
import pygame  # Use pygame for playing audio
import speech_recognition as sr
from gtts import gTTS
from transformers import pipeline

# Initialize pygame for audio playback
pygame.mixer.init()

# Initialize recognizer for speech recognition
recognizer = sr.Recognizer()

# Initialize Hugging Face NLP pipeline for intent recognition using a specific model
# NOTE(review): `nlp` is not referenced by the functions below; it is kept
# because other code may import it, but building it loads the model at import
# time -- confirm it is still needed.
nlp = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# Define the food menu
menu = {
    'Pizza': ['Cheese', 'Pepperoni', 'Vegetarian'],
    'Beverages': ['Coke', 'Pepsi', 'Water'],
}

# File the spoken reply is written to before playback.
_RESPONSE_AUDIO_PATH = "response.mp3"

# Reply returned when the audio could not be turned into text.
_NOT_UNDERSTOOD_REPLY = "Sorry, I could not understand."


def _format_choices(options):
    """Join option names into an English list, e.g. ``"A, B, or C"``."""
    if len(options) <= 2:
        return " or ".join(options)
    return f"{', '.join(options[:-1])}, or {options[-1]}"


# Function to process the order
def process_order(order: str) -> str:
    """Return the bot's reply for a recognized order phrase.

    Matching is a case-insensitive substring search. A mention of pizza takes
    precedence and prompts for the pizza type; otherwise the first entry of
    ``menu['Beverages']`` found in the phrase is added to the order.

    Args:
        order: Text of what the customer said.

    Returns:
        The reply to show (and speak) to the customer.
    """
    spoken = order.lower()
    if 'pizza' in spoken:
        return f"What type of pizza would you like? {_format_choices(menu['Pizza'])}?"
    for beverage in menu['Beverages']:
        if beverage.lower() in spoken:
            return f"One {beverage} added to your order."
    return "Sorry, we didn't catch that. Please try again."


def _transcribe(audio):
    """Convert a filepath (or already-captured audio data) to text."""
    if isinstance(audio, str):
        # Audio file input (filepath)
        with sr.AudioFile(audio) as source:
            audio_data = recognizer.record(source)
            return recognizer.recognize_google(audio_data)
    # Anything else is handed to the recognizer as-is.
    return recognizer.recognize_google(audio)


def _speak(response):
    """Synthesize ``response`` with gTTS and play it; failures are only logged."""
    try:
        # Release the previous clip before overwriting the file: saving over a
        # file the mixer still holds open can fail on platforms that lock it.
        pygame.mixer.music.stop()
        if hasattr(pygame.mixer.music, "unload"):  # not present in pygame 1.x
            pygame.mixer.music.unload()

        gTTS(text=response, lang='en').save(_RESPONSE_AUDIO_PATH)
        pygame.mixer.music.load(_RESPONSE_AUDIO_PATH)
        pygame.mixer.music.play()
    except Exception as e:
        # Playback is best-effort: the text reply is still valid without it.
        print(f"Error: {e}")


# Function to handle speech recognition from audio files or microphone
def recognize_speech(audio):
    """Transcribe ``audio``, answer the order, and speak the answer.

    Args:
        audio: Path to an audio file, or audio data accepted by
            ``recognizer.recognize_google``. ``None`` means there is no audio
            (for example, the input was cleared).

    Returns:
        The bot's text reply; an empty string when ``audio`` is ``None``; or
        "Sorry, I could not understand." when transcription fails.
    """
    if audio is None:
        # Nothing was said, so there is nothing to apologise for.
        return ""

    try:
        text = _transcribe(audio)
    except Exception as e:
        # Handler boundary: never let a recognition error reach the UI.
        print(f"Error: {e}")  # Print the error for debugging
        return _NOT_UNDERSTOOD_REPLY

    print(f"Recognized text: {text}")  # Print the recognized text for debugging
    response = process_order(text)
    _speak(response)
    return response
# Gradio Interface for the app
def create_gradio_interface():
    """Assemble the Gradio Blocks UI for the voice ordering bot.

    Returns:
        The ``gr.Blocks`` app, built but not launched.
    """
    with gr.Blocks() as ui:
        gr.Markdown("## AI Voice Bot for Food Ordering")

        # The audio widget hands its value to the callback as a filepath.
        voice_in = gr.Audio(
            label="Speak to the bot (Upload or Record Audio)",
            type="filepath",
        )

        # Where the bot's text reply is shown.
        reply_box = gr.Textbox(label="Bot Response")

        # There is no button: any change of the audio value runs recognition.
        voice_in.change(
            fn=recognize_speech,
            inputs=[voice_in],
            outputs=[reply_box],
        )

    return ui


# Create and launch the Gradio app
if __name__ == "__main__":
    app = create_gradio_interface()
    app.launch(share=True)