import gradio as gr
import numpy as np
import torch
import torchaudio
from transformers import pipeline, Wav2Vec2ForCTC, Wav2Vec2Processor

# Load pretrained models from Hugging Face
nlp_pipeline = pipeline("text-generation", model="gpt2")  # For text generation (voice assistant)
speech_recognition_model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h")
speech_recognition_processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h")

# Function for voice-to-text conversion using Wav2Vec2
def recognize_speech(audio):
    # Gradio's "numpy" audio input arrives as a (sample_rate, data) tuple
    sample_rate, speech = audio

    # Convert to mono float32 in [-1, 1] (microphone input is typically int16)
    speech = speech.astype(np.float32)
    if speech.ndim > 1:
        speech = speech.mean(axis=1)
    speech /= max(np.abs(speech).max(), 1e-9)

    # Wav2Vec2 was trained on 16 kHz audio; resample if the microphone rate differs
    if sample_rate != 16000:
        speech = torchaudio.functional.resample(
            torch.from_numpy(speech), sample_rate, 16000
        ).numpy()

    # Process the audio and run the model
    input_values = speech_recognition_processor(
        speech, sampling_rate=16000, return_tensors="pt"
    ).input_values
    with torch.no_grad():
        logits = speech_recognition_model(input_values).logits
    predicted_ids = torch.argmax(logits, dim=-1)

    # Decode the prediction
    transcription = speech_recognition_processor.decode(predicted_ids[0])
    return transcription

# Function for generating device commands using GPT-2 (e.g., for controlling smart devices)
def generate_response(user_input):
    response = nlp_pipeline(user_input, max_length=50, num_return_sequences=1)[0]["generated_text"]
    return response

# Route the request to speech recognition or text generation
def interact_with_system(audio=None, user_input=None):
    if audio is not None:
        # Convert speech to text
        return recognize_speech(audio)
    elif user_input:
        # Generate a response to control devices
        return generate_response(user_input)
    else:
        return "Please provide either voice or text input."

# Create a Gradio interface
interface = gr.Interface(
    fn=interact_with_system,
    inputs=[
        gr.Audio(source="microphone", type="numpy", label="Voice Command"),  # Voice input (Gradio 4.x: sources=["microphone"])
        gr.Textbox(label="Text Command"),  # Text input
    ],
    outputs="text",  # Output the text response (device control command)
    title="AI-Driven Consumer Device Ecosystem",
    description="Use voice or text commands to interact with smart devices in your ecosystem.",
)

# Launch the interface
interface.launch()
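
# --- Optional offline check (a minimal sketch; not part of the original app) ---
# Illustrates how recognize_speech() can be exercised without the Gradio UI,
# assuming the optional `soundfile` package and a hypothetical local WAV file
# "sample.wav". Since interface.launch() above blocks when the script is run
# directly, call this helper from a separate script or move it above the launch call.
def transcribe_wav_file(path="sample.wav"):
    import soundfile as sf  # pip install soundfile (only needed for this helper)
    speech, rate = sf.read(path, dtype="float32")
    # Reuse the same (sample_rate, data) tuple format that gr.Audio delivers
    return recognize_speech((rate, speech))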