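"""French Learning App (Hugging Face Space, app.py).

Gradio UI with two tabs: a teacher sets learning goals, then the app generates
spoken French (gpt2 text + edge-tts audio), transcribes the learner's spoken
reply with Whisper, translates it to English, and checks it against the goals.
"""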
import asyncio
import tempfile

import edge_tts
import gradio as gr
import numpy as np
import torch
from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration

class FrenchLearningApp:
    def __init__(self):
        # Initialize the conversation model (gpt2 is English-only; a French or
        # multilingual model would give better prompts)
        self.conversation_model = pipeline("text-generation", model="gpt2")
        # Initialize the Whisper model used for transcription and translation
        self.whisper_processor = WhisperProcessor.from_pretrained("openai/whisper-large-v3")
        self.whisper_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v3")
        self.context = "Start a conversation in French"
        self.learning_goals = []

    def set_learning_goals(self, goals):
        # One goal per line; drop blank lines so they don't trivially "match" later
        self.learning_goals = [g.strip() for g in goals.split('\n') if g.strip()]
        return f"Learning goals set: {self.learning_goals}"
    async def generate_french(self):
        # Continue the conversation from the running context; max_new_tokens avoids
        # errors once the growing context exceeds a fixed max_length
        french_text = self.conversation_model(
            self.context, max_new_tokens=60, return_full_text=False
        )[0]['generated_text']

        # Generate audio using edge-tts
        voice = "fr-FR-HenriNeural"  # any available French voice can be used here
        communicate = edge_tts.Communicate(french_text, voice)
        audio_data = b""
        async for chunk in communicate.stream():
            if chunk["type"] == "audio":
                audio_data += chunk["data"]

        # edge-tts streams MP3 bytes, not raw PCM, so reading them with np.frombuffer
        # would produce noise. Write the MP3 to a temp file and let Gradio's Audio
        # component decode it for playback.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
            f.write(audio_data)
        return f.name, french_text
    def process_user_response(self, audio):
        if audio is None:  # the change event also fires when the recording is cleared
            return "", "", ""
        # Gradio's numpy audio input is a (sample_rate, samples) tuple;
        # Whisper expects mono float32 audio at 16 kHz
        sample_rate, data = audio
        data = data.astype(np.float32) / 32768.0 if data.dtype == np.int16 else data.astype(np.float32)
        if data.ndim > 1:  # downmix stereo to mono
            data = data.mean(axis=1)
        if sample_rate != 16000:
            # Naive linear resampling; librosa or torchaudio would give better quality
            target_length = int(len(data) * 16000 / sample_rate)
            data = np.interp(np.linspace(0, len(data), num=target_length, endpoint=False),
                             np.arange(len(data)), data)
        input_features = self.whisper_processor(data, sampling_rate=16000, return_tensors="pt").input_features
        # Generate the French transcription
        self.whisper_model.config.forced_decoder_ids = self.whisper_processor.get_decoder_prompt_ids(language="french", task="transcribe")
        predicted_ids = self.whisper_model.generate(input_features)
        french_text = self.whisper_processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
        # Translate French to English using Whisper's translate task
        self.whisper_model.config.forced_decoder_ids = self.whisper_processor.get_decoder_prompt_ids(language="french", task="translate")
        predicted_ids = self.whisper_model.generate(input_features)
        english_text = self.whisper_processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
        # Analyze the response against the learning goals (simplified)
        analysis = self.analyze_response(english_text)
        # Append the learner's utterance to the running conversation context
        self.context += f" {french_text}"
        return french_text, english_text, analysis
    def analyze_response(self, english_text):
        # Simplified analysis - check if any learning goal is mentioned
        analysis = []
        for goal in self.learning_goals:
            if goal.lower() in english_text.lower():
                analysis.append(f"Goal met: {goal}")
            else:
                analysis.append(f"Goal not yet met: {goal}")
        return "\n".join(analysis)

def launch_app():
    app = FrenchLearningApp()
    with gr.Blocks() as interface:
        gr.Markdown("# French Learning Application")

        with gr.Tab("Teacher Setup"):
            goals_input = gr.Textbox(label="Enter learning goals (one per line)")
            set_goals_button = gr.Button("Set Learning Goals")
            goals_output = gr.Textbox(label="Goals Status")
            set_goals_button.click(app.set_learning_goals, inputs=goals_input, outputs=goals_output)

        with gr.Tab("Conversation"):
            generate_button = gr.Button("Generate French")
            audio_output = gr.Audio(label="AI Speech")
            french_output = gr.Textbox(label="French Text")
            generate_button.click(lambda: asyncio.run(app.generate_french()),
                                  inputs=None, outputs=[audio_output, french_output])

            # Gradio 3.x microphone parameter; on Gradio 4+ this would be sources=["microphone"]
            audio_input = gr.Audio(source="microphone", type="numpy", label="Your Response")
            transcription_output = gr.Textbox(label="Your Speech (Transcribed)")
            translation_output = gr.Textbox(label="English Translation")
            analysis_output = gr.Textbox(label="Analysis")
            audio_input.change(app.process_user_response, inputs=audio_input,
                               outputs=[transcription_output, translation_output, analysis_output])

    interface.launch()


if __name__ == "__main__":
    launch_app()