import asyncio

import edge_tts
import gradio as gr
import numpy as np
import torch
from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration
from transformers.pipelines.audio_utils import ffmpeg_read
|
|
|
class FrenchLearningApp:
    """French conversation practice: text generation, speech synthesis and analysis.

    The app keeps a running text ``context`` that seeds each generated prompt,
    speaks the generated text through edge-tts, transcribes and translates the
    learner's spoken reply with Whisper, and checks the English translation
    against teacher-supplied learning goals.
    """

    # Whisper's feature extractor only accepts audio sampled at 16 kHz.
    _WHISPER_SAMPLE_RATE = 16000
    # Rate the synthesized speech is decoded to and reported at.
    _TTS_SAMPLE_RATE = 24000
    _TTS_VOICE = "fr-FR-HenriNeural"

    def __init__(self):
        """Load the text-generation and Whisper models and reset the session state."""
        self.conversation_model = pipeline("text-generation", model="gpt2")

        self.whisper_processor = WhisperProcessor.from_pretrained("openai/whisper-large-v3")
        self.whisper_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v3")

        # Prompt fed to the text generator; learner replies are appended to it.
        self.context = "Start a conversation in French"
        self.learning_goals = []

    def set_learning_goals(self, goals: str) -> str:
        """Store the learning goals, one per line of *goals*.

        Blank lines and surrounding whitespace are dropped: an empty goal is a
        substring of every response and would always be reported as met.

        Returns:
            A status message listing the stored goals.
        """
        stripped = (line.strip() for line in (goals or "").splitlines())
        self.learning_goals = [goal for goal in stripped if goal]
        return f"Learning goals set: {self.learning_goals}"

    async def generate_french(self):
        """Generate the next prompt from the context and synthesize it as speech.

        Returns:
            ``((sample_rate, waveform), text)`` where ``waveform`` is a mono
            float32 array and ``text`` is the generated text.

        Raises:
            ValueError: if ffmpeg is unavailable or cannot decode the audio.
        """
        # max_new_tokens rather than max_length: the context grows every turn,
        # and a total-length cap would eventually leave no room to generate.
        french_text = self.conversation_model(self.context, max_new_tokens=100)[0]["generated_text"]

        communicate = edge_tts.Communicate(french_text, self._TTS_VOICE)
        encoded_chunks = []
        async for chunk in communicate.stream():
            if chunk["type"] == "audio":
                encoded_chunks.append(chunk["data"])

        # edge-tts streams MP3-encoded bytes, not raw PCM, so they must be
        # decoded before being handed on as samples.
        audio_float = ffmpeg_read(b"".join(encoded_chunks), self._TTS_SAMPLE_RATE)

        return (self._TTS_SAMPLE_RATE, audio_float), french_text

    def process_user_response(self, audio):
        """Transcribe, translate and analyze the learner's spoken reply.

        Args:
            audio: ``(sample_rate, samples)`` as produced by a Gradio audio
                input with ``type="numpy"``, a bare waveform already sampled
                at 16 kHz, or ``None`` when there is no recording.

        Returns:
            ``(french_text, english_text, analysis)``; three empty strings
            when there is no audio to process.
        """
        if audio is None:
            return "", "", ""

        waveform = self._prepare_audio(audio, self._WHISPER_SAMPLE_RATE)
        if waveform.size == 0:
            return "", "", ""

        input_features = self.whisper_processor(
            waveform, sampling_rate=self._WHISPER_SAMPLE_RATE, return_tensors="pt"
        ).input_features

        french_text = self._run_whisper(input_features, task="transcribe")
        english_text = self._run_whisper(input_features, task="translate")

        analysis = self.analyze_response(english_text)

        # Feed the learner's words back into the prompt for the next turn.
        self.context += f" {french_text}"

        return french_text, english_text, analysis

    def analyze_response(self, english_text: str) -> str:
        """Report, one line per goal, whether the goal text occurs in *english_text*.

        Matching is a case-insensitive substring test.
        """
        spoken = english_text.lower()
        return "\n".join(
            f"Goal met: {goal}" if goal.lower() in spoken else f"Goal not yet met: {goal}"
            for goal in self.learning_goals
        )

    @staticmethod
    def _prepare_audio(audio, target_rate: int = 16000) -> np.ndarray:
        """Convert recorded audio to a mono float32 waveform at *target_rate*."""
        if isinstance(audio, (tuple, list)) and len(audio) == 2:
            source_rate, samples = audio
        else:
            # Bare waveform: assumed to be at the target rate already.
            source_rate, samples = target_rate, audio

        samples = np.asarray(samples)
        if np.issubdtype(samples.dtype, np.signedinteger):
            # Scale integer PCM (e.g. int16) into [-1.0, 1.0).
            full_scale = float(np.iinfo(samples.dtype).max) + 1.0
            samples = samples.astype(np.float32) / full_scale
        else:
            samples = samples.astype(np.float32)

        if samples.ndim > 1:
            # Multi-channel audio is laid out (samples, channels); downmix.
            samples = samples.mean(axis=1)

        if source_rate != target_rate and samples.size > 1:
            # Linear interpolation: dependency-free and adequate for speech.
            target_len = max(1, int(round(samples.size * target_rate / source_rate)))
            source_times = np.arange(samples.size) / source_rate
            target_times = np.arange(target_len) / target_rate
            samples = np.interp(target_times, source_times, samples).astype(np.float32)

        return samples

    def _run_whisper(self, input_features, task: str) -> str:
        """Decode French speech features with Whisper for *task*."""
        # Passing language/task per call avoids mutating the shared model config.
        predicted_ids = self.whisper_model.generate(input_features, language="french", task=task)
        return self.whisper_processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
|
|
|
def launch_app():
    """Build the Gradio interface around a FrenchLearningApp instance and launch it.

    The interface has a "Teacher Setup" tab for entering learning goals and a
    "Conversation" tab for generating spoken prompts and recording replies.
    """
    tutor = FrenchLearningApp()

    with gr.Blocks() as demo:
        gr.Markdown("# French Learning Application")

        # --- Teacher tab: enter and confirm the learning goals ---------------
        with gr.Tab("Teacher Setup"):
            goals_box = gr.Textbox(label="Enter learning goals (one per line)")
            goals_btn = gr.Button("Set Learning Goals")
            goals_status = gr.Textbox(label="Goals Status")

            goals_btn.click(
                tutor.set_learning_goals,
                inputs=goals_box,
                outputs=goals_status,
            )

        # --- Learner tab: hear a prompt, then answer by microphone -----------
        with gr.Tab("Conversation"):
            speak_btn = gr.Button("Generate French")
            speech_player = gr.Audio(label="AI Speech")
            prompt_text = gr.Textbox(label="French Text")

            # generate_french is a coroutine, so it is run to completion here.
            speak_btn.click(
                lambda: asyncio.run(tutor.generate_french()),
                inputs=None,
                outputs=[speech_player, prompt_text],
            )

            reply_recorder = gr.Audio(source="microphone", type="numpy", label="Your Response")
            reply_transcript = gr.Textbox(label="Your Speech (Transcribed)")
            reply_translation = gr.Textbox(label="English Translation")
            reply_analysis = gr.Textbox(label="Analysis")

            # Each change to the recording is transcribed, translated and analyzed.
            reply_recorder.change(
                tutor.process_user_response,
                inputs=reply_recorder,
                outputs=[reply_transcript, reply_translation, reply_analysis],
            )

    demo.launch()
|
|
|
if __name__ == "__main__":
    # Start the UI only when run as a script, not when imported as a module.
    launch_app()