Spaces:

sagar007
/

FRIDAY

Sleeping

sagar007 committed on Sep 23

Commit

24a5380

•

1 Parent(s): 8067820

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,39 +1,27 @@
 import gradio as gr
 import torch
-from moshi import MoshiServer
-# Initialize MoshiServer
-server = MoshiServer()
-server.load_model("kyutai/moshika-pytorch-bf16")
-def process_audio(audio, sample_rate):
-    # Convert audio to the correct format
     audio_tensor = torch.tensor(audio).float()
     if audio_tensor.dim() == 2:
         audio_tensor = audio_tensor.mean(dim=1)
-    # Process audio
-    result = server.process_audio(audio_tensor, sample_rate)
-    return result.text
-def generate_speech(text):
-    # Generate speech from text
-    result = server.generate_speech(text)
-    return (server.config.sample_rate, result.audio.cpu().numpy())
 # Create Gradio interface
 iface = gr.Interface(
-    fn=[process_audio, generate_speech],
-    inputs=[
-        gr.Audio(source="microphone", type="numpy", label="Speak"),
-        gr.Textbox(label="Enter text for speech synthesis")
-    ],
-    outputs=[
-        gr.Textbox(label="Transcription"),
-        gr.Audio(label="Synthesized Speech")
-    ],
-    title="Moshi Speech-Text Interaction",
-    description="Interact with Moshi for speech-to-text and text-to-speech tasks."
 )
 # Launch the app

 import gradio as gr
 import torch
+from moshi import load_model, transcribe
+# Load the model
+model = load_model("kyutai/moshika-pytorch-bf16")
+def process_audio(audio):
+    # Convert audio to tensor
     audio_tensor = torch.tensor(audio).float()
     if audio_tensor.dim() == 2:
         audio_tensor = audio_tensor.mean(dim=1)
+    # Transcribe audio
+    transcription = transcribe(model, audio_tensor)
+    return transcription
 # Create Gradio interface
 iface = gr.Interface(
+    fn=process_audio,
+    inputs=gr.Audio(source="upload", type="numpy"),
+    outputs=gr.Textbox(label="Transcription"),
+    title="Moshi Speech-to-Text",
+    description="Upload an audio file to transcribe using the Moshi model."
 )
 # Launch the app