Spaces:

NexaAIDev
/

omni-audio-demo

Running

App Files Files Community

PerryCheng614 committed on Nov 13

Commit

22c5bdb

•

1 Parent(s): 6155b02

support streaming

Browse files

Files changed (1) hide show

app.py +33 -37

app.py CHANGED Viewed

@@ -1,50 +1,47 @@
 import gradio as gr
-import requests
-import os
-# FastAPI endpoint
-API_URL = "https://nexa-omni.nexa4ai.com/process-audio/"
-# Add this global variable to track the last valid audio file
-last_valid_audio = None
def process_audio(audio_path, max_tokens):
    """
    Send an audio file to the FastAPI backend for processing.

    The most recent existing audio path is remembered in the module-level
    ``last_valid_audio`` so that a call without a new upload reuses it.

    Args:
        audio_path: Filesystem path of the uploaded/recorded audio, or a
            falsy value when nothing new was provided.
        max_tokens: Value forwarded to the backend as the ``max_tokens``
            form field.

    Returns:
        The ``'response'`` field of the backend's JSON reply, or a
        human-readable message when no audio is available or the request
        fails.
    """
    global last_valid_audio

    # Check audio file availability
    if audio_path and os.path.exists(audio_path):
        # New audio uploaded/recorded
        last_valid_audio = audio_path
    elif not audio_path and not last_valid_audio:
        # No audio provided and no previous valid audio
        return "Please upload or record an audio file first."

    # Prefer the remembered path; fall back to whatever was passed in
    current_audio = last_valid_audio if last_valid_audio else audio_path

    try:
        # Only proceed if we have a valid audio file
        if not (current_audio and os.path.exists(current_audio)):
            return "No valid audio file available."

        # Open the upload in a context manager so the handle is always
        # closed, even when the request raises.
        with open(current_audio, 'rb') as audio_file:
            files = {
                'file': ('audio.wav', audio_file, 'audio/wav')
            }
            data = {'max_tokens': max_tokens}
            response = requests.post(API_URL, files=files, data=data)

        response.raise_for_status()
        return response.json()['response']
    except Exception as e:
        # UI boundary: surface any failure as text instead of crashing
        return f"Error processing audio: {str(e)}"
 # Create Gradio interface
 demo = gr.Interface(
-    fn=process_audio,
     inputs=[
         gr.Audio(
             type="filepath",
@@ -59,12 +56,11 @@ demo = gr.Interface(
             label="Max Tokens"
         )
     ],
-    outputs=gr.Textbox(label="Response"),
     title="Nexa Omni",
     description="Upload an audio file and optionally provide a prompt to analyze the audio content.",
     examples=[
         ["example_audios/example_1.wav", 200],
-        # ["example_audios/example_2.wav", 100],
     ]
 )
@@ -73,4 +69,4 @@ def clear_output(audio, max_tokens):
 demo.load_examples = clear_output
 if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
+import websockets
+import asyncio
+import json
async def process_audio_stream(audio_path, max_tokens):
    """
    Process audio with a streaming response via WebSocket.

    This is an async generator: after sending the audio bytes and a JSON
    parameter message, it yields the accumulated response text each time a
    new message arrives, until the ``"[DONE]"`` sentinel is received or the
    connection ends.

    Args:
        audio_path: Filesystem path of the audio file to send; a falsy
            value yields a prompt asking for audio instead.
        max_tokens: Value sent to the server as ``"max_tokens"`` in the
            JSON parameter message.

    Yields:
        The response text accumulated so far, or a single human-readable
        message when no audio was given or processing failed.
    """
    if not audio_path:
        # An async generator may not ``return`` a value (that is a
        # SyntaxError), so emit the message and then end the generator.
        yield "Please upload or record an audio file first."
        return

    try:
        # Read audio file
        with open(audio_path, 'rb') as f:
            audio_data = f.read()

        # Connect to WebSocket
        async with websockets.connect('ws://localhost:8330/ws/process-audio/') as websocket:
            # Send audio data
            await websocket.send(audio_data)

            # Send parameters
            await websocket.send(json.dumps({
                "prompt": "",
                "max_tokens": max_tokens
            }))

            # Initialize response
            response = ""

            # Receive streaming tokens; each yield carries the full text so
            # far, so the consumer can simply replace what it displays.
            async for message in websocket:
                if message == "[DONE]":
                    break
                response += message
                yield response
    except Exception as e:
        # UI boundary: surface any failure as text instead of crashing
        yield f"Error processing audio: {str(e)}"
 # Create Gradio interface
 demo = gr.Interface(
+    fn=process_audio_stream,
     inputs=[
         gr.Audio(
             type="filepath",
             label="Max Tokens"
         )
     ],
+    outputs=gr.Textbox(label="Response", interactive=False),
     title="Nexa Omni",
     description="Upload an audio file and optionally provide a prompt to analyze the audio content.",
     examples=[
         ["example_audios/example_1.wav", 200],
     ]
 )
 demo.load_examples = clear_output
 if __name__ == "__main__":
+    demo.queue().launch()