Spaces:

NexaAIDev
/

omni-audio-demo

Running

App Files Files Community

PerryCheng614 committed 1 day ago

Commit

50ad4b3

•

1 Parent(s): c68be50

change back to wss

Browse files

Files changed (1) hide show

app.py +39 -61

app.py CHANGED Viewed

@@ -1,77 +1,61 @@
 import gradio as gr
-import requests
 import json
 import os
-API_KEY = os.getenv("API_KEY")
 if not API_KEY:
-    raise ValueError("API_KEY environment variable must be set")
-def process_audio_stream(audio_path, max_tokens):
     """
-    Process audio with streaming response via HTTP
     """
     if not audio_path:
         yield "Please upload or record an audio file first."
         return
     try:
-        # Read and prepare audio file
-        with open(audio_path, 'rb') as audio_file:
-            files = {
-                'audio_file': ('audio.wav', audio_file, 'audio/wav')
-            }
-            data = {
-                'prompt': "",
-                'max_tokens': max_tokens
-            }
-            headers = {
-                'X-API-Key': API_KEY
-            }
-            # Make streaming request
-            response = requests.post(
-                'https://nexa-omni.nexa4ai.com/process-audio/',
-                files=files,
-                data=data,
-                headers=headers,
-                stream=True
-            )
-            if response.status_code != 200:
-                yield f"Error: Server returned status code {response.status_code}"
-                return
             # Initialize response
-            response_text = ""
-            token_count = 0
-            # Process the streaming response
-            for line in response.iter_lines():
-                if line:
-                    line = line.decode('utf-8')
-                    if line.startswith('data: '):
-                        try:
-                            data = json.loads(line[6:])  # Skip 'data: ' prefix
-                            if data["status"] == "generating":
-                                if token_count < 3 and data["token"] in [" ", " \n", "\n", "<|im_start|>", "assistant"]:
-                                    token_count += 1
-                                    continue
-                                response_text += data["token"]
-                                gr.update(value=response_text)
-                                yield response_text
-                            elif data["status"] == "complete":
-                                break
-                            elif data["status"] == "error":
-                                yield f"Error: {data['error']}"
-                                break
-                        except json.JSONDecodeError:
-                            continue
     except Exception as e:
-        yield f"Error processing request: {str(e)}"
-# Create Gradio interface with specific queue configurations
 demo = gr.Interface(
     fn=process_audio_stream,
     inputs=[
@@ -108,10 +92,4 @@ demo = gr.Interface(
 )
 if __name__ == "__main__":
-    # Configure the queue for better streaming performance
-    demo.queue(
-        max_size=20,
-    ).launch(
-        server_name="0.0.0.0",
-        server_port=7860,
-    )

 import gradio as gr
+import websockets
+import asyncio
 import json
+import base64
 import os
+API_KEY = os.getenv('API_KEY')
 if not API_KEY:
+    raise ValueError("API_KEY must be set in environment variables")
+async def process_audio_stream(audio_path, max_tokens):
     """
+    Process audio with streaming response via WebSocket
     """
     if not audio_path:
         yield "Please upload or record an audio file first."
         return
     try:
+        # Read audio file and convert to base64 bytes
+        with open(audio_path, 'rb') as f:
+            audio_bytes = f.read()
+            base64_bytes = base64.b64encode(audio_bytes)
+        # Connect to WebSocket
+        async with websockets.connect('wss://nexa-omni.nexa4ai.com/ws/process-audio/?api_key=' + API_KEY) as websocket:
+            # Send binary base64 audio data as bytes
+            await websocket.send(base64_bytes)  # Send the raw base64 bytes
+            # Send parameters as JSON string
+            await websocket.send(json.dumps({
+                "prompt": "",
+                "max_tokens": max_tokens
+            }))
             # Initialize response
+            response = ""
+            # Receive streaming response
+            async for message in websocket:
+                try:
+                    data = json.loads(message)
+                    if data["status"] == "generating":
+                        response += data["token"]
+                        yield response
+                    elif data["status"] == "complete":
+                        break
+                    elif data["status"] == "error":
+                        yield f"Error: {data['error']}"
+                        break
+                except json.JSONDecodeError:
+                    continue
     except Exception as e:
+        yield f"Error connecting to server: {str(e)}"
+# Create Gradio interface
 demo = gr.Interface(
     fn=process_audio_stream,
     inputs=[
 )
 if __name__ == "__main__":
+    demo.queue().launch(server_name="0.0.0.0", server_port=7860)