Spaces:
Running
Running
File size: 3,028 Bytes
cfb4e8c 22c5bdb 9759803 cfb4e8c 22c5bdb de76a17 22c5bdb de76a17 22c5bdb b97cf3c 1bccd9f de76a17 42234b9 22c5bdb 9759803 ad55a96 4080a13 22c5bdb 9915f17 f078c1f 22c5bdb 42234b9 22c5bdb 1bccd9f 22c5bdb 1bccd9f 9759803 22c5bdb 9759803 22c5bdb de76a17 9759803 ff9e518 de76a17 22c5bdb de76a17 1bccd9f de76a17 22c5bdb a2f7134 4201079 a2f7134 2e0b130 6c152e8 2e0b130 a2f7134 de76a17 6c152e8 d6282fe 6c152e8 de76a17 9759803 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
import gradio as gr
import websockets
import asyncio
import json
import base64
async def process_audio_stream(audio_path, max_tokens):
    """
    Process audio with streaming response via WebSocket.

    Reads the file at ``audio_path``, base64-encodes it, sends it to the
    remote WebSocket endpoint followed by a JSON parameter message, and
    yields the accumulated response text each time a new token arrives.

    Args:
        audio_path: Path of the audio file to send. A falsy value (``None``
            or ``""``) yields a prompt asking the user to supply audio.
        max_tokens: Value forwarded to the server as ``"max_tokens"``.

    Yields:
        str: The response text accumulated so far, or a human-readable
        error message. Failures are reported as yielded text rather than
        raised, so the caller always receives something to display.
    """
    if not audio_path:
        yield "Please upload or record an audio file first."
        return

    # Read the audio before touching the network so that a local file
    # problem is not misreported as a server/connection failure.
    try:
        with open(audio_path, "rb") as f:
            base64_bytes = base64.b64encode(f.read())
    except OSError as e:
        yield f"Error reading audio file: {e}"
        return

    try:
        # Connect to WebSocket
        async with websockets.connect('wss://nexa-omni.nexa4ai.com/ws/process-audio/') as websocket:
            # First message: the base64-encoded audio, sent as bytes.
            await websocket.send(base64_bytes)

            # Second message: generation parameters as a JSON string.
            await websocket.send(json.dumps({
                "prompt": "",
                "max_tokens": max_tokens
            }))

            response = ""

            # Receive streaming response
            async for message in websocket:
                try:
                    data = json.loads(message)
                except (json.JSONDecodeError, UnicodeDecodeError):
                    # Ignore frames that are not valid JSON.
                    continue

                # Only JSON objects carry a status; skip anything else
                # instead of failing on the key lookup below.
                if not isinstance(data, dict):
                    continue

                status = data.get("status")
                if status == "generating":
                    # A missing token contributes nothing rather than
                    # aborting the whole stream with a KeyError.
                    response += data.get("token", "")
                    yield response
                elif status == "complete":
                    break
                elif status == "error":
                    yield f"Error: {data.get('error', 'unknown error')}"
                    break
    except Exception as e:
        # UI boundary: surface any connection/protocol failure as text.
        yield f"Error connecting to server: {str(e)}"
# Build the Gradio UI: one audio input plus a token-limit slider, wired to
# process_audio_stream, with a read-only textbox for the response.
demo = gr.Interface(
    fn=process_audio_stream,
    inputs=[
        # Passed to the handler as a file path (type="filepath").
        gr.Audio(
            type="filepath",
            label="Upload or Record Audio",
            sources=["upload", "microphone"],
        ),
        # Forwarded to the handler as max_tokens.
        gr.Slider(
            minimum=50,
            maximum=200,
            value=50,
            step=1,
            label="Max Tokens",
        ),
    ],
    outputs=gr.Textbox(label="Response", interactive=False),
    title="NEXA OmniAudio-2.6B",
    description="""
OmniAudio-2.6B is a compact audio-language model optimized for edge deployment.
Model Repo: <a href="https://huggingface.co/NexaAIDev/OmniAudio-2.6B">NexaAIDev/OmniAudio-2.6B</a>
Blog: <a href="https://nexa.ai/blogs/OmniAudio-2.6B">OmniAudio-2.6B Blog</a>
Upload an audio file and optionally provide a prompt to analyze the audio content.""",
    # Each example row is [audio file path, max tokens]; all use 200 tokens.
    examples=[
        [example_path, 200]
        for example_path in (
            "example_audios/voice_qa.mp3",
            "example_audios/voice_in_conversation.mp3",
            "example_audios/creative_content_generation.mp3",
            "example_audios/record_summary.mp3",
            "example_audios/change_tone.mp3",
        )
    ],
)
if __name__ == "__main__":
    # Script entry point: turn on Gradio's queue, then start the server
    # with host 0.0.0.0 and port 7860.
    queued_demo = demo.queue()
    queued_demo.launch(server_name="0.0.0.0", server_port=7860)
|