Spaces:
Running
Running
File size: 3,172 Bytes
cfb4e8c 50ad4b3 22c5bdb 50ad4b3 c68be50 cfb4e8c 50ad4b3 c68be50 50ad4b3 c68be50 50ad4b3 de76a17 50ad4b3 de76a17 22c5bdb b97cf3c 1bccd9f de76a17 50ad4b3 22c5bdb 50ad4b3 1bccd9f 22c5bdb 50ad4b3 1bccd9f 50ad4b3 22c5bdb de76a17 50ad4b3 ff9e518 50ad4b3 de76a17 22c5bdb de76a17 1bccd9f de76a17 22c5bdb a2f7134 4201079 a2f7134 2e0b130 a6bd1ac 2e0b130 a2f7134 de76a17 6c152e8 d6282fe 6c152e8 de76a17 50ad4b3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
import gradio as gr
import websockets
import asyncio
import json
import base64
import os
# Credential appended to the inference service URL. The app cannot do anything
# useful without it, so refuse to start when it is missing or empty.
API_KEY = os.environ.get('API_KEY')
if API_KEY is None or API_KEY == "":
    raise ValueError("API_KEY must be set in environment variables")
async def process_audio_stream(audio_path, max_tokens):
    """Stream the model's text response for an audio file over a WebSocket.

    The audio file is read, base64-encoded and sent as the first frame,
    followed by a JSON frame carrying the generation parameters. Every
    ``generating`` frame received afterwards appends its token to the
    running response, and the full response so far is yielded so the caller
    can simply replace the displayed text.

    Args:
        audio_path: Filesystem path of the audio to process. A falsy value
            (nothing uploaded or recorded) yields a hint and stops.
        max_tokens: Forwarded unchanged as ``max_tokens`` in the parameter
            frame.

    Yields:
        The accumulated response text after each received token, or a
        single human-readable error message.
    """
    if not audio_path:
        yield "Please upload or record an audio file first."
        return

    # Read the file outside the connection handling so a local I/O failure
    # is not misreported as a server connection problem.
    try:
        with open(audio_path, 'rb') as f:
            base64_bytes = base64.b64encode(f.read())
    except OSError as e:
        yield f"Error reading audio file: {e}"
        return

    try:
        async with websockets.connect(
            'wss://nexa-omni.nexa4ai.com/ws/process-audio/?api_key=' + API_KEY
        ) as websocket:
            # First frame: the base64-encoded audio, sent as bytes.
            await websocket.send(base64_bytes)
            # Second frame: generation parameters as a JSON string.
            await websocket.send(json.dumps({
                "prompt": "",
                "max_tokens": max_tokens
            }))

            response = ""
            async for message in websocket:
                try:
                    data = json.loads(message)
                except (json.JSONDecodeError, UnicodeDecodeError):
                    # Not a JSON frame; ignore it and keep streaming.
                    continue
                if not isinstance(data, dict):
                    # Valid JSON but not an object, so it carries no status.
                    continue

                status = data.get("status")
                if status == "generating":
                    token = data.get("token")
                    # A frame without a usable token must not abort the
                    # whole stream; skip it instead.
                    if isinstance(token, str):
                        response += token
                        yield response
                elif status == "complete":
                    break
                elif status == "error":
                    yield f"Error: {data.get('error', 'unknown error')}"
                    break
    except Exception as e:
        # Boundary handler: surface the failure in the UI instead of
        # letting the generator raise.
        yield f"Error connecting to server: {str(e)}"
# Build the Gradio UI: an audio clip and a token budget go in, text comes out.
_audio_input = gr.Audio(
    type="filepath",
    label="Upload or Record Audio",
    sources=["upload", "microphone"],
)
_max_tokens_input = gr.Slider(
    minimum=50,
    maximum=200,
    value=50,
    step=1,
    label="Max Tokens",
)
_response_output = gr.Textbox(label="Response", interactive=False)

# Text shown under the title; contains HTML links to the model repo and blog.
_DESCRIPTION = """
OmniAudio-2.6B is a compact audio-language model optimized for edge deployment.
Model Repo: <a href="https://huggingface.co/NexaAIDev/OmniAudio-2.6B">NexaAIDev/OmniAudio-2.6B</a>
Blog: <a href="https://nexa.ai/blogs/omniaudio-2.6b">OmniAudio-2.6B Blog</a>
Upload an audio file and optionally provide a prompt to analyze the audio content."""

# Bundled sample clips; each example row pairs a clip with a 200-token budget.
_EXAMPLE_CLIPS = [
    "voice_qa.mp3",
    "voice_in_conversation.mp3",
    "creative_content_generation.mp3",
    "record_summary.mp3",
    "change_tone.mp3",
]

demo = gr.Interface(
    fn=process_audio_stream,
    inputs=[_audio_input, _max_tokens_input],
    outputs=_response_output,
    title="NEXA OmniAudio-2.6B",
    description=_DESCRIPTION,
    examples=[["example_audios/" + clip, 200] for clip in _EXAMPLE_CLIPS],
)
if __name__ == "__main__":
    # Script entry point: enable Gradio's request queue, then serve the app
    # on all network interfaces at port 7860.
    demo.queue().launch(server_name="0.0.0.0", server_port=7860)