# omni-audio-demo / app.py
# Last commit: "support streaming" (22c5bdb) by PerryCheng614
# File size: 1.97 kB
import gradio as gr
import websockets
import asyncio
import json
async def process_audio_stream(audio_path, max_tokens):
    """Stream the server's response for an audio file over a WebSocket.

    This is an async generator. Each yielded value is the full response
    text accumulated so far (not just the newest piece), so a consumer can
    simply display the latest value.

    Args:
        audio_path: Path of the audio file to send. A falsy value (``None``
            or ``""``) yields a single "please upload" message instead.
        max_tokens: Forwarded to the server as the ``max_tokens`` field of
            the JSON parameter message.

    Yields:
        str: The cumulative response after each received message, or one
        human-readable message when no audio was supplied or when any
        exception occurred while reading the file or talking to the server.
    """
    if not audio_path:
        # An async generator cannot `return` a value (that is a
        # SyntaxError), so the message is yielded and the generator ends.
        yield "Please upload or record an audio file first."
        return

    try:
        # Read the whole audio file into memory before connecting.
        with open(audio_path, 'rb') as f:
            audio_data = f.read()

        async with websockets.connect('ws://localhost:8330/ws/process-audio/') as websocket:
            # First message: the raw audio bytes.
            await websocket.send(audio_data)
            # Second message: the generation parameters as JSON text.
            await websocket.send(json.dumps({
                "prompt": "",
                "max_tokens": max_tokens,
            }))

            response = ""
            async for message in websocket:
                # Binary frames arrive as bytes; decode them so the
                # sentinel comparison and string concatenation both work.
                if isinstance(message, bytes):
                    message = message.decode("utf-8", errors="replace")
                # The literal "[DONE]" message marks the end of the stream.
                if message == "[DONE]":
                    break
                response += message
                yield response
    except Exception as e:
        # Boundary handler: surface the failure as text to the caller
        # instead of raising out of the generator.
        yield f"Error processing audio: {str(e)}"
# Build the Gradio interface: one audio input (file path), one max-token
# slider, and a read-only text box that shows the response.
# NOTE(review): the description mentions optionally providing a prompt, but
# no prompt input is defined here — confirm whether the text or the inputs
# should change.
_audio_input = gr.Audio(
    type="filepath",
    label="Upload or Record Audio",
    sources=["upload", "microphone"],
)
_max_tokens_slider = gr.Slider(
    minimum=50,
    maximum=200,
    value=50,
    step=1,
    label="Max Tokens",
)
_response_box = gr.Textbox(label="Response", interactive=False)

demo = gr.Interface(
    fn=process_audio_stream,
    inputs=[_audio_input, _max_tokens_slider],
    outputs=_response_box,
    title="Nexa Omni",
    description="Upload an audio file and optionally provide a prompt to analyze the audio content.",
    # Each example row supplies one value per input: audio path, max tokens.
    examples=[
        ["example_audios/example_1.wav", 200],
    ],
)
def clear_output(audio, max_tokens) -> str:
    """Return an empty string, ignoring both arguments.

    The parameters match the two interface inputs (audio, max tokens) but
    neither is read.
    """
    del audio, max_tokens  # intentionally unused
    cleared = ""
    return cleared
# Attach clear_output to the interface object under the name `load_examples`.
# NOTE(review): this only rebinds an attribute on `demo`; whether Gradio ever
# reads `load_examples` is not visible from this file — confirm it has an effect.
demo.load_examples = clear_output

if __name__ == "__main__":
    # Enable the request queue, then start the server.
    queued_demo = demo.queue()
    queued_demo.launch()