Spaces:

NexaAIDev
/

omni-audio-demo

Running

omni-audio-demo / app.py

support streaming

22c5bdb about 1 month ago

1.97 kB

	import gradio as gr
	import websockets
	import asyncio
	import json

	async def process_audio_stream(audio_path, max_tokens):
	    """
	    Stream the audio-processing response received over a WebSocket.

	    Reads the file at ``audio_path``, sends its raw bytes followed by a JSON
	    parameter message to the local WebSocket endpoint, and yields the
	    accumulated response text every time a new message arrives. Streaming
	    stops when the ``[DONE]`` message is received or the connection ends.

	    Args:
	        audio_path: Path of the audio file to send; a falsy value means no
	            audio was provided.
	        max_tokens: Value forwarded as ``max_tokens`` in the parameter
	            message.

	    Yields:
	        The response text accumulated so far. When no audio was provided, or
	        when any step fails, a single human-readable message is yielded
	        instead of raising.
	    """
	    if not audio_path:
	        # An async generator may not ``return`` a value (that is a
	        # SyntaxError), so the hint is yielded and the generator then ends.
	        yield "Please upload or record an audio file first."
	        return

	    try:
	        # Read audio file
	        with open(audio_path, 'rb') as f:
	            audio_data = f.read()

	        # Connect to WebSocket
	        async with websockets.connect('ws://localhost:8330/ws/process-audio/') as websocket:
	            # Send audio data
	            await websocket.send(audio_data)

	            # Send parameters
	            await websocket.send(json.dumps({
	                "prompt": "",
	                "max_tokens": max_tokens
	            }))

	            # Initialize response
	            response = ""

	            # Receive streaming tokens
	            async for message in websocket:
	                # Binary frames arrive as bytes; decode them so the sentinel
	                # comparison and the concatenation below both work on text.
	                if isinstance(message, bytes):
	                    message = message.decode("utf-8", errors="replace")
	                if message == "[DONE]":
	                    break
	                response += message
	                yield response

	    except Exception as e:
	        # UI boundary: report the failure in the output box, do not raise.
	        yield f"Error processing audio: {str(e)}"

	# Create Gradio interface
	# The components are built first so the Interface call itself stays short.
	audio_input = gr.Audio(
	    type="filepath",
	    label="Upload or Record Audio",
	    sources=["upload", "microphone"],
	)
	max_tokens_slider = gr.Slider(
	    minimum=50,
	    maximum=200,
	    value=50,
	    step=1,
	    label="Max Tokens",
	)
	response_box = gr.Textbox(label="Response", interactive=False)

	demo = gr.Interface(
	    fn=process_audio_stream,
	    inputs=[audio_input, max_tokens_slider],
	    outputs=response_box,
	    title="Nexa Omni",
	    # The UI has no prompt field (the handler always sends an empty prompt),
	    # so the description only mentions what the user can actually do.
	    description="Upload or record an audio file to analyze the audio content.",
	    examples=[
	        ["example_audios/example_1.wav", 200],
	    ],
	)

	def clear_output(audio, max_tokens):
	    """Return an empty string, ignoring both arguments."""
	    cleared_text = ""
	    return cleared_text
	# Store the reset callback on the interface object as ``load_examples``.
	# NOTE(review): confirm that Gradio actually reads this attribute.
	demo.load_examples = clear_output

	if __name__ == "__main__":
	    # Call queue() first, then launch whatever it returns.
	    queued_app = demo.queue()
	    queued_app.launch()