Spaces:

NexaAIDev
/

omni-audio-demo

Running

App Files Files Community

omni-audio-demo / app.py

PerryCheng614

change to http

c68be50 3 months ago

raw

history blame

3.98 kB

	import gradio as gr
	import requests
	import json
	import os

	API_KEY = os.getenv("API_KEY")
	if not API_KEY:
	raise ValueError("API_KEY environment variable must be set")

	def process_audio_stream(audio_path, max_tokens):
	"""
	Process audio with streaming response via HTTP
	"""
	if not audio_path:
	yield "Please upload or record an audio file first."
	return

	try:
	# Read and prepare audio file
	with open(audio_path, 'rb') as audio_file:
	files = {
	'audio_file': ('audio.wav', audio_file, 'audio/wav')
	}
	data = {
	'prompt': "",
	'max_tokens': max_tokens
	}
	headers = {
	'X-API-Key': API_KEY
	}

	# Make streaming request
	response = requests.post(
	'https://nexa-omni.nexa4ai.com/process-audio/',
	files=files,
	data=data,
	headers=headers,
	stream=True
	)

	if response.status_code != 200:
	yield f"Error: Server returned status code {response.status_code}"
	return

	# Initialize response
	response_text = ""
	token_count = 0

	# Process the streaming response
	for line in response.iter_lines():
	if line:
	line = line.decode('utf-8')
	if line.startswith('data: '):
	try:
	data = json.loads(line[6:]) # Skip 'data: ' prefix
	if data["status"] == "generating":
	if token_count < 3 and data["token"] in [" ", " \n", "\n", "<\|im_start\|>", "assistant"]:
	token_count += 1
	continue
	response_text += data["token"]
	gr.update(value=response_text)
	yield response_text
	elif data["status"] == "complete":
	break
	elif data["status"] == "error":
	yield f"Error: {data['error']}"
	break
	except json.JSONDecodeError:
	continue

	except Exception as e:
	yield f"Error processing request: {str(e)}"

	# Create Gradio interface with specific queue configurations
	demo = gr.Interface(
	fn=process_audio_stream,
	inputs=[
	gr.Audio(
	type="filepath",
	label="Upload or Record Audio",
	sources=["upload", "microphone"]
	),
	gr.Slider(
	minimum=50,
	maximum=200,
	value=50,
	step=1,
	label="Max Tokens"
	)
	],
	outputs=gr.Textbox(label="Response", interactive=False),
	title="NEXA OmniAudio-2.6B",
	description=f"""
	OmniAudio-2.6B is a compact audio-language model optimized for edge deployment.

	Model Repo: <a href="https://huggingface.co/NexaAIDev/OmniAudio-2.6B">NexaAIDev/OmniAudio-2.6B</a>

	Blog: <a href="https://nexa.ai/blogs/omniaudio-2.6b">OmniAudio-2.6B Blog</a>

	Upload an audio file and optionally provide a prompt to analyze the audio content.""",
	examples=[
	["example_audios/voice_qa.mp3", 200],
	["example_audios/voice_in_conversation.mp3", 200],
	["example_audios/creative_content_generation.mp3", 200],
	["example_audios/record_summary.mp3", 200],
	["example_audios/change_tone.mp3", 200],
	]
	)

	if __name__ == "__main__":
	# Configure the queue for better streaming performance
	demo.queue(
	max_size=20,
	).launch(
	server_name="0.0.0.0",
	server_port=7860,
	)