# Hugging Face Space (status: Running)
import gradio as gr | |
import requests | |
import json | |
import os | |
# Credential sent as the X-API-Key header by process_audio_stream.
# Refuse to start when it is missing or empty.
API_KEY = os.environ.get("API_KEY")
if API_KEY is None or API_KEY == "":
    raise ValueError("API_KEY environment variable must be set")
# Tokens that may precede the real answer; they are dropped only while nothing
# has been emitted yet, and at most _MAX_SKIPPED_PREAMBLE_TOKENS times.
_PREAMBLE_TOKENS = (" ", " \n", "\n", "<|im_start|>", "assistant")
_MAX_SKIPPED_PREAMBLE_TOKENS = 3
# Prefix carried by every event line of the streamed response.
_SSE_DATA_PREFIX = "data: "
# (connect, read) timeouts in seconds, so a stalled server cannot block a
# queue worker forever. The read timeout applies between received chunks.
_REQUEST_TIMEOUT = (10, 120)


def process_audio_stream(audio_path, max_tokens):
    """Upload an audio file and stream back the generated text.

    The file is posted as multipart form data together with an empty prompt
    and ``max_tokens``. The response is read line by line; each line starting
    with ``data: `` is parsed as JSON and dispatched on its ``status`` field
    (``generating`` / ``complete`` / ``error``).

    Args:
        audio_path: Filesystem path of the audio to send. A falsy value
            (``None`` or ``""``) yields a hint message instead of a request.
        max_tokens: Value forwarded unchanged as the ``max_tokens`` form field.

    Yields:
        str: The cumulative response text after each accepted token, or a
        single human-readable error message. Failures are reported this way
        rather than raised, so the UI always has something to display.
    """
    if not audio_path:
        yield "Please upload or record an audio file first."
        return

    try:
        # The file only needs to stay open while the request body is sent;
        # stream=True defers just the download of the response body.
        with open(audio_path, 'rb') as audio_file:
            response = requests.post(
                'https://nexa-omni.nexa4ai.com/process-audio/',
                files={'audio_file': ('audio.wav', audio_file, 'audio/wav')},
                data={'prompt': "", 'max_tokens': max_tokens},
                headers={'X-API-Key': API_KEY},
                stream=True,
                timeout=_REQUEST_TIMEOUT,
            )

        # Close the connection on every exit path (break, return, error, or
        # the consumer abandoning this generator).
        with response:
            if response.status_code != 200:
                yield f"Error: Server returned status code {response.status_code}"
                return

            response_text = ""
            skipped_preamble = 0

            for raw_line in response.iter_lines():
                if not raw_line:
                    continue
                line = raw_line.decode('utf-8')
                if not line.startswith(_SSE_DATA_PREFIX):
                    continue

                try:
                    event = json.loads(line[len(_SSE_DATA_PREFIX):])
                except json.JSONDecodeError:
                    # Ignore lines whose payload is not valid JSON.
                    continue

                status = event["status"]
                if status == "generating":
                    token = event["token"]
                    # Only strip preamble before any text has been produced;
                    # otherwise legitimate spaces/newlines inside the answer
                    # would be swallowed.
                    if (
                        not response_text
                        and skipped_preamble < _MAX_SKIPPED_PREAMBLE_TOKENS
                        and token in _PREAMBLE_TOKENS
                    ):
                        skipped_preamble += 1
                        continue
                    response_text += token
                    yield response_text
                elif status == "complete":
                    break
                elif status == "error":
                    yield f"Error: {event['error']}"
                    break
    except Exception as e:
        # UI boundary: surface any failure (I/O, network, malformed event)
        # as text instead of crashing the handler.
        yield f"Error processing request: {str(e)}"
# Build the Gradio UI: an audio input and a max-token slider feed the
# streaming handler, and the generated text is shown in a read-only textbox.
demo = gr.Interface(
    fn=process_audio_stream,
    inputs=[
        gr.Audio(
            type="filepath",
            label="Upload or Record Audio",
            sources=["upload", "microphone"],
        ),
        gr.Slider(
            minimum=50,
            maximum=200,
            value=50,
            step=1,
            label="Max Tokens",
        ),
    ],
    outputs=gr.Textbox(label="Response", interactive=False),
    title="NEXA OmniAudio-2.6B",
    # Plain string literal: the text has no placeholders, so no f-prefix.
    description="""
OmniAudio-2.6B is a compact audio-language model optimized for edge deployment.
Model Repo: <a href="https://huggingface.co/NexaAIDev/OmniAudio-2.6B">NexaAIDev/OmniAudio-2.6B</a>
Blog: <a href="https://nexa.ai/blogs/omniaudio-2.6b">OmniAudio-2.6B Blog</a>
Upload an audio file and optionally provide a prompt to analyze the audio content.""",
    # Each example row is [audio path, max tokens], matching the inputs order.
    examples=[
        ["example_audios/voice_qa.mp3", 200],
        ["example_audios/voice_in_conversation.mp3", 200],
        ["example_audios/creative_content_generation.mp3", 200],
        ["example_audios/record_summary.mp3", 200],
        ["example_audios/change_tone.mp3", 200],
    ],
)
if __name__ == "__main__":
    # Turn on request queuing (queue size capped via max_size=20), then
    # serve the app on all network interfaces at port 7860.
    queued_demo = demo.queue(max_size=20)
    queued_demo.launch(server_name="0.0.0.0", server_port=7860)