omni-audio-demo / app.py
PerryCheng614's picture
update some details
7bd2b9b
raw
history blame
1.71 kB
import gradio as gr
import requests
import os
# FastAPI backend endpoint that process_audio POSTs the audio file and prompt to
API_URL = "https://nexa-omni.nexa4ai.com/process-audio/"
def process_audio(audio_path, prompt=""):
    """
    Send an audio file to the FastAPI backend and return its text response.

    Args:
        audio_path: Filesystem path of the audio file to upload. The file is
            deleted once the call finishes, whether or not the request
            succeeded.
        prompt: Optional instruction sent alongside the audio as form data.

    Returns:
        The ``'response'`` field of the backend's JSON reply, or a string
        starting with ``"Error processing audio: "`` if anything fails.
        Errors are returned rather than raised so the UI can display them.
    """
    if not audio_path:
        # Nothing to upload (e.g. None or ""): report it clearly instead of
        # surfacing the low-level TypeError that open() would raise.
        return "Error processing audio: no audio file provided"

    try:
        # The context manager guarantees the handle is closed before the
        # cleanup below runs; an open handle can block deletion on Windows.
        with open(audio_path, 'rb') as audio_file:
            files = {'file': ('audio.wav', audio_file, 'audio/wav')}
            # Send prompt as form data
            data = {'prompt': prompt}
            # (connect, read) timeouts so a stalled backend cannot hang the
            # worker forever; the read timeout is generous for long audio.
            response = requests.post(
                API_URL, files=files, data=data, timeout=(10, 300)
            )
        response.raise_for_status()
        return response.json()['response']
    except Exception as e:
        # Top-level UI boundary: turn any failure into a displayable message.
        return f"Error processing audio: {e}"
    finally:
        # Clean up the temporary file if it exists
        if os.path.exists(audio_path):
            os.remove(audio_path)
# Assemble the Gradio UI: one audio input and one prompt box feed
# process_audio, whose returned text is shown in a single textbox.
_audio_input = gr.Audio(
    sources=["upload", "microphone"],
    label="Upload or Record Audio",
    type="filepath",
)
_prompt_input = gr.Textbox(
    value="transcribe this audio in English and return me the transcription:",
    label="Prompt",
    placeholder="Enter prompt (optional)",
)
_response_output = gr.Textbox(label="Response")

demo = gr.Interface(
    title="Nexa Omni",
    description="Upload an audio file and optionally provide a prompt to analyze the audio content.",
    fn=process_audio,
    inputs=[_audio_input, _prompt_input],
    outputs=_response_output,
    # Each example row supplies one value per input, in input order.
    examples=[
        ["example_audios/example_1.wav", "transcribe this audio in English"],
    ],
)
def clear_output(audio, prompt):
    """Return an empty string, ignoring both the audio and the prompt."""
    blank = ""
    return blank
# Attach clear_output to the interface under the attribute name `load_examples`.
# NOTE(review): nothing visible in this file reads `demo.load_examples`; confirm
# that Gradio actually consults this attribute, otherwise this has no effect.
demo.load_examples = clear_output
# Start the web UI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()