File size: 1,980 Bytes
cfb4e8c
22c5bdb
 
 
cfb4e8c
22c5bdb
de76a17
22c5bdb
de76a17
22c5bdb
b97cf3c
 
1bccd9f
de76a17
22c5bdb
 
 
 
 
 
 
 
 
 
 
 
 
 
1bccd9f
22c5bdb
 
1bccd9f
22c5bdb
 
 
 
 
 
 
de76a17
22c5bdb
ff9e518
de76a17
 
22c5bdb
de76a17
 
 
 
 
 
1bccd9f
 
 
 
 
 
de76a17
 
22c5bdb
7bd2b9b
de76a17
 
502958f
de76a17
 
 
1bccd9f
7bd2b9b
 
 
de76a17
22c5bdb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import gradio as gr
import websockets
import asyncio
import json

async def process_audio_stream(audio_path, max_tokens):
    """Stream the server's response for an audio file over a WebSocket.

    The file's raw bytes are sent first, followed by a JSON message holding
    an empty ``prompt`` and the given ``max_tokens``. Every message received
    afterwards is appended to the running text, which is yielded after each
    message so the caller can show progress. Streaming stops at the
    ``"[DONE]"`` sentinel or when iteration over the connection ends.

    Args:
        audio_path: Path of the audio file to send. A falsy value yields a
            request to provide audio and ends the generator.
        max_tokens: Value forwarded as ``max_tokens`` in the JSON parameters.

    Yields:
        The text accumulated so far. On failure, a message starting with
        ``"Error processing audio: "``; if some text had already been
        streamed, that text is kept ahead of the error message.
    """
    if not audio_path:
        yield "Please upload or record an audio file first."
        return

    # Declared outside the try so the error handler can still show whatever
    # was streamed before a failure.
    response = ""

    try:
        # Read audio file
        with open(audio_path, 'rb') as f:
            audio_data = f.read()

        # Connect to WebSocket
        async with websockets.connect('ws://localhost:8330/ws/process-audio/') as websocket:
            # Send audio data, then the generation parameters
            await websocket.send(audio_data)
            await websocket.send(json.dumps({
                "prompt": "",
                "max_tokens": max_tokens
            }))

            # Receive streaming tokens
            async for message in websocket:
                # Binary frames arrive as bytes; decode them so they can be
                # compared with the sentinel and appended to the text.
                if isinstance(message, bytes):
                    message = message.decode("utf-8", errors="replace")
                if message == "[DONE]":
                    break
                response += message
                yield response

    except Exception as e:
        # Deliberately broad: the error is reported to the caller as text
        # instead of propagating out of the generator.
        error_text = f"Error processing audio: {e}"
        yield f"{response}\n\n{error_text}" if response else error_text

# Create Gradio interface
# The two inputs are passed positionally to
# process_audio_stream(audio_path, max_tokens).
demo = gr.Interface(
    fn=process_audio_stream,
    inputs=[
        # type="filepath" hands the handler a path it can open and read.
        gr.Audio(
            type="filepath",
            label="Upload or Record Audio",
            sources=["upload", "microphone"]
        ),
        gr.Slider(
            minimum=50,
            maximum=200,
            value=50,
            step=1,
            label="Max Tokens"
        )
    ],
    outputs=gr.Textbox(label="Response", interactive=False),
    title="Nexa Omni",
    # The interface has no prompt input (the handler always sends an empty
    # prompt), so the description mentions only the controls that exist.
    description="Upload or record an audio file and choose a maximum token count to analyze the audio content.",
    examples=[
        ["example_audios/example_1.wav", 200],
    ]
)

def clear_output(audio, max_tokens):
    """Return an empty string, ignoring both arguments."""
    # Neither argument is read; they only shape the call signature.
    blank_text = ""
    return blank_text
# Store clear_output on the interface object under the name `load_examples`.
# NOTE(review): nothing in this file reads `demo.load_examples`; confirm that
# Gradio actually consults this attribute, otherwise this line has no effect.
demo.load_examples = clear_output

if __name__ == "__main__":
    # Script entry point: call queue() on the interface, then launch the result.
    queued_demo = demo.queue()
    queued_demo.launch()