import gradio as gr
import websockets
import asyncio
import json
import base64

async def process_audio_stream(audio_path, max_tokens):
    """
    Stream a text response for an audio file over the Nexa Omni WebSocket.

    The file at ``audio_path`` is read and base64-encoded, sent as one bytes
    message, and followed by a JSON text message holding the request
    parameters (an empty ``"prompt"`` and ``max_tokens``). Every server
    message is then parsed as JSON and dispatched on its ``"status"`` field:

    * ``"generating"`` - append ``"token"`` to the running text and yield it
    * ``"complete"``   - stop streaming
    * ``"error"``      - yield ``"Error: <error>"`` and stop

    Messages that are not valid JSON objects, or that carry any other
    status, are skipped.

    Args:
        audio_path: Path of the audio file to send. A falsy value (nothing
            uploaded or recorded) yields a prompt to provide audio.
        max_tokens: Forwarded unchanged as the ``"max_tokens"`` field.

    Yields:
        str: The accumulated response text after each received token, or a
        single human-readable error message. Failures are reported by
        yielding a message rather than by raising.
    """
    if not audio_path:
        yield "Please upload or record an audio file first."
        return

    # Read and encode before connecting so that an unreadable file is
    # reported as a file problem rather than as a connection failure.
    try:
        with open(audio_path, 'rb') as f:
            base64_bytes = base64.b64encode(f.read())
    except OSError as e:
        yield f"Error reading audio file: {e}"
        return

    try:
        async with websockets.connect('wss://nexa-omni.nexa4ai.com/ws/process-audio/') as websocket:
            # First message: the base64-encoded audio, sent as bytes.
            await websocket.send(base64_bytes)

            # Second message: generation parameters as a JSON string.
            await websocket.send(json.dumps({
                "prompt": "",
                "max_tokens": max_tokens
            }))

            response = ""

            async for message in websocket:
                # Keep the try body minimal: only decoding may fail here.
                try:
                    data = json.loads(message)
                except (json.JSONDecodeError, UnicodeDecodeError):
                    continue

                # Valid JSON that is not an object has no status to act on.
                if not isinstance(data, dict):
                    continue

                status = data.get("status")
                if status == "generating":
                    # A missing/empty token leaves the text unchanged
                    # instead of aborting the stream with a KeyError.
                    response += data.get("token") or ""
                    yield response
                elif status == "complete":
                    break
                elif status == "error":
                    yield f"Error: {data.get('error', 'unknown error')}"
                    break

    except Exception as e:
        # Top-level boundary: surface any connection/protocol failure to
        # the UI as text instead of raising out of the generator.
        yield f"Error connecting to server: {e}"

# Build the Gradio UI: one audio input and one token-limit slider feed
# process_audio_stream, whose yielded text is shown in a read-only textbox.
audio_input = gr.Audio(
    type="filepath",  # the handler receives a path on disk
    label="Upload or Record Audio",
    sources=["upload", "microphone"],
)
max_tokens_slider = gr.Slider(
    minimum=50,
    maximum=200,
    value=50,
    step=1,
    label="Max Tokens",
)
response_box = gr.Textbox(label="Response", interactive=False)

# NOTE(review): the description mentions an optional prompt, but this
# interface defines no prompt input - confirm whether one is intended.
demo = gr.Interface(
    fn=process_audio_stream,
    inputs=[audio_input, max_tokens_slider],
    outputs=response_box,
    title="Nexa Omni",
    description="Upload an audio file and optionally provide a prompt to analyze the audio content.",
    # Each example row supplies one value per input: [audio path, max tokens].
    examples=[
        ["example_audios/example_1.wav", 200],
    ],
)

if __name__ == "__main__":
    # Script entry point: call queue() on the interface, then launch the
    # result bound to 0.0.0.0 on port 7860.
    queued_demo = demo.queue()
    queued_demo.launch(server_name="0.0.0.0", server_port=7860)