import gradio as gr
import requests
import json
import os

# Read the API key once at import time and refuse to start when it is
# missing or empty.
API_KEY = os.environ.get("API_KEY")
if API_KEY is None or API_KEY == "":
    raise ValueError("API_KEY environment variable must be set")

def process_audio_stream(audio_path, max_tokens):
    """
    Stream the model's response for an audio file via HTTP.

    The file is uploaded as multipart form data together with an empty
    prompt and the token limit. The reply is read line by line; lines of the
    form ``data: <json>`` are parsed as events. Each "generating" event
    contributes one token, and the text accumulated so far is yielded after
    every token so the caller can display partial output.

    Args:
        audio_path: Filesystem path of the audio file to send. A falsy value
            yields a hint message and stops.
        max_tokens: Token limit forwarded to the server as ``max_tokens``.

    Yields:
        str: The response text accumulated so far, or a human-readable
        message when there is no input or the request fails. Failures are
        reported as yielded messages rather than raised.
    """
    if not audio_path:
        yield "Please upload or record an audio file first."
        return

    # Tokens dropped from the output (at most three in total).
    # NOTE(review): these look like chat-template preamble tokens, but the
    # check is not limited to the start of the stream -- confirm intent.
    skippable_tokens = (" ", " \n", "\n", "<|im_start|>", "assistant")
    max_skipped = 3

    # (connect, read) timeouts in seconds. The read timeout applies to each
    # wait for more data, so it bounds a stalled stream, not the total
    # generation time.
    request_timeout = (10, 120)

    event_prefix = 'data: '

    try:
        # Read and prepare audio file
        with open(audio_path, 'rb') as audio_file:
            files = {
                'audio_file': ('audio.wav', audio_file, 'audio/wav')
            }
            form_fields = {
                'prompt': "",
                'max_tokens': max_tokens
            }
            headers = {
                'X-API-Key': API_KEY
            }

            # The context manager releases the streamed connection on every
            # exit path, including early returns and an abandoned generator.
            with requests.post(
                'https://nexa-omni.nexa4ai.com/process-audio/',
                files=files,
                data=form_fields,
                headers=headers,
                stream=True,
                timeout=request_timeout,
            ) as response:
                if response.status_code != 200:
                    yield f"Error: Server returned status code {response.status_code}"
                    return

                response_text = ""
                skipped_count = 0

                for raw_line in response.iter_lines():
                    if not raw_line:
                        continue
                    line = raw_line.decode('utf-8')
                    if not line.startswith(event_prefix):
                        continue

                    try:
                        event = json.loads(line[len(event_prefix):])
                    except json.JSONDecodeError:
                        # Ignore payloads that are not valid JSON.
                        continue
                    if not isinstance(event, dict):
                        continue

                    status = event.get("status")
                    if status == "generating":
                        token = event.get("token") or ""
                        if skipped_count < max_skipped and token in skippable_tokens:
                            skipped_count += 1
                            continue
                        response_text += token
                        yield response_text
                    elif status == "complete":
                        break
                    elif status == "error":
                        yield f"Error: {event.get('error', 'Unknown error')}"
                        break

    except Exception as e:
        # UI boundary: report any failure (file, network, decoding) as text
        # instead of raising into the caller.
        yield f"Error processing request: {str(e)}"

# Build the Gradio interface: an audio input and a token-limit slider feed
# process_audio_stream, whose yielded text is shown in a read-only textbox.
demo = gr.Interface(
    fn=process_audio_stream,
    inputs=[
        gr.Audio(
            type="filepath",
            label="Upload or Record Audio",
            sources=["upload", "microphone"]
        ),
        gr.Slider(
            minimum=50,
            maximum=200,
            value=50,
            step=1,
            label="Max Tokens"
        )
    ],
    outputs=gr.Textbox(label="Response", interactive=False),
    title="NEXA OmniAudio-2.6B",
    # Plain string: there are no placeholders, so no f-prefix is needed.
    # The closing sentence describes the inputs this interface actually has
    # (there is no prompt field).
    description="""
    OmniAudio-2.6B is a compact audio-language model optimized for edge deployment.
        
    Model Repo: <a href="https://huggingface.co/NexaAIDev/OmniAudio-2.6B">NexaAIDev/OmniAudio-2.6B</a>
    
    Blog: <a href="https://nexa.ai/blogs/omniaudio-2.6b">OmniAudio-2.6B Blog</a>
    
    Upload or record an audio clip and choose the maximum number of tokens for the response.""",
    # Each example row is [audio file path, max tokens].
    examples=[
        ["example_audios/voice_qa.mp3", 200],
        ["example_audios/voice_in_conversation.mp3", 200],
        ["example_audios/creative_content_generation.mp3", 200],
        ["example_audios/record_summary.mp3", 200],
        ["example_audios/change_tone.mp3", 200],
    ]
)

if __name__ == "__main__":
    # Enable request queuing (at most 20 pending requests), then serve on
    # all network interfaces at port 7860.
    queued_demo = demo.queue(max_size=20)
    queued_demo.launch(server_name="0.0.0.0", server_port=7860)