import spaces
import gradio as gr
import numpy as np
import random
import torch
from diffusers import DiffusionPipeline
from PIL import Image
from aura_sr import AuraSR

# Runtime configuration: bfloat16 weights, CUDA when torch reports it as
# available, CPU otherwise.
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the FLUX.1 [schnell] pipeline once at import time, then move it to the
# selected device.
pipe = DiffusionPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-schnell",
    torch_dtype=dtype,
)
pipe = pipe.to(device)

# Upper bounds shared by seed randomization and the UI sliders.
MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 2048

# AuraSR V2 upscaler, also loaded once at import time.
aura_sr = AuraSR.from_pretrained("fal/AuraSR-v2")

@spaces.GPU()
def infer(prompt, seed=42, randomize_seed=False, width=1024, height=1024, num_inference_steps=4, upscale=False, progress=gr.Progress(track_tqdm=True)):
    """Generate one image from *prompt* and optionally upscale it.

    Args:
        prompt: Text description passed to the diffusion pipeline.
        seed: Seed for the torch generator; ignored when ``randomize_seed``
            is true.
        randomize_seed: When true, draw a new seed in ``[0, MAX_SEED]``.
        width: Requested image width, forwarded to the pipeline.
        height: Requested image height, forwarded to the pipeline.
        num_inference_steps: Number of pipeline inference steps.
        upscale: When true, pass the generated image through
            ``upscale_image`` before returning it.
        progress: Gradio progress tracker (created with ``track_tqdm=True``);
            it is not referenced in the body.

    Returns:
        A ``(image, seed)`` tuple, where ``seed`` is the value actually used
        to seed the generator.
    """
    # Swap in a fresh seed only when randomization was requested.
    seed = random.randint(0, MAX_SEED) if randomize_seed else seed

    rng = torch.Generator()
    rng.manual_seed(seed)

    output = pipe(
        prompt=prompt,
        width=width,
        height=height,
        num_inference_steps=num_inference_steps,
        guidance_scale=0.0,
        generator=rng,
    )
    picture = output.images[0]

    if not upscale:
        return picture, seed
    return upscale_image(picture), seed

@spaces.GPU()
def upscale_image(image):
    """Return *image* upscaled 4x by the module-level AuraSR model."""
    upscaled = aura_sr.upscale_4x(image)
    return upscaled

# Default prompt pre-filled in the prompt textbox below.
# NOTE(review): the same text is repeated verbatim inside the "Example Prompt"
# markdown literal further down; the two must be edited together.
example_prompt = "A vibrant red origami crane on a white background, intricate paper folds, studio lighting"

# Gradio interface: a title, one row with two columns (static model description
# in the first, controls and output in the second), then a full-width
# "Additional Information" section. Components render in creation order, so the
# order of the statements below is the layout.
with gr.Blocks() as demo:
    gr.Markdown("# FLUX.1 [schnell] Image Generator with AuraSR V2 Upscaling")
    with gr.Row():
        # First column (scale=2): static description of the model.
        with gr.Column(scale=2):
            gr.Markdown("""
            ## About FLUX.1 [schnell]
            - Fast text-to-image model optimized for local development and personal use
            - Part of the FLUX.1 model family by Black Forest Labs
            - Open-source: Available under Apache 2.0 license
            - Supports resolutions between 0.1 and 2.0 megapixels
            - Outperforms many larger models in quality and prompt adherence
            - Uses advanced transformer architecture with flow matching techniques
            - Capable of generating high-quality images in just a few inference steps
            """)
        # Second column (scale=3): prompt input, trigger button, output image
        # and all generation options.
        with gr.Column(scale=3):
            prompt = gr.Textbox(label="Prompt", placeholder="Enter your image description here...", value=example_prompt)
            run_button = gr.Button("Generate")
            result = gr.Image(label="Generated Image")
            # Upscaling is enabled by default (value=True).
            upscale = gr.Checkbox(label="Upscale with AuraSR V2 (4x resolution increase)", value=True)
            gr.Markdown("""
            **Note:** Upscaling with AuraSR V2 will significantly increase the resolution and may improve image quality,
            but it will also increase processing time. Use this option for the best possible output quality.
            """)
            gr.Markdown("""
            ## Example Prompt
            Try this example prompt or modify it to see how FLUX.1 [schnell] performs:
            ```
            A vibrant red origami crane on a white background, intricate paper folds, studio lighting
            ```
            """)
            with gr.Accordion("Advanced Settings", open=True):
                # The seed slider is both an input and an output of the click
                # handler below, so it shows the seed that was actually used.
                seed = gr.Slider(minimum=0, maximum=MAX_SEED, step=1, label="Seed", randomize=False)
                randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
                # NOTE(review): MAX_IMAGE_SIZE is 2048, so the largest request
                # (2048x2048, about 4.2 megapixels) exceeds the 2.0-megapixel
                # range quoted in the description text - confirm this is intended.
                width = gr.Slider(minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=1024, label="Width")
                height = gr.Slider(minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=1024, label="Height")
                num_inference_steps = gr.Slider(minimum=1, maximum=50, step=1, value=4, label="Number of inference steps")
                gr.Markdown("""
                **Note:** FLUX.1 [schnell] is optimized for speed and can produce high-quality results with just a few inference steps.
                Adjust the number of steps based on your speed/quality preference. More steps may improve quality but will increase generation time.
                """)
    gr.Markdown("""
    ## Additional Information
    - FLUX.1 [schnell] is based on a hybrid architecture of multimodal and parallel diffusion transformer blocks
    - It supports various aspect ratios within the 0.1 to 2.0 megapixel range
    - The model uses bfloat16 precision for efficient computation
    - For optimal performance, running on a CUDA-enabled GPU is recommended
    - For more details and other FLUX.1 variants, visit [Black Forest Labs](https://blackforestlabs.ai)
    - The upscaling feature uses AuraSR V2, an open reproduction of the GigaGAN Upscaler from fal.ai
    """)

    # Clicking "Generate" calls infer with the current control values, in the
    # same order as infer's positional parameters. The returned image goes to
    # `result` and the returned seed is written back to the seed slider.
    run_button.click(
        infer,
        inputs=[prompt, seed, randomize_seed, width, height, num_inference_steps, upscale],
        outputs=[result, seed]
    )

# Start the Gradio app; this runs unconditionally at module level.
demo.launch()