"""Gradio demo: FLUX.1 [schnell] text-to-image with optional AuraSR V2 4x upscaling.

Importing/running this module loads both models, builds the Gradio UI and
launches it.
"""

import random

# `spaces` is kept ahead of torch/diffusers, as in the original import order.
# NOTE(review): ZeroGPU expects `spaces` to be imported before anything
# initialises CUDA -- confirm before reordering these imports.
import spaces
import gradio as gr
import numpy as np
import torch
from diffusers import DiffusionPipeline
from PIL import Image
from aura_sr import AuraSR

dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"

pipe = DiffusionPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-schnell", torch_dtype=dtype
).to(device)

# Upper bound for the seed slider and for randomly drawn seeds.
MAX_SEED = np.iinfo(np.int32).max
# Upper bound for the width/height sliders.
# NOTE(review): 2048 x 2048 is ~4.2 megapixels, while the UI text below
# advertises a 0.1-2.0 megapixel range -- confirm which one is intended.
MAX_IMAGE_SIZE = 2048

# Initialize AuraSR model
aura_sr = AuraSR.from_pretrained("fal/AuraSR-v2")


@spaces.GPU()
def infer(
    prompt: str,
    seed: int = 42,
    randomize_seed: bool = False,
    width: int = 1024,
    height: int = 1024,
    num_inference_steps: int = 4,
    upscale: bool = False,
    # Gradio injects the progress tracker through this default value.
    progress=gr.Progress(track_tqdm=True),
):
    """Generate one image for *prompt* and optionally upscale it.

    Args:
        prompt: Text description passed to the pipeline.
        seed: Seed for the random generator; ignored when
            ``randomize_seed`` is true.
        randomize_seed: When true, draw a fresh seed in ``[0, MAX_SEED]``.
        width: Requested image width, forwarded to the pipeline.
        height: Requested image height, forwarded to the pipeline.
        num_inference_steps: Number of inference steps, forwarded to the
            pipeline.
        upscale: When true, pass the generated image through
            ``upscale_image``.
        progress: Gradio progress tracker (not used directly in the body).

    Returns:
        A ``(image, seed)`` tuple, where ``seed`` is the seed that was
        actually used so the UI can display it.
    """
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    # Slider values may arrive as floats; ``manual_seed`` only accepts ints.
    seed = int(seed)
    generator = torch.Generator().manual_seed(seed)

    image = pipe(
        prompt=prompt,
        width=width,
        height=height,
        num_inference_steps=num_inference_steps,
        generator=generator,
        guidance_scale=0.0,
    ).images[0]

    if upscale:
        image = upscale_image(image)

    return image, seed


@spaces.GPU()
def upscale_image(image):
    """Return *image* after running it through AuraSR's ``upscale_4x``."""
    return aura_sr.upscale_4x(image)


# Example prompt
example_prompt = "A vibrant red origami crane on a white background, intricate paper folds, studio lighting"

# Markdown shown in the UI. The texts live at column 0 so that headings,
# bullet lists and the fenced block keep their own lines when rendered.
_TITLE_MD = "# FLUX.1 [schnell] Image Generator with AuraSR V2 Upscaling"

_ABOUT_MD = """
## About FLUX.1 [schnell]
- Fast text-to-image model optimized for local development and personal use
- Part of the FLUX.1 model family by Black Forest Labs
- Open-source: Available under Apache 2.0 license
- Supports resolutions between 0.1 and 2.0 megapixels
- Outperforms many larger models in quality and prompt adherence
- Uses advanced transformer architecture with flow matching techniques
- Capable of generating high-quality images in just a few inference steps
"""

_UPSCALE_NOTE_MD = """
**Note:** Upscaling with AuraSR V2 will significantly increase the resolution and may improve image quality, but it will also increase processing time. Use this option for the best possible output quality.
"""

# The fenced block is interpolated from ``example_prompt`` so the displayed
# example cannot drift from the textbox default.
_EXAMPLE_MD = f"""
## Example Prompt
Try this example prompt or modify it to see how FLUX.1 [schnell] performs:

```
{example_prompt}
```
"""

_STEPS_NOTE_MD = """
**Note:** FLUX.1 [schnell] is optimized for speed and can produce high-quality results with just a few inference steps. Adjust the number of steps based on your speed/quality preference. More steps may improve quality but will increase generation time.
"""

_ADDITIONAL_INFO_MD = """
## Additional Information
- FLUX.1 [schnell] is based on a hybrid architecture of multimodal and parallel diffusion transformer blocks
- It supports various aspect ratios within the 0.1 to 2.0 megapixel range
- The model uses bfloat16 precision for efficient computation
- For optimal performance, running on a CUDA-enabled GPU is recommended
- For more details and other FLUX.1 variants, visit [Black Forest Labs](https://blackforestlabs.ai)
- The upscaling feature uses AuraSR V2, an open reproduction of the GigaGAN Upscaler from fal.ai
"""

# Gradio interface
# NOTE(review): the nesting of the layout blocks below was reconstructed from
# a whitespace-collapsed source -- confirm it matches the intended layout.
with gr.Blocks() as demo:
    gr.Markdown(_TITLE_MD)

    with gr.Row():
        with gr.Column(scale=2):
            gr.Markdown(_ABOUT_MD)

        with gr.Column(scale=3):
            prompt = gr.Textbox(
                label="Prompt",
                placeholder="Enter your image description here...",
                value=example_prompt,
            )
            run_button = gr.Button("Generate")
            result = gr.Image(label="Generated Image")
            upscale = gr.Checkbox(
                label="Upscale with AuraSR V2 (4x resolution increase)",
                value=True,
            )
            gr.Markdown(_UPSCALE_NOTE_MD)

    gr.Markdown(_EXAMPLE_MD)

    with gr.Accordion("Advanced Settings", open=True):
        seed = gr.Slider(
            minimum=0,
            maximum=MAX_SEED,
            step=1,
            label="Seed",
            randomize=False,
        )
        randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
        width = gr.Slider(
            minimum=256,
            maximum=MAX_IMAGE_SIZE,
            step=32,
            value=1024,
            label="Width",
        )
        height = gr.Slider(
            minimum=256,
            maximum=MAX_IMAGE_SIZE,
            step=32,
            value=1024,
            label="Height",
        )
        num_inference_steps = gr.Slider(
            minimum=1,
            maximum=50,
            step=1,
            value=4,
            label="Number of inference steps",
        )
        gr.Markdown(_STEPS_NOTE_MD)

    gr.Markdown(_ADDITIONAL_INFO_MD)

    # The seed slider is also an output so the seed actually used is shown.
    run_button.click(
        infer,
        inputs=[
            prompt,
            seed,
            randomize_seed,
            width,
            height,
            num_inference_steps,
            upscale,
        ],
        outputs=[result, seed],
    )

demo.launch()