File size: 4,691 Bytes
e06d9b6
7f891bb
4e6d911
 
2e306db
51e970b
945b578
 
d9f1205
51e970b
4e6d911
51e970b
4e6d911
 
9d86930
945b578
 
 
d2cb214
945b578
242b4ef
 
51e970b
242b4ef
e06d9b6
 
 
 
 
 
 
945b578
 
 
 
242b4ef
4e6d911
945b578
 
 
 
e06d9b6
 
 
242b4ef
 
945b578
242b4ef
e06d9b6
 
 
 
 
 
 
 
 
 
 
 
 
242b4ef
 
8c3684f
 
 
 
 
945b578
 
 
 
 
 
 
8c3684f
 
945b578
 
 
 
 
 
 
 
e06d9b6
 
 
 
 
 
 
945b578
e06d9b6
8c3684f
242b4ef
 
945b578
b9bd528
d53ee34
 
d2b0012
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import spaces
import gradio as gr
import numpy as np
import random
import torch
from diffusers import DiffusionPipeline
from PIL import Image
from aura_sr import AuraSR

# Upper bounds used by the seed and image-size sliders in the UI.
MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 2048

# Run on the GPU when torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
dtype = torch.bfloat16

# Load the FLUX.1 [schnell] pipeline in bfloat16, then move it to the
# selected device.
pipe = DiffusionPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-schnell",
    torch_dtype=dtype,
)
pipe = pipe.to(device)

# Load the AuraSR v2 model that upscale_image() relies on.
aura_sr = AuraSR.from_pretrained("fal/AuraSR-v2")

@spaces.GPU()
def infer(prompt, seed=42, randomize_seed=False, width=1024, height=1024, num_inference_steps=4, upscale=False, progress=gr.Progress(track_tqdm=True)):
    """Generate one image for *prompt* and optionally upscale it.

    Args:
        prompt: Text description forwarded to the diffusion pipeline.
        seed: Seed for the torch random generator. Replaced by a random
            value when ``randomize_seed`` is true.
        randomize_seed: When true, draw a fresh seed from ``[0, MAX_SEED]``.
        width: Requested image width, forwarded to the pipeline.
        height: Requested image height, forwarded to the pipeline.
        num_inference_steps: Number of inference steps forwarded to the
            pipeline.
        upscale: When true, the generated image is passed through
            ``upscale_image`` before being returned.
        progress: Gradio progress object created with ``track_tqdm=True``.
            It is not referenced in the body; presumably it is declared so
            Gradio reports the pipeline's tqdm progress (confirm against
            the Gradio docs).

    Returns:
        A ``(image, seed)`` tuple: the first image produced by the pipeline
        (upscaled if requested) and the integer seed that was actually used.
    """
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    # Slider values may arrive as floats (e.g. 42.0 from an API client),
    # while torch.Generator.manual_seed and the pipeline's size/step
    # arguments expect integers. Normalise them once up front.
    seed = int(seed)
    width = int(width)
    height = int(height)
    num_inference_steps = int(num_inference_steps)

    generator = torch.Generator().manual_seed(seed)
    image = pipe(
        prompt=prompt,
        width=width,
        height=height,
        num_inference_steps=num_inference_steps,
        generator=generator,
        guidance_scale=0.0,
    ).images[0]

    if upscale:
        image = upscale_image(image)

    # The seed is returned so the UI can display the value that was used.
    return image, seed

@spaces.GPU()
def upscale_image(image):
    """Return *image* after running it through ``aura_sr.upscale_4x``."""
    upscaled = aura_sr.upscale_4x(image)
    return upscaled

# Prompt pre-filled in the prompt textbox.
example_prompt = "A vibrant red origami crane on a white background, intricate paper folds, studio lighting"

# Gradio interface: a two-column layout (model description on the left,
# controls and output on the right) followed by a footer with extra notes.
with gr.Blocks() as demo:
    gr.Markdown("# FLUX.1 [schnell] Image Generator with AuraSR V2 Upscaling")

    with gr.Row():
        # Left column: static description of the model.
        with gr.Column(scale=2):
            gr.Markdown("""
            ## About FLUX.1 [schnell]
            - Fast text-to-image model optimized for local development and personal use
            - Part of the FLUX.1 model family by Black Forest Labs
            - Open-source: Available under Apache 2.0 license
            - Supports resolutions between 0.1 and 2.0 megapixels
            - Outperforms many larger models in quality and prompt adherence
            - Uses advanced transformer architecture with flow matching techniques
            - Capable of generating high-quality images in just a few inference steps
            """)

        # Right column: prompt entry, output image and generation options.
        with gr.Column(scale=3):
            prompt = gr.Textbox(
                label="Prompt",
                placeholder="Enter your image description here...",
                value=example_prompt,
            )
            run_button = gr.Button("Generate")
            result = gr.Image(label="Generated Image")
            upscale = gr.Checkbox(
                label="Upscale with AuraSR V2 (4x resolution increase)",
                value=True,
            )
            gr.Markdown("""
            **Note:** Upscaling with AuraSR V2 will significantly increase the resolution and may improve image quality,
            but it will also increase processing time. Use this option for the best possible output quality.
            """)
            gr.Markdown("""
            ## Example Prompt
            Try this example prompt or modify it to see how FLUX.1 [schnell] performs:
            ```
            A vibrant red origami crane on a white background, intricate paper folds, studio lighting
            ```
            """)

            with gr.Accordion("Advanced Settings", open=True):
                seed = gr.Slider(
                    label="Seed",
                    minimum=0,
                    maximum=MAX_SEED,
                    step=1,
                    randomize=False,
                )
                randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
                width = gr.Slider(
                    label="Width",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=1024,
                )
                height = gr.Slider(
                    label="Height",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=1024,
                )
                num_inference_steps = gr.Slider(
                    label="Number of inference steps",
                    minimum=1,
                    maximum=50,
                    step=1,
                    value=4,
                )
                gr.Markdown("""
                **Note:** FLUX.1 [schnell] is optimized for speed and can produce high-quality results with just a few inference steps.
                Adjust the number of steps based on your speed/quality preference. More steps may improve quality but will increase generation time.
                """)

    gr.Markdown("""
    ## Additional Information
    - FLUX.1 [schnell] is based on a hybrid architecture of multimodal and parallel diffusion transformer blocks
    - It supports various aspect ratios within the 0.1 to 2.0 megapixel range
    - The model uses bfloat16 precision for efficient computation
    - For optimal performance, running on a CUDA-enabled GPU is recommended
    - For more details and other FLUX.1 variants, visit [Black Forest Labs](https://blackforestlabs.ai)
    - The upscaling feature uses AuraSR V2, an open reproduction of the GigaGAN Upscaler from fal.ai
    """)

    # Clicking "Generate" calls infer(); the inputs are listed in the same
    # order as infer()'s positional parameters, and the returned seed is
    # written back into the seed slider.
    run_button.click(
        fn=infer,
        inputs=[
            prompt,
            seed,
            randomize_seed,
            width,
            height,
            num_inference_steps,
            upscale,
        ],
        outputs=[result, seed],
    )

demo.launch()