File size: 7,512 Bytes
52b67df
f92c162
 
52b67df
 
d5bcc1a
52b67df
 
02843f1
 
52b67df
 
f92c162
ae6a57b
 
 
 
 
f92c162
56400db
20c8e78
56400db
196d0b2
56400db
 
52b67df
 
 
 
56400db
 
 
 
 
 
 
 
d5bcc1a
 
52b67df
 
 
02843f1
52b67df
 
 
 
 
 
d5bcc1a
 
52b67df
20df108
f92c162
 
52b67df
f92c162
d5bcc1a
f92c162
 
 
d5bcc1a
 
56400db
d5bcc1a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f92c162
 
46b364b
dcbae16
9e63d68
ce743f5
f92c162
 
 
 
 
 
9dcb09a
46b364b
9dcb09a
46b364b
 
 
 
 
9dcb09a
 
 
 
ce743f5
6a73c28
 
e3f5833
6a73c28
 
f92c162
ce743f5
46b364b
f92c162
20df108
ce743f5
 
 
 
f92c162
ce743f5
6e2cf60
02843f1
ce743f5
20df108
ce743f5
 
 
 
 
 
 
 
f92c162
ce743f5
f92c162
ce743f5
 
 
 
 
 
 
52b67df
ce743f5
 
 
 
 
 
52b67df
ce743f5
 
 
 
 
 
 
52b67df
ce743f5
 
 
 
 
 
 
 
6e2cf60
 
 
 
 
 
 
 
 
ce743f5
46b364b
ce743f5
 
 
f92c162
ce743f5
dcbae16
ce743f5
f92c162
 
 
d5bcc1a
f92c162
 
 
ae6a57b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
import os
import random

import spaces
import torch
from huggingface_hub import snapshot_download
from transformers import CLIPVisionModelWithProjection, CLIPImageProcessor
from diffusers import AutoencoderKL, EulerDiscreteScheduler, UNet2DConditionModel
import gradio as gr
import numpy as np

from kolors.pipelines import pipeline_stable_diffusion_xl_chatglm_256_ipadapter, pipeline_stable_diffusion_xl_chatglm_256
from kolors.models.modeling_chatglm import ChatGLMModel
from kolors.models.tokenization_chatglm import ChatGLMTokenizer
from kolors.models import unet_2d_condition

# Device every model and pipeline below is moved to.
# NOTE(review): hard-coded to CUDA and all weights are cast to fp16, so a GPU
# is assumed at import time — confirm before running on other hardware.
device = "cuda"


def _resolve_ckpt_dir(local_dir, repo_id):
    """Return a usable checkpoint directory.

    Args:
        local_dir: Filesystem path of a pre-downloaded checkpoint.
        repo_id: Hugging Face Hub repository to fetch when ``local_dir``
            is not an existing directory.

    Returns:
        ``local_dir`` unchanged if it exists, otherwise the path returned by
        ``snapshot_download(repo_id=repo_id)``.
    """
    if os.path.isdir(local_dir):
        return local_dir
    return snapshot_download(repo_id=repo_id)


# Prefer the pre-downloaded weights; fall back to the Hub so the script also
# starts on machines that do not have these absolute paths.
ckpt_dir = _resolve_ckpt_dir(
    '/home/lixiang46/Kolors/weights/Kolors',
    "Kwai-Kolors/Kolors",
)
ckpt_IPA_dir = _resolve_ckpt_dir(
    '/home/lixiang46/Kolors/weights/Kolors-IP-Adapter-Plus',
    "Kwai-Kolors/Kolors-IP-Adapter-Plus",
)

# Components shared by both pipelines (text encoder, tokenizer, VAE, scheduler).
text_encoder = ChatGLMModel.from_pretrained(f'{ckpt_dir}/text_encoder', torch_dtype=torch.float16).half().to(device)
tokenizer = ChatGLMTokenizer.from_pretrained(f'{ckpt_dir}/text_encoder')
vae = AutoencoderKL.from_pretrained(f"{ckpt_dir}/vae", revision=None).half().to(device)
scheduler = EulerDiscreteScheduler.from_pretrained(f"{ckpt_dir}/scheduler")

# Two UNet instances are loaded from the same weights: the diffusers class for
# text-to-image and the kolors class for the IP-Adapter pipeline.
unet_t2i = UNet2DConditionModel.from_pretrained(f"{ckpt_dir}/unet", revision=None).half().to(device)
unet_i2i = unet_2d_condition.UNet2DConditionModel.from_pretrained(f"{ckpt_dir}/unet", revision=None).half().to(device)

# Image encoder and preprocessor used only by the IP-Adapter pipeline.
image_encoder = CLIPVisionModelWithProjection.from_pretrained(f'{ckpt_IPA_dir}/image_encoder',ignore_mismatched_sizes=True).to(dtype=torch.float16, device=device)
ip_img_size = 336
clip_image_processor = CLIPImageProcessor(size=ip_img_size, crop_size=ip_img_size)

# Text-to-image pipeline.
pipe_t2i = pipeline_stable_diffusion_xl_chatglm_256.StableDiffusionXLPipeline(
    vae=vae,
    text_encoder=text_encoder, 
    tokenizer=tokenizer, 
    unet=unet_t2i, 
    scheduler=scheduler, 
    force_zeros_for_empty_prompt=False
).to(device)

# Image-prompted (IP-Adapter) pipeline.
pipe_i2i = pipeline_stable_diffusion_xl_chatglm_256_ipadapter.StableDiffusionXLPipeline(
    vae=vae,
    text_encoder=text_encoder,
    tokenizer=tokenizer,
    unet=unet_i2i,
    scheduler=scheduler,
    image_encoder=image_encoder,
    feature_extractor=clip_image_processor,
    force_zeros_for_empty_prompt=False
).to(device)

# Keep a reference to the existing projection under a second attribute name
# before the adapter weights are loaded.
# NOTE(review): presumably load_ip_adapter replaces `encoder_hid_proj` and the
# kolors UNet reads `text_encoder_hid_proj` — confirm against the kolors code.
if hasattr(pipe_i2i.unet, 'encoder_hid_proj'):
    pipe_i2i.unet.text_encoder_hid_proj = pipe_i2i.unet.encoder_hid_proj
    
pipe_i2i.load_ip_adapter(f'{ckpt_IPA_dir}' , subfolder="", weight_name=["ip_adapter_plus_general.bin"])

# Upper bounds used by the seed and image-size sliders in the UI.
MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 2048

def infer(prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps, ip_adapter_image = None, ip_adapter_scale = None):
    """Generate one image with the text-to-image or the IP-Adapter pipeline.

    Args:
        prompt: Text prompt passed to the pipeline.
        negative_prompt: Negative text prompt passed to the pipeline.
        seed: Seed for the random generator; ignored when ``randomize_seed``
            is truthy.
        randomize_seed: When truthy, draw a fresh seed in ``[0, MAX_SEED]``.
        width: Output width, forwarded to the pipeline as an int.
        height: Output height, forwarded to the pipeline as an int.
        guidance_scale: Guidance scale forwarded to the pipeline.
        num_inference_steps: Number of denoising steps, forwarded as an int.
        ip_adapter_image: Optional reference image. ``None`` selects
            ``pipe_t2i``; anything else selects ``pipe_i2i``.
        ip_adapter_scale: Scale handed to ``pipe_i2i.set_ip_adapter_scale``.
            ``None`` falls back to 0.5.

    Returns:
        The first entry of the selected pipeline's ``.images`` output.
    """
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    # UI sliders may hand numeric values over as floats, while
    # torch.Generator.manual_seed requires an int; normalise all integral
    # inputs once.
    seed = int(seed)
    width = int(width)
    height = int(height)
    num_inference_steps = int(num_inference_steps)

    generator = torch.Generator().manual_seed(seed)

    # Keyword arguments common to both pipelines.
    shared_kwargs = dict(
        prompt=prompt,
        negative_prompt=negative_prompt,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
        width=width,
        height=height,
        generator=generator,
    )

    if ip_adapter_image is None:
        return pipe_t2i(**shared_kwargs).images[0]

    # The parameter defaults to None, which must not be forwarded as a scale.
    if ip_adapter_scale is None:
        ip_adapter_scale = 0.5

    pipe_i2i.set_ip_adapter_scale([ip_adapter_scale])
    return pipe_i2i(
        ip_adapter_image=[ip_adapter_image],
        num_images_per_prompt=1,
        **shared_kwargs,
    ).images[0]

# Rows for the examples widget: [prompt, IP-Adapter image path or None, image scale or None].
examples = [
    [
        "一张瓢虫的照片,微距,变焦,高质量,电影,拿着一个牌子,写着“可图”",
        None,
        None,
    ],
    [
        "穿着黑色T恤衫,上面中文绿色大字写着“可图”",
        "image/test_ip.jpg",
        0.5,
    ],
    [
        "一只可爱的小狗在奔跑",
        "image/test_ip2.png",
        0.5,
    ],
]

# Label shown in the page header, based on CUDA availability.
power_device = "GPU" if torch.cuda.is_available() else "CPU"

# Stylesheet for the two layout columns, matched by their element ids.
css = """
#col-left {
    margin: 0 auto;
    max-width: 500px;
}
#col-right {
    margin: 0 auto;
    max-width: 750px;
}
"""

# Gradio UI: header, controls in the left column, result image in the right
# column, an examples row, and the click handler that calls `infer`.
with gr.Blocks(css=css) as demo:
    # Page header.
    with gr.Row(), gr.Column():
        gr.Markdown(f"""
            # Kolors
            Currently running on {power_device}.
            """)

    with gr.Row():
        # Left column: prompt, optional reference image, advanced settings.
        with gr.Column(elem_id="col-left"):
            with gr.Row():
                prompt = gr.Textbox(label="Prompt", show_label=False, placeholder="Enter your prompt", container=False)
                run_button = gr.Button("Run", scale=0)

            with gr.Row():
                ip_adapter_image = gr.Image(label="IP-Adapter Image (optional)", type="pil")

            with gr.Accordion("Advanced Settings", open=False):
                negative_prompt = gr.Textbox(label="Negative prompt", placeholder="Enter a negative prompt", visible=True)
                seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
                randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

                # Width and height share the same range, step and default.
                size_slider_kwargs = dict(minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=1024)
                with gr.Row():
                    width = gr.Slider(label="Width", **size_slider_kwargs)
                    height = gr.Slider(label="Height", **size_slider_kwargs)

                with gr.Row():
                    guidance_scale = gr.Slider(label="Guidance scale", minimum=0.0, maximum=10.0, step=0.1, value=5.0)
                    num_inference_steps = gr.Slider(label="Number of inference steps", minimum=10, maximum=50, step=1, value=25)

                with gr.Row():
                    ip_adapter_scale = gr.Slider(
                        label="Image influence scale",
                        info="Use 1 for creating variations",
                        minimum=0.0, maximum=1.0, step=0.05, value=0.5,
                    )

        # Right column: generated image.
        with gr.Column(elem_id="col-right"):
            result = gr.Image(label="Result", show_label=False)

    # Examples fill the prompt, reference image and image scale inputs.
    with gr.Row():
        gr.Examples(examples=examples, inputs=[prompt, ip_adapter_image, ip_adapter_scale])

    # Inputs are listed in the positional order of infer()'s parameters.
    infer_inputs = [
        prompt,
        negative_prompt,
        seed,
        randomize_seed,
        width,
        height,
        guidance_scale,
        num_inference_steps,
        ip_adapter_image,
        ip_adapter_scale,
    ]
    run_button.click(fn=infer, inputs=infer_inputs, outputs=[result])

# Enable the request queue and start the app with sharing turned on.
demo.queue().launch(share=True)