Spaces:
Running
on
Zero
Running
on
Zero
adamelliotfields
committed on
Remove ToMe
Browse files
- README.md +1 -1
- app.py +21 -29
- cli.py +0 -2
- lib/config.py +0 -1
- lib/inference.py +8 -23
- requirements.txt +0 -1
- usage.md +0 -4
README.md
CHANGED
@@ -53,7 +53,7 @@ Gradio app for Stable Diffusion 1.5 including:
|
|
53 |
* 100+ styles from sdxl_prompt_styler
|
54 |
* Compel prompt weighting
|
55 |
* Multiple samplers with Karras scheduling
|
56 |
-
* DeepCache, ToMe, FreeU, and Clip Skip available
|
57 |
* Real-ESRGAN upscaling
|
58 |
* Optional tiny autoencoder
|
59 |
|
|
|
53 |
* 100+ styles from sdxl_prompt_styler
|
54 |
* Compel prompt weighting
|
55 |
* Multiple samplers with Karras scheduling
|
56 |
+
* DeepCache, FreeU, and Clip Skip available
|
57 |
* Real-ESRGAN upscaling
|
58 |
* Optional tiny autoencoder
|
59 |
|
app.py
CHANGED
@@ -177,11 +177,12 @@ with gr.Blocks(
|
|
177 |
maximum=50,
|
178 |
step=1,
|
179 |
)
|
180 |
-
|
181 |
-
value=Config.
|
182 |
-
label="
|
183 |
-
minimum
|
184 |
-
maximum=
|
|
|
185 |
)
|
186 |
|
187 |
with gr.Row():
|
@@ -212,39 +213,31 @@ with gr.Blocks(
|
|
212 |
filterable=False,
|
213 |
label="Aspect Ratio",
|
214 |
)
|
215 |
-
scale = gr.Dropdown(
|
216 |
-
choices=[(f"{s}x", s) for s in Config.SCALES],
|
217 |
-
filterable=False,
|
218 |
-
value=Config.SCALE,
|
219 |
-
label="Scale",
|
220 |
-
)
|
221 |
|
222 |
with gr.Row():
|
|
|
|
|
|
|
|
|
|
|
|
|
223 |
num_images = gr.Dropdown(
|
224 |
choices=list(range(1, 5)),
|
225 |
value=Config.NUM_IMAGES,
|
226 |
filterable=False,
|
227 |
label="Images",
|
228 |
)
|
229 |
-
|
230 |
-
choices=["
|
231 |
-
label="File Format",
|
232 |
filterable=False,
|
233 |
-
value=
|
234 |
-
|
235 |
-
deepcache_interval = gr.Slider(
|
236 |
-
value=Config.DEEPCACHE_INTERVAL,
|
237 |
-
label="DeepCache",
|
238 |
-
minimum=1,
|
239 |
-
maximum=4,
|
240 |
-
step=1,
|
241 |
)
|
242 |
-
|
243 |
-
value=Config.
|
244 |
-
label="
|
245 |
-
minimum
|
246 |
-
maximum=
|
247 |
-
step=0.01,
|
248 |
)
|
249 |
|
250 |
with gr.Row():
|
@@ -440,7 +433,6 @@ with gr.Blocks(
|
|
440 |
truncate_prompts,
|
441 |
increment_seed,
|
442 |
deepcache_interval,
|
443 |
-
tome_ratio,
|
444 |
scale,
|
445 |
],
|
446 |
)
|
|
|
177 |
maximum=50,
|
178 |
step=1,
|
179 |
)
|
180 |
+
deepcache_interval = gr.Slider(
|
181 |
+
value=Config.DEEPCACHE_INTERVAL,
|
182 |
+
label="DeepCache",
|
183 |
+
minimum=1,
|
184 |
+
maximum=4,
|
185 |
+
step=1,
|
186 |
)
|
187 |
|
188 |
with gr.Row():
|
|
|
213 |
filterable=False,
|
214 |
label="Aspect Ratio",
|
215 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
216 |
|
217 |
with gr.Row():
|
218 |
+
file_format = gr.Dropdown(
|
219 |
+
choices=["png", "jpeg", "webp"],
|
220 |
+
label="File Format",
|
221 |
+
filterable=False,
|
222 |
+
value="png",
|
223 |
+
)
|
224 |
num_images = gr.Dropdown(
|
225 |
choices=list(range(1, 5)),
|
226 |
value=Config.NUM_IMAGES,
|
227 |
filterable=False,
|
228 |
label="Images",
|
229 |
)
|
230 |
+
scale = gr.Dropdown(
|
231 |
+
choices=[(f"{s}x", s) for s in Config.SCALES],
|
|
|
232 |
filterable=False,
|
233 |
+
value=Config.SCALE,
|
234 |
+
label="Scale",
|
|
|
|
|
|
|
|
|
|
|
|
|
235 |
)
|
236 |
+
seed = gr.Number(
|
237 |
+
value=Config.SEED,
|
238 |
+
label="Seed",
|
239 |
+
minimum=-1,
|
240 |
+
maximum=(2**64) - 1,
|
|
|
241 |
)
|
242 |
|
243 |
with gr.Row():
|
|
|
433 |
truncate_prompts,
|
434 |
increment_seed,
|
435 |
deepcache_interval,
|
|
|
436 |
scale,
|
437 |
],
|
438 |
)
|
cli.py
CHANGED
@@ -29,7 +29,6 @@ def main():
|
|
29 |
parser.add_argument("--scheduler", type=str, metavar="STR", default=Config.SCHEDULER)
|
30 |
parser.add_argument("--guidance", type=float, metavar="FLOAT", default=Config.GUIDANCE_SCALE)
|
31 |
parser.add_argument("--steps", type=int, metavar="INT", default=Config.INFERENCE_STEPS)
|
32 |
-
parser.add_argument("--tome", type=float, metavar="FLOAT", default=Config.TOME_RATIO)
|
33 |
parser.add_argument("--strength", type=float, metavar="FLOAT", default=Config.DENOISING_STRENGTH)
|
34 |
parser.add_argument("--image", type=str, metavar="STR")
|
35 |
parser.add_argument("--taesd", action="store_true")
|
@@ -63,7 +62,6 @@ def main():
|
|
63 |
args.truncate,
|
64 |
args.no_increment,
|
65 |
args.deepcache,
|
66 |
-
args.tome,
|
67 |
args.scale,
|
68 |
)
|
69 |
save_images(images, args.filename)
|
|
|
29 |
parser.add_argument("--scheduler", type=str, metavar="STR", default=Config.SCHEDULER)
|
30 |
parser.add_argument("--guidance", type=float, metavar="FLOAT", default=Config.GUIDANCE_SCALE)
|
31 |
parser.add_argument("--steps", type=int, metavar="INT", default=Config.INFERENCE_STEPS)
|
|
|
32 |
parser.add_argument("--strength", type=float, metavar="FLOAT", default=Config.DENOISING_STRENGTH)
|
33 |
parser.add_argument("--image", type=str, metavar="STR")
|
34 |
parser.add_argument("--taesd", action="store_true")
|
|
|
62 |
args.truncate,
|
63 |
args.no_increment,
|
64 |
args.deepcache,
|
|
|
65 |
args.scale,
|
66 |
)
|
67 |
save_images(images, args.filename)
|
lib/config.py
CHANGED
@@ -43,7 +43,6 @@ Config = SimpleNamespace(
|
|
43 |
INFERENCE_STEPS=30,
|
44 |
DENOISING_STRENGTH=0.6,
|
45 |
DEEPCACHE_INTERVAL=2,
|
46 |
-
TOME_RATIO=0.0,
|
47 |
SCALE=1,
|
48 |
SCALES=[1, 2, 4],
|
49 |
)
|
|
|
43 |
INFERENCE_STEPS=30,
|
44 |
DENOISING_STRENGTH=0.6,
|
45 |
DEEPCACHE_INTERVAL=2,
|
|
|
46 |
SCALE=1,
|
47 |
SCALES=[1, 2, 4],
|
48 |
)
|
lib/inference.py
CHANGED
@@ -2,14 +2,12 @@ import json
|
|
2 |
import os
|
3 |
import re
|
4 |
import time
|
5 |
-
from contextlib import contextmanager
|
6 |
from datetime import datetime
|
7 |
from itertools import product
|
8 |
from typing import Callable
|
9 |
|
10 |
import numpy as np
|
11 |
import spaces
|
12 |
-
import tomesd
|
13 |
import torch
|
14 |
from compel import Compel, DiffusersTextualInversionManager, ReturnedEmbeddingsType
|
15 |
from compel.prompt_parser import PromptParser
|
@@ -25,17 +23,6 @@ with open("./data/styles.json") as f:
|
|
25 |
styles = json.load(f)
|
26 |
|
27 |
|
28 |
-
# applies tome to the pipeline
|
29 |
-
@contextmanager
|
30 |
-
def token_merging(pipe, tome_ratio=0):
|
31 |
-
try:
|
32 |
-
if tome_ratio > 0:
|
33 |
-
tomesd.apply_patch(pipe, max_downsample=1, sx=2, sy=2, ratio=tome_ratio)
|
34 |
-
yield
|
35 |
-
finally:
|
36 |
-
tomesd.remove_patch(pipe) # idempotent
|
37 |
-
|
38 |
-
|
39 |
# parse prompts with arrays
|
40 |
def parse_prompt(prompt: str) -> list[str]:
|
41 |
arrays = re.findall(r"\[\[(.*?)\]\]", prompt)
|
@@ -106,7 +93,6 @@ def generate(
|
|
106 |
truncate_prompts=False,
|
107 |
increment_seed=True,
|
108 |
deepcache=1,
|
109 |
-
tome_ratio=0,
|
110 |
scale=1,
|
111 |
Info: Callable[[str], None] = None,
|
112 |
Error=Exception,
|
@@ -216,15 +202,14 @@ def generate(
|
|
216 |
kwargs["strength"] = denoising_strength
|
217 |
kwargs["image"] = prepare_image(image_prompt, (width, height))
|
218 |
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
torch.cuda.empty_cache()
|
228 |
|
229 |
if increment_seed:
|
230 |
current_seed += 1
|
|
|
2 |
import os
|
3 |
import re
|
4 |
import time
|
|
|
5 |
from datetime import datetime
|
6 |
from itertools import product
|
7 |
from typing import Callable
|
8 |
|
9 |
import numpy as np
|
10 |
import spaces
|
|
|
11 |
import torch
|
12 |
from compel import Compel, DiffusersTextualInversionManager, ReturnedEmbeddingsType
|
13 |
from compel.prompt_parser import PromptParser
|
|
|
23 |
styles = json.load(f)
|
24 |
|
25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
# parse prompts with arrays
|
27 |
def parse_prompt(prompt: str) -> list[str]:
|
28 |
arrays = re.findall(r"\[\[(.*?)\]\]", prompt)
|
|
|
93 |
truncate_prompts=False,
|
94 |
increment_seed=True,
|
95 |
deepcache=1,
|
|
|
96 |
scale=1,
|
97 |
Info: Callable[[str], None] = None,
|
98 |
Error=Exception,
|
|
|
202 |
kwargs["strength"] = denoising_strength
|
203 |
kwargs["image"] = prepare_image(image_prompt, (width, height))
|
204 |
|
205 |
+
try:
|
206 |
+
image = pipe(**kwargs).images[0]
|
207 |
+
if scale > 1:
|
208 |
+
image = upscaler.predict(image)
|
209 |
+
images.append((image, str(current_seed)))
|
210 |
+
finally:
|
211 |
+
pipe.unload_textual_inversion()
|
212 |
+
torch.cuda.empty_cache()
|
|
|
213 |
|
214 |
if increment_seed:
|
215 |
current_seed += 1
|
requirements.txt
CHANGED
@@ -8,7 +8,6 @@ gradio==4.41.0
|
|
8 |
ruff==0.5.7
|
9 |
scipy # for LMS scheduler
|
10 |
spaces
|
11 |
-
tomesd==0.1.3
|
12 |
torch==2.3.1
|
13 |
torchvision==0.18.1
|
14 |
transformers==4.43.4
|
|
|
8 |
ruff==0.5.7
|
9 |
scipy # for LMS scheduler
|
10 |
spaces
|
|
|
11 |
torch==2.3.1
|
12 |
torchvision==0.18.1
|
13 |
transformers==4.43.4
|
usage.md
CHANGED
@@ -71,10 +71,6 @@ Denoising strength is essentially how much the generation will differ from the i
|
|
71 |
* `3`: balanced
|
72 |
* `4`: more speed
|
73 |
|
74 |
-
#### ToMe
|
75 |
-
|
76 |
-
[Token merging](https://github.com/dbolya/tomesd) (Bolya & Hoffman 2023) reduces the number of tokens processed by the model. Set `Ratio` to the desired reduction factor. Only necessary to speed up generation on older GPUs.
|
77 |
-
|
78 |
#### FreeU
|
79 |
|
80 |
[FreeU](https://github.com/ChenyangSi/FreeU) (Si et al. 2023) re-weights the contributions sourced from the U-Net’s skip connections and backbone feature maps to potentially improve image quality.
|
|
|
71 |
* `3`: balanced
|
72 |
* `4`: more speed
|
73 |
|
|
|
|
|
|
|
|
|
74 |
#### FreeU
|
75 |
|
76 |
[FreeU](https://github.com/ChenyangSi/FreeU) (Si et al. 2023) re-weights the contributions sourced from the U-Net’s skip connections and backbone feature maps to potentially improve image quality.
|