adamelliotfields committed on
Commit
c5cf566
·
verified ·
1 Parent(s): eb8fc69

Remove ToMe

Browse files
Files changed (7) hide show
  1. README.md +1 -1
  2. app.py +21 -29
  3. cli.py +0 -2
  4. lib/config.py +0 -1
  5. lib/inference.py +8 -23
  6. requirements.txt +0 -1
  7. usage.md +0 -4
README.md CHANGED
@@ -53,7 +53,7 @@ Gradio app for Stable Diffusion 1.5 including:
53
  * 100+ styles from sdxl_prompt_styler
54
  * Compel prompt weighting
55
  * Multiple samplers with Karras scheduling
56
- * DeepCache, ToMe, FreeU, and Clip Skip available
57
  * Real-ESRGAN upscaling
58
  * Optional tiny autoencoder
59
 
 
53
  * 100+ styles from sdxl_prompt_styler
54
  * Compel prompt weighting
55
  * Multiple samplers with Karras scheduling
56
+ * DeepCache, FreeU, and Clip Skip available
57
  * Real-ESRGAN upscaling
58
  * Optional tiny autoencoder
59
 
app.py CHANGED
@@ -177,11 +177,12 @@ with gr.Blocks(
177
  maximum=50,
178
  step=1,
179
  )
180
- seed = gr.Number(
181
- value=Config.SEED,
182
- label="Seed",
183
- minimum=-1,
184
- maximum=(2**64) - 1,
 
185
  )
186
 
187
  with gr.Row():
@@ -212,39 +213,31 @@ with gr.Blocks(
212
  filterable=False,
213
  label="Aspect Ratio",
214
  )
215
- scale = gr.Dropdown(
216
- choices=[(f"{s}x", s) for s in Config.SCALES],
217
- filterable=False,
218
- value=Config.SCALE,
219
- label="Scale",
220
- )
221
 
222
  with gr.Row():
 
 
 
 
 
 
223
  num_images = gr.Dropdown(
224
  choices=list(range(1, 5)),
225
  value=Config.NUM_IMAGES,
226
  filterable=False,
227
  label="Images",
228
  )
229
- file_format = gr.Dropdown(
230
- choices=["png", "jpeg", "webp"],
231
- label="File Format",
232
  filterable=False,
233
- value="png",
234
- )
235
- deepcache_interval = gr.Slider(
236
- value=Config.DEEPCACHE_INTERVAL,
237
- label="DeepCache",
238
- minimum=1,
239
- maximum=4,
240
- step=1,
241
  )
242
- tome_ratio = gr.Slider(
243
- value=Config.TOME_RATIO,
244
- label="ToMe Ratio",
245
- minimum=0.0,
246
- maximum=0.5,
247
- step=0.01,
248
  )
249
 
250
  with gr.Row():
@@ -440,7 +433,6 @@ with gr.Blocks(
440
  truncate_prompts,
441
  increment_seed,
442
  deepcache_interval,
443
- tome_ratio,
444
  scale,
445
  ],
446
  )
 
177
  maximum=50,
178
  step=1,
179
  )
180
+ deepcache_interval = gr.Slider(
181
+ value=Config.DEEPCACHE_INTERVAL,
182
+ label="DeepCache",
183
+ minimum=1,
184
+ maximum=4,
185
+ step=1,
186
  )
187
 
188
  with gr.Row():
 
213
  filterable=False,
214
  label="Aspect Ratio",
215
  )
 
 
 
 
 
 
216
 
217
  with gr.Row():
218
+ file_format = gr.Dropdown(
219
+ choices=["png", "jpeg", "webp"],
220
+ label="File Format",
221
+ filterable=False,
222
+ value="png",
223
+ )
224
  num_images = gr.Dropdown(
225
  choices=list(range(1, 5)),
226
  value=Config.NUM_IMAGES,
227
  filterable=False,
228
  label="Images",
229
  )
230
+ scale = gr.Dropdown(
231
+ choices=[(f"{s}x", s) for s in Config.SCALES],
 
232
  filterable=False,
233
+ value=Config.SCALE,
234
+ label="Scale",
 
 
 
 
 
 
235
  )
236
+ seed = gr.Number(
237
+ value=Config.SEED,
238
+ label="Seed",
239
+ minimum=-1,
240
+ maximum=(2**64) - 1,
 
241
  )
242
 
243
  with gr.Row():
 
433
  truncate_prompts,
434
  increment_seed,
435
  deepcache_interval,
 
436
  scale,
437
  ],
438
  )
cli.py CHANGED
@@ -29,7 +29,6 @@ def main():
29
  parser.add_argument("--scheduler", type=str, metavar="STR", default=Config.SCHEDULER)
30
  parser.add_argument("--guidance", type=float, metavar="FLOAT", default=Config.GUIDANCE_SCALE)
31
  parser.add_argument("--steps", type=int, metavar="INT", default=Config.INFERENCE_STEPS)
32
- parser.add_argument("--tome", type=float, metavar="FLOAT", default=Config.TOME_RATIO)
33
  parser.add_argument("--strength", type=float, metavar="FLOAT", default=Config.DENOISING_STRENGTH)
34
  parser.add_argument("--image", type=str, metavar="STR")
35
  parser.add_argument("--taesd", action="store_true")
@@ -63,7 +62,6 @@ def main():
63
  args.truncate,
64
  args.no_increment,
65
  args.deepcache,
66
- args.tome,
67
  args.scale,
68
  )
69
  save_images(images, args.filename)
 
29
  parser.add_argument("--scheduler", type=str, metavar="STR", default=Config.SCHEDULER)
30
  parser.add_argument("--guidance", type=float, metavar="FLOAT", default=Config.GUIDANCE_SCALE)
31
  parser.add_argument("--steps", type=int, metavar="INT", default=Config.INFERENCE_STEPS)
 
32
  parser.add_argument("--strength", type=float, metavar="FLOAT", default=Config.DENOISING_STRENGTH)
33
  parser.add_argument("--image", type=str, metavar="STR")
34
  parser.add_argument("--taesd", action="store_true")
 
62
  args.truncate,
63
  args.no_increment,
64
  args.deepcache,
 
65
  args.scale,
66
  )
67
  save_images(images, args.filename)
lib/config.py CHANGED
@@ -43,7 +43,6 @@ Config = SimpleNamespace(
43
  INFERENCE_STEPS=30,
44
  DENOISING_STRENGTH=0.6,
45
  DEEPCACHE_INTERVAL=2,
46
- TOME_RATIO=0.0,
47
  SCALE=1,
48
  SCALES=[1, 2, 4],
49
  )
 
43
  INFERENCE_STEPS=30,
44
  DENOISING_STRENGTH=0.6,
45
  DEEPCACHE_INTERVAL=2,
 
46
  SCALE=1,
47
  SCALES=[1, 2, 4],
48
  )
lib/inference.py CHANGED
@@ -2,14 +2,12 @@ import json
2
  import os
3
  import re
4
  import time
5
- from contextlib import contextmanager
6
  from datetime import datetime
7
  from itertools import product
8
  from typing import Callable
9
 
10
  import numpy as np
11
  import spaces
12
- import tomesd
13
  import torch
14
  from compel import Compel, DiffusersTextualInversionManager, ReturnedEmbeddingsType
15
  from compel.prompt_parser import PromptParser
@@ -25,17 +23,6 @@ with open("./data/styles.json") as f:
25
  styles = json.load(f)
26
 
27
 
28
- # applies tome to the pipeline
29
- @contextmanager
30
- def token_merging(pipe, tome_ratio=0):
31
- try:
32
- if tome_ratio > 0:
33
- tomesd.apply_patch(pipe, max_downsample=1, sx=2, sy=2, ratio=tome_ratio)
34
- yield
35
- finally:
36
- tomesd.remove_patch(pipe) # idempotent
37
-
38
-
39
  # parse prompts with arrays
40
  def parse_prompt(prompt: str) -> list[str]:
41
  arrays = re.findall(r"\[\[(.*?)\]\]", prompt)
@@ -106,7 +93,6 @@ def generate(
106
  truncate_prompts=False,
107
  increment_seed=True,
108
  deepcache=1,
109
- tome_ratio=0,
110
  scale=1,
111
  Info: Callable[[str], None] = None,
112
  Error=Exception,
@@ -216,15 +202,14 @@ def generate(
216
  kwargs["strength"] = denoising_strength
217
  kwargs["image"] = prepare_image(image_prompt, (width, height))
218
 
219
- with token_merging(pipe, tome_ratio=tome_ratio):
220
- try:
221
- image = pipe(**kwargs).images[0]
222
- if scale > 1:
223
- image = upscaler.predict(image)
224
- images.append((image, str(current_seed)))
225
- finally:
226
- pipe.unload_textual_inversion()
227
- torch.cuda.empty_cache()
228
 
229
  if increment_seed:
230
  current_seed += 1
 
2
  import os
3
  import re
4
  import time
 
5
  from datetime import datetime
6
  from itertools import product
7
  from typing import Callable
8
 
9
  import numpy as np
10
  import spaces
 
11
  import torch
12
  from compel import Compel, DiffusersTextualInversionManager, ReturnedEmbeddingsType
13
  from compel.prompt_parser import PromptParser
 
23
  styles = json.load(f)
24
 
25
 
 
 
 
 
 
 
 
 
 
 
 
26
  # parse prompts with arrays
27
  def parse_prompt(prompt: str) -> list[str]:
28
  arrays = re.findall(r"\[\[(.*?)\]\]", prompt)
 
93
  truncate_prompts=False,
94
  increment_seed=True,
95
  deepcache=1,
 
96
  scale=1,
97
  Info: Callable[[str], None] = None,
98
  Error=Exception,
 
202
  kwargs["strength"] = denoising_strength
203
  kwargs["image"] = prepare_image(image_prompt, (width, height))
204
 
205
+ try:
206
+ image = pipe(**kwargs).images[0]
207
+ if scale > 1:
208
+ image = upscaler.predict(image)
209
+ images.append((image, str(current_seed)))
210
+ finally:
211
+ pipe.unload_textual_inversion()
212
+ torch.cuda.empty_cache()
 
213
 
214
  if increment_seed:
215
  current_seed += 1
requirements.txt CHANGED
@@ -8,7 +8,6 @@ gradio==4.41.0
8
  ruff==0.5.7
9
  scipy # for LMS scheduler
10
  spaces
11
- tomesd==0.1.3
12
  torch==2.3.1
13
  torchvision==0.18.1
14
  transformers==4.43.4
 
8
  ruff==0.5.7
9
  scipy # for LMS scheduler
10
  spaces
 
11
  torch==2.3.1
12
  torchvision==0.18.1
13
  transformers==4.43.4
usage.md CHANGED
@@ -71,10 +71,6 @@ Denoising strength is essentially how much the generation will differ from the i
71
  * `3`: balanced
72
  * `4`: more speed
73
 
74
- #### ToMe
75
-
76
- [Token merging](https://github.com/dbolya/tomesd) (Bolya & Hoffman 2023) reduces the number of tokens processed by the model. Set `Ratio` to the desired reduction factor. Only necessary to speed up generation on older GPUs.
77
-
78
  #### FreeU
79
 
80
  [FreeU](https://github.com/ChenyangSi/FreeU) (Si et al. 2023) re-weights the contributions sourced from the U-Net’s skip connections and backbone feature maps to potentially improve image quality.
 
71
  * `3`: balanced
72
  * `4`: more speed
73
 
 
 
 
 
74
  #### FreeU
75
 
76
  [FreeU](https://github.com/ChenyangSi/FreeU) (Si et al. 2023) re-weights the contributions sourced from the U-Net’s skip connections and backbone feature maps to potentially improve image quality.