patrickvonplaten
committed on
Commit
•
811d1c6
1
Parent(s):
7956ca3
fix more tests
Browse files- 1 +26 -0
- all_branches.txt +0 -40
- check_for_branches.py +1 -1
- collect_env.py +609 -0
- init_image.png +0 -0
- mask_image.png +0 -0
- model_ids.txt +0 -0
- new_scheduler.py +22 -0
- prompt_weight.py +35 -0
- run_bug_conv.py +63 -0
- run_local_fuse_xl.py +38 -0
- run_local_xl.py +4 -5
- run_lora.py +43 -0
- run_wuerst.py +37 -0
- run_xl_lora.py +4 -1
- sd_xl_inpaint.py +76 -0
- train_unet.py +24 -0
1
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
# Measures the peak CUDA memory used by a single forward/backward pass of the
# SDXL UNet with gradient checkpointing enabled, using random fp16 inputs.
from diffusers import UNet2DConditionModel
import torch

unet = UNet2DConditionModel.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="unet", variant="fp16", torch_dtype=torch.float16)
unet.train()
unet.enable_gradient_checkpointing()
unet = unet.to("cuda:1")

batch_size = 8

# Dummy inputs: one random example each, repeated along the batch dimension.
sample = torch.randn((1, 4, 128, 128)).half().to(unet.device).repeat(batch_size, 1, 1, 1)
time_ids = (torch.arange(6) / 6)[None, :].half().to(unet.device).repeat(batch_size, 1)
encoder_hidden_states = torch.randn((1, 77, 2048)).half().to(unet.device).repeat(batch_size, 1, 1)
text_embeds = torch.randn((1, 1280)).half().to(unet.device).repeat(batch_size, 1)

out = unet(sample, 1.0, added_cond_kwargs={"time_ids": time_ids, "text_embeds": text_embeds}, encoder_hidden_states=encoder_hidden_states).sample

# Placeholder loss; it only exists to drive a backward pass.
loss = ((out - sample) ** 2).mean()
loss.backward()

# Peak number of bytes allocated on the UNet's device so far.
print(torch.cuda.max_memory_allocated(device=unet.device))


# no gradient checkpointing: 12,276,695,552
# curr gradient checkpointing: 10,862,276,096
|
all_branches.txt
CHANGED
@@ -1,42 +1,2 @@
|
|
1 |
-
CompVis/stable-diffusion-v1-3
|
2 |
-
CompVis/stable-diffusion-v1-1
|
3 |
-
CompVis/stable-diffusion-v1-2
|
4 |
CompVis/stable-diffusion-v1-4
|
5 |
-
hakurei/waifu-diffusion
|
6 |
-
rinna/japanese-stable-diffusion
|
7 |
-
CompVis/stable-diffusion-v1-5
|
8 |
-
runwayml/stable-diffusion-inpainting
|
9 |
-
fusing/sd-inpaint-temp
|
10 |
runwayml/stable-diffusion-v1-5
|
11 |
-
ckpt/sd15
|
12 |
-
aarondotwork/sd-pokemon-diffusers
|
13 |
-
technillogue/waifu-diffusion
|
14 |
-
DGSpitzer/Cyberpunk-Anime-Diffusion
|
15 |
-
microsoft/vq-diffusion-ithq
|
16 |
-
fusing/rdm
|
17 |
-
CompVis/ldm-super-resolution-4x-openimages
|
18 |
-
BAAI/AltDiffusion
|
19 |
-
fusing/test
|
20 |
-
stabilityai/stable-diffusion-2
|
21 |
-
stabilityai/stable-diffusion-2-base
|
22 |
-
stabilityai/stable-diffusion-2-depth
|
23 |
-
stabilityai/stable-diffusion-2-inpainting
|
24 |
-
stabilityai/stable-diffusion-x4-upscaler
|
25 |
-
jplumail/matthieu-v1-pipe
|
26 |
-
stabilityai/stable-diffusion-2-1
|
27 |
-
stabilityai/stable-diffusion-2-1-base
|
28 |
-
jplumail/matthieu-v2-pipe
|
29 |
-
timbrooks/instruct-pix2pix
|
30 |
-
ruiruin/counmargemodel
|
31 |
-
Nacholmo/AbyssOrangeMix2-hard-vae-swapped
|
32 |
-
Nacholmo/Counterfeit-V2.5-vae-swapped
|
33 |
-
Nacholmo/VOXO-v0-vtuber-diffusers
|
34 |
-
p1atdev/pvc-v3
|
35 |
-
Nacholmo/meinamixv7-diffusers
|
36 |
-
gligen/diffusers-generation-text-box
|
37 |
-
gligen/diffusers-inpainting-text-box
|
38 |
-
zhg/deliberate
|
39 |
-
philz1337/realism
|
40 |
-
viktfb/patterngenai
|
41 |
-
viktfb/patterngen-v1
|
42 |
-
viktfb/style2.0
|
|
|
|
|
|
|
|
|
1 |
CompVis/stable-diffusion-v1-4
|
|
|
|
|
|
|
|
|
|
|
2 |
runwayml/stable-diffusion-v1-5
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
check_for_branches.py
CHANGED
@@ -26,7 +26,7 @@ if __name__ == "__main__":
|
|
26 |
api = HfApi()
|
27 |
branches = main(api, model_id)
|
28 |
|
29 |
-
if "
|
30 |
print(model_id)
|
31 |
#
|
32 |
# if len(branches) > 0:
|
|
|
26 |
api = HfApi()
|
27 |
branches = main(api, model_id)
|
28 |
|
29 |
+
if "non-ema" in branches:
|
30 |
print(model_id)
|
31 |
#
|
32 |
# if len(branches) > 0:
|
collect_env.py
ADDED
@@ -0,0 +1,609 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# Unlike the rest of PyTorch, this file must be Python 2 compliant.
|
3 |
+
# This script outputs relevant system environment info
|
4 |
+
# Run it with `python collect_env.py`.
|
5 |
+
import datetime
|
6 |
+
import locale
|
7 |
+
import re
|
8 |
+
import subprocess
|
9 |
+
import sys
|
10 |
+
import os
|
11 |
+
from collections import namedtuple
|
12 |
+
|
13 |
+
|
14 |
+
# Import torch if possible; TORCH_AVAILABLE records whether the import worked
# so the rest of the script can still report on a machine without PyTorch.
try:
    import torch
    TORCH_AVAILABLE = True
except (ImportError, NameError, AttributeError, OSError):
    TORCH_AVAILABLE = False
|
19 |
+
|
20 |
+
# System Environment Information
# One field per item reported by this script; every field name is also a
# placeholder in the `env_info_fmt` template.
SystemEnv = namedtuple('SystemEnv', [
    'torch_version',
    'is_debug_build',
    'cuda_compiled_version',
    'gcc_version',
    'clang_version',
    'cmake_version',
    'os',
    'libc_version',
    'python_version',
    'python_platform',
    'is_cuda_available',
    'cuda_runtime_version',
    'cuda_module_loading',
    'nvidia_driver_version',
    'nvidia_gpu_models',
    'cudnn_version',
    'pip_version',  # 'pip' or 'pip3'
    'pip_packages',
    'conda_packages',
    'hip_compiled_version',
    'hip_runtime_version',
    'miopen_runtime_version',
    'caching_allocator_config',
    'is_xnnpack_available',
    'cpu_info',
])
|
48 |
+
|
49 |
+
|
50 |
+
def run(command):
    """Run `command` in a subprocess and return (return-code, stdout, stderr).

    A string command is executed through the shell; any other value (e.g. an
    argument list) is passed to `subprocess.Popen` without a shell. Both
    output streams are decoded and stripped of surrounding whitespace.
    """
    shell = isinstance(command, str)
    p = subprocess.Popen(command, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE, shell=shell)
    raw_output, raw_err = p.communicate()
    rc = p.returncode
    # Windows output is decoded with the 'oem' codec; everywhere else the
    # locale's preferred encoding is used.
    if get_platform() == 'win32':
        enc = 'oem'
    else:
        enc = locale.getpreferredencoding()
    output = raw_output.decode(enc)
    err = raw_err.decode(enc)
    return rc, output.strip(), err.strip()
|
64 |
+
|
65 |
+
|
66 |
+
def run_and_read_all(run_lambda, command):
    """Run `command` via `run_lambda` and return its whole output.

    `run_lambda` must return a `(return-code, stdout, stderr)` triple.
    Returns None when the return code is non-zero.
    """
    rc, out, _ = run_lambda(command)
    if rc != 0:
        return None
    return out
|
72 |
+
|
73 |
+
|
74 |
+
def run_and_parse_first_match(run_lambda, command, regex):
    """Run `command` via `run_lambda` and return group 1 of the first regex match.

    `regex` must contain at least one capturing group. Returns None when the
    return code is non-zero or the output does not match.
    """
    rc, out, _ = run_lambda(command)
    if rc != 0:
        return None
    match = re.search(regex, out)
    if match is None:
        return None
    return match.group(1)
|
83 |
+
|
84 |
+
def run_and_return_first_line(run_lambda, command):
    """Run `command` via `run_lambda` and return the first line of its output.

    Returns None when the return code is non-zero; an empty output yields an
    empty string.
    """
    rc, out, _ = run_lambda(command)
    if rc != 0:
        return None
    return out.split('\n')[0]
|
90 |
+
|
91 |
+
|
92 |
+
def get_conda_packages(run_lambda):
    """Return the `conda list` lines that mention a package of interest.

    The conda executable is taken from the CONDA_EXE environment variable,
    falling back to `conda`. Comment lines (starting with '#') are dropped.
    Returns None when the command fails.
    """
    conda = os.environ.get('CONDA_EXE', 'conda')
    out = run_and_read_all(run_lambda, "{} list".format(conda))
    if out is None:
        return None

    relevant_names = (
        "torch",
        "numpy",
        "cudatoolkit",
        "soumith",
        "mkl",
        "magma",
        "triton",
    )
    return "\n".join(
        line
        for line in out.splitlines()
        if not line.startswith("#")
        and any(name in line for name in relevant_names)
    )
|
115 |
+
|
116 |
+
def get_gcc_version(run_lambda):
    """Return the text following 'gcc ' in `gcc --version`, or None on failure."""
    return run_and_parse_first_match(run_lambda, 'gcc --version', r'gcc (.*)')
|
118 |
+
|
119 |
+
def get_clang_version(run_lambda):
    """Return the text following 'clang version ' in `clang --version`, or None on failure."""
    return run_and_parse_first_match(run_lambda, 'clang --version', r'clang version (.*)')
|
121 |
+
|
122 |
+
|
123 |
+
def get_cmake_version(run_lambda):
    """Return the text following 'cmake ' in `cmake --version`, or None on failure."""
    return run_and_parse_first_match(run_lambda, 'cmake --version', r'cmake (.*)')
|
125 |
+
|
126 |
+
|
127 |
+
def get_nvidia_driver_version(run_lambda):
    """Return the NVIDIA driver version string, or None if it cannot be found.

    On macOS the version is parsed from `kextstat`; elsewhere it is parsed
    from the 'Driver Version:' field of the nvidia-smi output.
    """
    if get_platform() == 'darwin':
        cmd = 'kextstat | grep -i cuda'
        return run_and_parse_first_match(run_lambda, cmd,
                                         r'com[.]nvidia[.]CUDA [(](.*?)[)]')
    smi = get_nvidia_smi()
    return run_and_parse_first_match(run_lambda, smi, r'Driver Version: (.*?) ')
|
134 |
+
|
135 |
+
|
136 |
+
def get_gpu_info(run_lambda):
    """Return a description of the available GPUs, or None if unavailable.

    On macOS and on HIP builds of torch the name comes from
    `torch.cuda.get_device_name`; otherwise it is the output of
    `nvidia-smi -L` with the device UUIDs removed.
    """
    if get_platform() == 'darwin' or (TORCH_AVAILABLE and hasattr(torch.version, 'hip') and torch.version.hip is not None):
        if TORCH_AVAILABLE and torch.cuda.is_available():
            return torch.cuda.get_device_name(None)
        return None
    smi = get_nvidia_smi()
    uuid_regex = re.compile(r' \(UUID: .+?\)')
    rc, out, _ = run_lambda(smi + ' -L')
    if rc != 0:
        return None
    # Anonymize GPUs by removing their UUID
    return re.sub(uuid_regex, '', out)
|
148 |
+
|
149 |
+
|
150 |
+
def get_running_cuda_version(run_lambda):
    """Return the version that follows 'V' on the release line of `nvcc --version`, or None."""
    return run_and_parse_first_match(run_lambda, 'nvcc --version', r'release .+ V(.*)')
|
152 |
+
|
153 |
+
|
154 |
+
def get_cudnn_version(run_lambda):
    """This will return a list of libcudnn.so; it's hard to tell which one is being used

    Returns a single path when exactly one library file is found, a
    'Probably one of the following:' listing when several are found, and
    None when none is found. If the search command produces no output or an
    unexpected return code, the CUDNN_LIBRARY environment variable is used
    as a fallback.
    """
    if get_platform() == 'win32':
        system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows')
        cuda_path = os.environ.get('CUDA_PATH', "%CUDA_PATH%")
        where_cmd = os.path.join(system_root, 'System32', 'where')
        cudnn_cmd = '{} /R "{}\\bin" cudnn*.dll'.format(where_cmd, cuda_path)
    elif get_platform() == 'darwin':
        # CUDA libraries and drivers can be found in /usr/local/cuda/. See
        # https://docs.nvidia.com/cuda/cuda-installation-guide-mac-os-x/index.html#install
        # https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installmac
        # Use CUDNN_LIBRARY when cudnn library is installed elsewhere.
        cudnn_cmd = 'ls /usr/local/cuda/lib/libcudnn*'
    else:
        cudnn_cmd = 'ldconfig -p | grep libcudnn | rev | cut -d" " -f1 | rev'
    rc, out, _ = run_lambda(cudnn_cmd)
    # Return codes 0 and 1 are both accepted (1 is treated as "not found or
    # permission errors"); anything else falls back to CUDNN_LIBRARY.
    if len(out) == 0 or (rc != 1 and rc != 0):
        cudnn_library = os.environ.get('CUDNN_LIBRARY')
        if cudnn_library is not None and os.path.isfile(cudnn_library):
            return os.path.realpath(cudnn_library)
        return None
    files_set = set()
    # splitlines() also handles '\r\n' separators, so Windows output does not
    # leave a trailing '\r' on each path.
    for fn in out.splitlines():
        fn = os.path.realpath(fn)  # eliminate symbolic links
        if os.path.isfile(fn):
            files_set.add(fn)
    if not files_set:
        return None
    # Alphabetize the result because the order is non-deterministic otherwise
    files = sorted(files_set)
    if len(files) == 1:
        return files[0]
    result = '\n'.join(files)
    return 'Probably one of the following:\n{}'.format(result)
|
189 |
+
|
190 |
+
|
191 |
+
def get_nvidia_smi():
    """Return the command used to invoke nvidia-smi.

    On Windows the first existing candidate path (System32, then the legacy
    NVSMI directory) is returned wrapped in double quotes; otherwise, or if
    neither exists, the bare name 'nvidia-smi' is returned.
    """
    # Note: nvidia-smi is currently available only on Windows and Linux
    smi = 'nvidia-smi'
    if get_platform() == 'win32':
        system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows')
        program_files_root = os.environ.get('PROGRAMFILES', 'C:\\Program Files')
        legacy_path = os.path.join(program_files_root, 'NVIDIA Corporation', 'NVSMI', smi)
        new_path = os.path.join(system_root, 'System32', smi)
        smis = [new_path, legacy_path]
        for candidate_smi in smis:
            if os.path.exists(candidate_smi):
                smi = '"{}"'.format(candidate_smi)
                break
    return smi
|
205 |
+
|
206 |
+
|
207 |
+
# example outputs of CPU infos
|
208 |
+
# * linux
|
209 |
+
# Architecture: x86_64
|
210 |
+
# CPU op-mode(s): 32-bit, 64-bit
|
211 |
+
# Address sizes: 46 bits physical, 48 bits virtual
|
212 |
+
# Byte Order: Little Endian
|
213 |
+
# CPU(s): 128
|
214 |
+
# On-line CPU(s) list: 0-127
|
215 |
+
# Vendor ID: GenuineIntel
|
216 |
+
# Model name: Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz
|
217 |
+
# CPU family: 6
|
218 |
+
# Model: 106
|
219 |
+
# Thread(s) per core: 2
|
220 |
+
# Core(s) per socket: 32
|
221 |
+
# Socket(s): 2
|
222 |
+
# Stepping: 6
|
223 |
+
# BogoMIPS: 5799.78
|
224 |
+
# Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr
|
225 |
+
# sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon rep_good nopl
|
226 |
+
# xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq monitor ssse3 fma cx16
|
227 |
+
# pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand
|
228 |
+
# hypervisor lahf_lm abm 3dnowprefetch invpcid_single ssbd ibrs ibpb stibp ibrs_enhanced
|
229 |
+
# fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid avx512f avx512dq rdseed adx smap
|
230 |
+
# avx512ifma clflushopt clwb avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1
|
231 |
+
# xsaves wbnoinvd ida arat avx512vbmi pku ospke avx512_vbmi2 gfni vaes vpclmulqdq
|
232 |
+
# avx512_vnni avx512_bitalg tme avx512_vpopcntdq rdpid md_clear flush_l1d arch_capabilities
|
233 |
+
# Virtualization features:
|
234 |
+
# Hypervisor vendor: KVM
|
235 |
+
# Virtualization type: full
|
236 |
+
# Caches (sum of all):
|
237 |
+
# L1d: 3 MiB (64 instances)
|
238 |
+
# L1i: 2 MiB (64 instances)
|
239 |
+
# L2: 80 MiB (64 instances)
|
240 |
+
# L3: 108 MiB (2 instances)
|
241 |
+
# NUMA:
|
242 |
+
# NUMA node(s): 2
|
243 |
+
# NUMA node0 CPU(s): 0-31,64-95
|
244 |
+
# NUMA node1 CPU(s): 32-63,96-127
|
245 |
+
# Vulnerabilities:
|
246 |
+
# Itlb multihit: Not affected
|
247 |
+
# L1tf: Not affected
|
248 |
+
# Mds: Not affected
|
249 |
+
# Meltdown: Not affected
|
250 |
+
# Mmio stale data: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown
|
251 |
+
# Retbleed: Not affected
|
252 |
+
# Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp
|
253 |
+
# Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization
|
254 |
+
# Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence
|
255 |
+
# Srbds: Not affected
|
256 |
+
# Tsx async abort: Not affected
|
257 |
+
# * win32
|
258 |
+
# Architecture=9
|
259 |
+
# CurrentClockSpeed=2900
|
260 |
+
# DeviceID=CPU0
|
261 |
+
# Family=179
|
262 |
+
# L2CacheSize=40960
|
263 |
+
# L2CacheSpeed=
|
264 |
+
# Manufacturer=GenuineIntel
|
265 |
+
# MaxClockSpeed=2900
|
266 |
+
# Name=Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz
|
267 |
+
# ProcessorType=3
|
268 |
+
# Revision=27142
|
269 |
+
#
|
270 |
+
# Architecture=9
|
271 |
+
# CurrentClockSpeed=2900
|
272 |
+
# DeviceID=CPU1
|
273 |
+
# Family=179
|
274 |
+
# L2CacheSize=40960
|
275 |
+
# L2CacheSpeed=
|
276 |
+
# Manufacturer=GenuineIntel
|
277 |
+
# MaxClockSpeed=2900
|
278 |
+
# Name=Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz
|
279 |
+
# ProcessorType=3
|
280 |
+
# Revision=27142
|
281 |
+
|
282 |
+
def get_cpu_info(run_lambda):
    """Return a CPU description for the current platform.

    Runs `lscpu` on Linux, `wmic cpu get ...` on Windows and
    `sysctl -n machdep.cpu.brand_string` on macOS. Returns the command's
    stdout on success and its stderr on failure. On any other platform no
    command is run and an empty string is returned.
    """
    rc, out, err = 0, '', ''
    if get_platform() == 'linux':
        rc, out, err = run_lambda('lscpu')
    elif get_platform() == 'win32':
        rc, out, err = run_lambda(
            'wmic cpu get Name,Manufacturer,Family,Architecture,ProcessorType,DeviceID,'
            'CurrentClockSpeed,MaxClockSpeed,L2CacheSize,L2CacheSpeed,Revision /VALUE')
    elif get_platform() == 'darwin':
        rc, out, err = run_lambda("sysctl -n machdep.cpu.brand_string")
    if rc == 0:
        cpu_info = out
    else:
        cpu_info = err
    return cpu_info
|
297 |
+
|
298 |
+
|
299 |
+
def get_platform():
    """Return a normalized platform name.

    Returns 'linux', 'win32', 'cygwin' or 'darwin' when `sys.platform`
    starts with that name, otherwise `sys.platform` unchanged.
    """
    if sys.platform.startswith('linux'):
        return 'linux'
    elif sys.platform.startswith('win32'):
        return 'win32'
    elif sys.platform.startswith('cygwin'):
        return 'cygwin'
    elif sys.platform.startswith('darwin'):
        return 'darwin'
    else:
        return sys.platform
|
310 |
+
|
311 |
+
|
312 |
+
def get_mac_version(run_lambda):
    """Return the output of `sw_vers -productVersion`, or None on failure."""
    return run_and_parse_first_match(run_lambda, 'sw_vers -productVersion', r'(.*)')
|
314 |
+
|
315 |
+
|
316 |
+
def get_windows_version(run_lambda):
    """Return the Windows OS caption reported by wmic, or None on failure.

    `wmic os get Caption` is piped through `findstr /v Caption` to drop the
    header line. Both tools are located under SYSTEMROOT.
    """
    system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows')
    wmic_cmd = os.path.join(system_root, 'System32', 'Wbem', 'wmic')
    findstr_cmd = os.path.join(system_root, 'System32', 'findstr')
    return run_and_read_all(run_lambda, '{} os get Caption | {} /v Caption'.format(wmic_cmd, findstr_cmd))
|
321 |
+
|
322 |
+
|
323 |
+
def get_lsb_version(run_lambda):
    """Return the 'Description:' field of `lsb_release -a`, or None on failure."""
    return run_and_parse_first_match(run_lambda, 'lsb_release -a', r'Description:\t(.*)')
|
325 |
+
|
326 |
+
|
327 |
+
def check_release_file(run_lambda):
    """Return the PRETTY_NAME value found in /etc/*-release, or None on failure."""
    return run_and_parse_first_match(run_lambda, 'cat /etc/*-release',
                                     r'PRETTY_NAME="(.*)"')
|
330 |
+
|
331 |
+
|
332 |
+
def get_os(run_lambda):
    """Return a human-readable description of the operating system.

    Windows and Cygwin report the Windows caption; macOS reports
    'macOS <version> (<machine>)', or None if the version is unavailable.
    Linux tries `lsb_release`, then /etc/*-release, then falls back to the
    platform name, each followed by the machine type. Any other platform
    returns the platform name.
    """
    from platform import machine
    platform = get_platform()

    if platform == 'win32' or platform == 'cygwin':
        return get_windows_version(run_lambda)

    if platform == 'darwin':
        version = get_mac_version(run_lambda)
        if version is None:
            return None
        return 'macOS {} ({})'.format(version, machine())

    if platform == 'linux':
        # Ubuntu/Debian based
        desc = get_lsb_version(run_lambda)
        if desc is not None:
            return '{} ({})'.format(desc, machine())

        # Try reading /etc/*-release
        desc = check_release_file(run_lambda)
        if desc is not None:
            return '{} ({})'.format(desc, machine())

        return '{} ({})'.format(platform, machine())

    # Unknown platform
    return platform
|
360 |
+
|
361 |
+
|
362 |
+
def get_python_platform():
    """Return the string produced by `platform.platform()`."""
    import platform
    return platform.platform()
|
365 |
+
|
366 |
+
|
367 |
+
def get_libc_version():
    """Return the libc name and version joined by '-', or 'N/A' off Linux."""
    import platform
    if get_platform() != 'linux':
        return 'N/A'
    return '-'.join(platform.libc_ver())
|
372 |
+
|
373 |
+
|
374 |
+
def get_pip_packages(run_lambda):
    """Returns `pip list` output. Note: will also find conda-installed pytorch
    and numpy packages.

    Returns a `(pip_version, packages)` pair. `pip_version` is 'pip3' on
    Python 3 and 'pip' otherwise. `packages` holds the lines of
    `pip list --format=freeze` that mention a relevant package, or None when
    pip could not be run.
    """
    pip_version = 'pip3' if sys.version_info[0] == 3 else 'pip'

    # People generally have `pip` as `pip` or `pip3`
    # But here it is invoked as `python -mpip`
    out = run_and_read_all(run_lambda, [sys.executable, '-mpip', "list", "--format=freeze"])
    if out is None:
        # pip is missing or failed. Return None instead of crashing on
        # out.splitlines(); the caller reports a missing value.
        return pip_version, None

    relevant_names = (
        "torch",
        "numpy",
        "mypy",
        "flake8",
        "triton",
    )
    packages = "\n".join(
        line
        for line in out.splitlines()
        if any(name in line for name in relevant_names)
    )
    return pip_version, packages
|
400 |
+
|
401 |
+
|
402 |
+
def get_cachingallocator_config():
    """Return the PYTORCH_CUDA_ALLOC_CONF environment variable ('' if unset)."""
    ca_config = os.environ.get('PYTORCH_CUDA_ALLOC_CONF', '')
    return ca_config
|
405 |
+
|
406 |
+
|
407 |
+
def get_cuda_module_loading_config():
    """Return the CUDA_MODULE_LOADING environment variable, or "N/A" without CUDA.

    The variable is read only after `torch.cuda.init()` has been called;
    presumably initialization may set it - TODO confirm.
    """
    if TORCH_AVAILABLE and torch.cuda.is_available():
        torch.cuda.init()
        config = os.environ.get('CUDA_MODULE_LOADING', '')
        return config
    else:
        return "N/A"
|
414 |
+
|
415 |
+
|
416 |
+
def is_xnnpack_available():
    """Return `str(torch.backends.xnnpack.enabled)`, or "N/A" when torch is missing."""
    if TORCH_AVAILABLE:
        import torch.backends.xnnpack
        return str(torch.backends.xnnpack.enabled)  # type: ignore[attr-defined]
    else:
        return "N/A"
|
422 |
+
|
423 |
+
def get_env_info():
    """Collect all environment information into a `SystemEnv` tuple.

    Torch-related fields are 'N/A' when torch could not be imported. On a
    CUDA build the HIP/MIOpen fields are 'N/A'; on a HIP build the compiled
    CUDA version is 'N/A' and the HIP/MIOpen runtime versions are parsed from
    `torch._C._show_config()`.
    """
    run_lambda = run
    pip_version, pip_list_output = get_pip_packages(run_lambda)

    if TORCH_AVAILABLE:
        version_str = torch.__version__
        debug_mode_str = str(torch.version.debug)
        cuda_available_str = str(torch.cuda.is_available())
        cuda_version_str = torch.version.cuda
        if not hasattr(torch.version, 'hip') or torch.version.hip is None:  # cuda version
            hip_compiled_version = hip_runtime_version = miopen_runtime_version = 'N/A'
        else:  # HIP version
            def get_version_or_na(cfg, prefix):
                # Last whitespace-separated token of the first config line
                # containing `prefix`, or 'N/A' if no line matches.
                _lst = [s.rsplit(None, 1)[-1] for s in cfg if prefix in s]
                return _lst[0] if _lst else 'N/A'

            cfg = torch._C._show_config().split('\n')
            hip_runtime_version = get_version_or_na(cfg, 'HIP Runtime')
            miopen_runtime_version = get_version_or_na(cfg, 'MIOpen')
            cuda_version_str = 'N/A'
            hip_compiled_version = torch.version.hip
    else:
        version_str = debug_mode_str = cuda_available_str = cuda_version_str = 'N/A'
        hip_compiled_version = hip_runtime_version = miopen_runtime_version = 'N/A'

    sys_version = sys.version.replace("\n", " ")

    return SystemEnv(
        torch_version=version_str,
        is_debug_build=debug_mode_str,
        # sys.maxsize is 2**(bits - 1) - 1, so bit_length() + 1 is the word size.
        python_version='{} ({}-bit runtime)'.format(sys_version, sys.maxsize.bit_length() + 1),
        python_platform=get_python_platform(),
        is_cuda_available=cuda_available_str,
        cuda_compiled_version=cuda_version_str,
        cuda_runtime_version=get_running_cuda_version(run_lambda),
        cuda_module_loading=get_cuda_module_loading_config(),
        nvidia_gpu_models=get_gpu_info(run_lambda),
        nvidia_driver_version=get_nvidia_driver_version(run_lambda),
        cudnn_version=get_cudnn_version(run_lambda),
        hip_compiled_version=hip_compiled_version,
        hip_runtime_version=hip_runtime_version,
        miopen_runtime_version=miopen_runtime_version,
        pip_version=pip_version,
        pip_packages=pip_list_output,
        conda_packages=get_conda_packages(run_lambda),
        os=get_os(run_lambda),
        libc_version=get_libc_version(),
        gcc_version=get_gcc_version(run_lambda),
        clang_version=get_clang_version(run_lambda),
        cmake_version=get_cmake_version(run_lambda),
        caching_allocator_config=get_cachingallocator_config(),
        is_xnnpack_available=is_xnnpack_available(),
        cpu_info=get_cpu_info(run_lambda),
    )
|
477 |
+
|
478 |
+
# Report template; each placeholder is filled from the corresponding
# SystemEnv field by `pretty_str`.
env_info_fmt = """
PyTorch version: {torch_version}
Is debug build: {is_debug_build}
CUDA used to build PyTorch: {cuda_compiled_version}
ROCM used to build PyTorch: {hip_compiled_version}

OS: {os}
GCC version: {gcc_version}
Clang version: {clang_version}
CMake version: {cmake_version}
Libc version: {libc_version}

Python version: {python_version}
Python platform: {python_platform}
Is CUDA available: {is_cuda_available}
CUDA runtime version: {cuda_runtime_version}
CUDA_MODULE_LOADING set to: {cuda_module_loading}
GPU models and configuration: {nvidia_gpu_models}
Nvidia driver version: {nvidia_driver_version}
cuDNN version: {cudnn_version}
HIP runtime version: {hip_runtime_version}
MIOpen runtime version: {miopen_runtime_version}
Is XNNPACK available: {is_xnnpack_available}

CPU:
{cpu_info}

Versions of relevant libraries:
{pip_packages}
{conda_packages}
""".strip()
|
509 |
+
|
510 |
+
|
511 |
+
def pretty_str(envinfo):
    """Render a `SystemEnv` tuple as the human-readable report text.

    None values become 'Could not collect', True/False become 'Yes'/'No',
    empty package lists become 'No relevant packages', and each package line
    is tagged with '[pip]'/'[pip3]' or '[conda]'. The result is
    `env_info_fmt` filled with these values.
    """
    def replace_nones(dct, replacement='Could not collect'):
        for key in dct.keys():
            if dct[key] is not None:
                continue
            dct[key] = replacement
        return dct

    def replace_bools(dct, true='Yes', false='No'):
        for key in dct.keys():
            if dct[key] is True:
                dct[key] = true
            elif dct[key] is False:
                dct[key] = false
        return dct

    def prepend(text, tag='[prepend]'):
        lines = text.split('\n')
        updated_lines = [tag + line for line in lines]
        return '\n'.join(updated_lines)

    def replace_if_empty(text, replacement='No relevant packages'):
        if text is not None and len(text) == 0:
            return replacement
        return text

    def maybe_start_on_next_line(string):
        # If `string` is multiline, prepend a \n to it.
        if string is not None and len(string.split('\n')) > 1:
            return '\n{}\n'.format(string)
        return string

    mutable_dict = envinfo._asdict()

    # If nvidia_gpu_models is multiline, start on the next line
    mutable_dict['nvidia_gpu_models'] = \
        maybe_start_on_next_line(envinfo.nvidia_gpu_models)

    # If the machine doesn't have CUDA, report some fields as 'No CUDA'
    dynamic_cuda_fields = [
        'cuda_runtime_version',
        'nvidia_gpu_models',
        'nvidia_driver_version',
    ]
    all_cuda_fields = dynamic_cuda_fields + ['cudnn_version']
    all_dynamic_cuda_fields_missing = all(
        mutable_dict[field] is None for field in dynamic_cuda_fields)
    if TORCH_AVAILABLE and not torch.cuda.is_available() and all_dynamic_cuda_fields_missing:
        for field in all_cuda_fields:
            mutable_dict[field] = 'No CUDA'
        if envinfo.cuda_compiled_version is None:
            mutable_dict['cuda_compiled_version'] = 'None'

    # Replace True with Yes, False with No
    mutable_dict = replace_bools(mutable_dict)

    # Replace all None objects with 'Could not collect'
    mutable_dict = replace_nones(mutable_dict)

    # If either of these are '', replace with 'No relevant packages'
    mutable_dict['pip_packages'] = replace_if_empty(mutable_dict['pip_packages'])
    mutable_dict['conda_packages'] = replace_if_empty(mutable_dict['conda_packages'])

    # Tag conda and pip packages with a prefix
    # If they were previously None, they'll show up as ie '[conda] Could not collect'
    if mutable_dict['pip_packages']:
        mutable_dict['pip_packages'] = prepend(mutable_dict['pip_packages'],
                                               '[{}] '.format(envinfo.pip_version))
    if mutable_dict['conda_packages']:
        mutable_dict['conda_packages'] = prepend(mutable_dict['conda_packages'],
                                                 '[conda] ')
    # cpu_info is taken from the original tuple, bypassing the replacements above.
    mutable_dict['cpu_info'] = envinfo.cpu_info
    return env_info_fmt.format(**mutable_dict)
|
584 |
+
|
585 |
+
|
586 |
+
def get_pretty_env_info():
    """Collect the environment information and return it as formatted text."""
    return pretty_str(get_env_info())
|
588 |
+
|
589 |
+
|
590 |
+
def main():
    """Print the environment report, then point out the newest minidump, if any.

    The minidump notice is written to stderr. It is emitted only on Linux
    when torch exposes `torch.utils._crash_handler` and its default minidump
    directory exists and contains at least one entry.
    """
    print("Collecting environment information...")
    output = get_pretty_env_info()
    print(output)

    if TORCH_AVAILABLE and hasattr(torch, 'utils') and hasattr(torch.utils, '_crash_handler'):
        minidump_dir = torch.utils._crash_handler.DEFAULT_MINIDUMP_DIR
        if sys.platform == "linux" and os.path.exists(minidump_dir):
            dumps = [os.path.join(minidump_dir, dump) for dump in os.listdir(minidump_dir)]
            if not dumps:
                # An empty directory has nothing to report; max() would
                # raise ValueError on an empty list.
                return
            latest = max(dumps, key=os.path.getctime)
            ctime = os.path.getctime(latest)
            creation_time = datetime.datetime.fromtimestamp(ctime).strftime('%Y-%m-%d %H:%M:%S')
            msg = "\n*** Detected a minidump at {} created on {}, ".format(latest, creation_time) + \
                  "if this is related to your bug please include it when you file a report ***"
            print(msg, file=sys.stderr)
|
605 |
+
|
606 |
+
|
607 |
+
|
608 |
+
# Run the report only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|
init_image.png
ADDED
mask_image.png
ADDED
model_ids.txt
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
new_scheduler.py
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
# Runs Stable Diffusion v1-5 three times with the Euler-ancestral scheduler,
# optionally compiling the UNet with torch.compile first.
from diffusers import DiffusionPipeline, EulerAncestralDiscreteScheduler
import torch

path = "runwayml/stable-diffusion-v1-5"

run_compile = False  # Set True / False
use_karras_sigmas = False

pipe = DiffusionPipeline.from_pretrained(path, torch_dtype=torch.float16)
pipe = pipe.to("cuda")
# Replace the pipeline's scheduler, reusing its existing configuration.
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config, use_karras_sigmas=use_karras_sigmas)
pipe.unet.to(memory_format=torch.channels_last)

if run_compile:
    print("Run torch compile")
    pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)

prompt = "ghibli style, a fantasy landscape with castles"

# The generated images are not saved; the loop only exercises the pipeline.
for _ in range(3):
    images = pipe(prompt=prompt).images
|
prompt_weight.py
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
import torch
|
3 |
+
import os
|
4 |
+
from compel import Compel, ReturnedEmbeddingsType
|
5 |
+
from diffusers import DiffusionPipeline
|
6 |
+
from huggingface_hub import HfApi
|
7 |
+
from pathlib import Path
|
8 |
+
|
9 |
+
# Generate SDXL images from compel-weighted prompt embeddings, save each image
# under ~/images and upload it to the "patrickvonplaten/images" dataset repo.
api = HfApi()

pipeline = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    variant="fp16",
    use_safetensors=True,
    torch_dtype=torch.float16,
).to("cuda")

# SDXL has two tokenizer/text-encoder pairs; pooled output is requested only
# from the second one.
compel = Compel(
    tokenizer=[pipeline.tokenizer, pipeline.tokenizer_2],
    text_encoder=[pipeline.text_encoder, pipeline.text_encoder_2],
    returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
    requires_pooled=[False, True],
)

# Weight "ball" with 1.5 in the first prompt and 0.6 in the second
# (presumably compel's "(term)weight" syntax -- confirm against compel docs).
prompt = ["a red cat playing with a (ball)1.5", "a red cat playing with a (ball)0.6"]
conditioning, pooled = compel(prompt)

# generate image: one identically seeded generator per prompt
generator = [torch.Generator().manual_seed(33) for _ in prompt]
images = pipeline(
    prompt_embeds=conditioning,
    pooled_prompt_embeds=pooled,
    generator=generator,
    num_inference_steps=30,
).images

# Make sure the output directory exists so image.save() cannot fail on a
# fresh machine.
output_dir = Path.home() / "images"
output_dir.mkdir(parents=True, exist_ok=True)

for i, image in enumerate(images):
    file_name = f"bb_1_{i}"
    path = output_dir / f"{file_name}.png"
    image.save(path)

    api.upload_file(
        path_or_fileobj=str(path),
        # Path.name is the portable basename (no reliance on "/" separators).
        path_in_repo=path.name,
        repo_id="patrickvonplaten/images",
        repo_type="dataset",
    )
    print(f"https://huggingface.co/datasets/patrickvonplaten/images/blob/main/{file_name}.png")
|
run_bug_conv.py
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
import torch
|
3 |
+
import torch.nn as nn
|
4 |
+
import torch.nn.functional as F
|
5 |
+
|
6 |
+
|
7 |
+
class SuperConv(nn.Conv2d):
    """nn.Conv2d subclass that adds a constant 3 to its output when ``is_lora`` is set."""

    def __init__(self, *args, is_lora=False, **kwargs):
        """Forward all arguments to ``nn.Conv2d`` and remember the ``is_lora`` flag."""
        super().__init__(*args, **kwargs)

        self.is_lora = is_lora

    def forward(self, *args, **kwargs):
        """Run the regular convolution; offset the result by 3 if ``is_lora`` is truthy."""
        # Guard clause for the plain path; both branches still call
        # super().forward directly.
        if not self.is_lora:
            return super().forward(*args, **kwargs)
        return 3 + super().forward(*args, **kwargs)
|
19 |
+
|
20 |
+
# Define a simple Convolutional Neural Network
|
21 |
+
class SimpleCNN(nn.Module):
    """Two SuperConv + max-pool stages followed by three linear layers (10 outputs)."""

    def __init__(self):
        super().__init__()
        self.conv1 = SuperConv(3, 6, 5)  # Assuming input images are RGB, so 3 input channels
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = SuperConv(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Apply conv -> ReLU -> pool twice, flatten to 16*5*5 features, then the MLP head."""
        # Convolutional feature extractor.
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))

        # Flatten to rows of 16 * 5 * 5 features for the fully connected layers.
        x = x.view(-1, 16 * 5 * 5)

        # Hidden layers use ReLU; the final layer returns raw outputs.
        for hidden in (self.fc1, self.fc2):
            x = F.relu(hidden(x))
        return self.fc3(x)
|
39 |
+
|
40 |
+
# Build the network, give every conv/linear layer constant parameters, and
# compare the eager output with the torch.compile'd output on the same input.
net = SimpleCNN()

# Initialize weights with dummy values (0.1 for every weight and bias)
for layer in net.modules():
    if isinstance(layer, (nn.Conv2d, nn.Linear)):
        nn.init.constant_(layer.weight, 0.1)
        nn.init.constant_(layer.bias, 0.1)

# Perform inference in eager mode
input = torch.randn(1, 3, 32, 32).to("cuda")
net = net.to("cuda")
output = net(input)

print(output)

# Same forward pass again through the compiled module
net = torch.compile(net, mode="reduce-overhead", fullgraph=True)

output = net(input)

print(output)
|
run_local_fuse_xl.py
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
from huggingface_hub import HfApi
|
3 |
+
import torch
|
4 |
+
from pathlib import Path
|
5 |
+
import os
|
6 |
+
import time
|
7 |
+
|
8 |
+
# Load SDXL base, load the offset-example LoRA and fuse it into the UNet,
# generate one image, save it under ~/images/ediffi_sdxl and upload it to the
# "patrickvonplaten/images" dataset repo.
api = HfApi()
start_time = time.time()

from diffusers import DiffusionPipeline
import torch

pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
pipe.load_lora_weights("stabilityai/stable-diffusion-xl-base-1.0", weight_name="sd_xl_offset_example-lora_1.0.safetensors")
pipe.unet.fuse_lora()

pipe.to(torch_dtype=torch.float16)
pipe.to("cuda")

torch.manual_seed(0)

prompt = "beautiful scenery nature glass bottle landscape, , purple galaxy bottle"
negative_prompt = "text, watermark"

image = pipe(prompt, negative_prompt=negative_prompt, num_inference_steps=25).images[0]

file_name = "aaa"

# Create the output directory up front so image.save() does not fail when it
# is missing.
output_dir = Path.home() / "images" / "ediffi_sdxl"
output_dir.mkdir(parents=True, exist_ok=True)
path = output_dir / f"{file_name}.png"
image.save(path)

api.upload_file(
    path_or_fileobj=str(path),
    # Path.name is the portable basename (no reliance on "/" separators).
    path_in_repo=path.name,
    repo_id="patrickvonplaten/images",
    repo_type="dataset",
)
print(f"https://huggingface.co/datasets/patrickvonplaten/images/blob/main/{file_name}.png")
|
run_local_xl.py
CHANGED
@@ -19,15 +19,14 @@ start_time = time.time()
|
|
19 |
# use_refiner = bool(int(sys.argv[1]))
|
20 |
use_refiner = True
|
21 |
use_diffusers = True
|
22 |
-
path = "/
|
23 |
-
refiner_path = "/
|
24 |
-
vae_path = "/
|
25 |
-
vae_path = "/home/patrick/sai/sdxl-vae"
|
26 |
|
27 |
vae = AutoencoderKL.from_pretrained(vae_path, torch_dtype=torch.float16, force_upcast=True)
|
28 |
if use_diffusers:
|
29 |
# pipe = StableDiffusionXLPipeline.from_pretrained(path, vae=vae, torch_dtype=torch.float16, variant="fp16", use_safetensors=True, local_files_only=True)
|
30 |
-
pipe = StableDiffusionXLPipeline.from_pretrained(path, torch_dtype=torch.float16, vae=vae, variant="fp16", use_safetensors=True, local_files_only=True)
|
31 |
print(time.time() - start_time)
|
32 |
pipe.to("cuda")
|
33 |
|
|
|
19 |
# use_refiner = bool(int(sys.argv[1]))
|
20 |
use_refiner = True
|
21 |
use_diffusers = True
|
22 |
+
path = "stabilityai/stable-diffusion-xl-base-1.0"
|
23 |
+
refiner_path = "stabilityai/stable-diffusion-xl-refiner-1.0"
|
24 |
+
vae_path = "stabilityai/sdxl-vae"
|
|
|
25 |
|
26 |
vae = AutoencoderKL.from_pretrained(vae_path, torch_dtype=torch.float16, force_upcast=True)
|
27 |
if use_diffusers:
|
28 |
# pipe = StableDiffusionXLPipeline.from_pretrained(path, vae=vae, torch_dtype=torch.float16, variant="fp16", use_safetensors=True, local_files_only=True)
|
29 |
+
pipe = StableDiffusionXLPipeline.from_pretrained(path, torch_dtype=torch.float16, vae=vae, variant="fp16", use_safetensors=True, local_files_only=True, add_watermarker=False)
|
30 |
print(time.time() - start_time)
|
31 |
pipe.to("cuda")
|
32 |
|
run_lora.py
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
from diffusers import StableDiffusionPipeline, KDPM2DiscreteScheduler, StableDiffusionImg2ImgPipeline, HeunDiscreteScheduler, KDPM2AncestralDiscreteScheduler, DDIMScheduler, DPMSolverMultistepScheduler
|
3 |
+
import time
|
4 |
+
import os
|
5 |
+
from huggingface_hub import HfApi
|
6 |
+
# from compel import Compel
|
7 |
+
import torch
|
8 |
+
import sys
|
9 |
+
from pathlib import Path
|
10 |
+
import requests
|
11 |
+
from PIL import Image
|
12 |
+
from io import BytesIO
|
13 |
+
|
14 |
+
# Load Stable Diffusion v1.5 with a LoRA checkpoint, generate images for one
# prompt, save each under ~/images and upload it to the
# "patrickvonplaten/images" dataset repo.
path = "runwayml/stable-diffusion-v1-5"
lora_id = "takuma104/lora-test-text-encoder-lora-target"

api = HfApi()
start_time = time.time()
pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float16)
pipe.load_lora_weights(lora_id)
pipe = pipe.to("cuda")

prompt = "a red sks dog"

images = pipe(
    prompt=prompt,
    num_inference_steps=15,
    cross_attention_kwargs={"scale": 0.5},
    generator=torch.manual_seed(0),
).images

# Create the output directory up front so image.save() does not fail when it
# is missing.
output_dir = Path.home() / "images"
output_dir.mkdir(parents=True, exist_ok=True)

for i, image in enumerate(images):
    file_name = f"aa_{i}"
    # Separate name so the model id held in `path` is not overwritten.
    image_path = output_dir / f"{file_name}.png"
    image.save(image_path)

    api.upload_file(
        path_or_fileobj=str(image_path),
        # Path.name is the portable basename (no reliance on "/" separators).
        path_in_repo=image_path.name,
        repo_id="patrickvonplaten/images",
        repo_type="dataset",
    )
    print(f"https://huggingface.co/datasets/patrickvonplaten/images/blob/main/{file_name}.png")
|
run_wuerst.py
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
import torch
|
3 |
+
from diffusers import AutoPipelineForText2Image
|
4 |
+
from huggingface_hub import HfApi
|
5 |
+
from pathlib import Path
|
6 |
+
import os
|
7 |
+
|
8 |
+
from PIL import Image
|
9 |
+
import numpy as np
|
10 |
+
|
11 |
+
# Generate one 1024x1024 image per prompt with the Wuerstchen text-to-image
# pipeline, save each under ~/images and upload it to the
# "patrickvonplaten/images" dataset repo.
api = HfApi()

pipe = AutoPipelineForText2Image.from_pretrained("warp-diffusion/WuerstchenGeneratorPipeline", torch_dtype=torch.float16).to("cuda")

prompt = [
    "An old destroyed car standing on a cliff in norway, cinematic photography",
    "Western movie, closeup cinematic photography",
    "Pink nike shoe commercial, closeup cinematic photography",
    "Croatia, closeup cinematic photography",
    "South Tyrol mountains at sunset, closeup cinematic photography",
]

images = pipe(prompt, guidance_scale=8.0, width=1024, height=1024).images

# Create the output directory up front so image.save() does not fail when it
# is missing.
output_dir = Path.home() / "images"
output_dir.mkdir(parents=True, exist_ok=True)

for i, image in enumerate(images):
    file_name = f"bb_1_{i}"
    path = output_dir / f"{file_name}.png"
    image.save(path)

    api.upload_file(
        path_or_fileobj=str(path),
        # Path.name is the portable basename (no reliance on "/" separators).
        path_in_repo=path.name,
        repo_id="patrickvonplaten/images",
        repo_type="dataset",
    )
    print(f"https://huggingface.co/datasets/patrickvonplaten/images/blob/main/{file_name}.png")
|
run_xl_lora.py
CHANGED
@@ -8,7 +8,10 @@ import os
|
|
8 |
api = HfApi()
|
9 |
|
10 |
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
|
11 |
-
pipe.load_lora_weights("
|
|
|
|
|
|
|
12 |
pipe.to(torch_dtype=torch.float16)
|
13 |
pipe.to("cuda")
|
14 |
|
|
|
8 |
api = HfApi()
|
9 |
|
10 |
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
|
11 |
+
pipe.load_lora_weights("stabilityai/stable-diffusion-xl-base-1.0", weight_name="sd_xl_offset_example-lora_1.0.safetensors")
|
12 |
+
# pipe.unet.fuse_lora()
|
13 |
+
# 7.8 it/s to beat
|
14 |
+
#
|
15 |
pipe.to(torch_dtype=torch.float16)
|
16 |
pipe.to("cuda")
|
17 |
|
sd_xl_inpaint.py
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
from diffusers import AutoPipelineForInpainting, AutoPipelineForImage2Image
|
3 |
+
from diffusers.utils import load_image
|
4 |
+
import torch
|
5 |
+
from pathlib import Path
|
6 |
+
import os
|
7 |
+
from huggingface_hub import HfApi
|
8 |
+
|
9 |
+
# Three-stage experiment:
#   1. inpaint with the SD inpainting checkpoint,
#   2. run the 1024x1024 result through the SDXL refiner loaded as an
#      inpainting pipeline,
#   3. run that result through the same components as an img2img pipeline.
# The final image is saved under ~/images/ediffi_sdxl and uploaded to the
# "patrickvonplaten/images" dataset repo.
torch.backends.cuda.matmul.allow_tf32 = True
torch_device = "cuda" if torch.cuda.is_available() else "cpu"
api = HfApi()

pipe = AutoPipelineForInpainting.from_pretrained("runwayml/stable-diffusion-inpainting", torch_dtype=torch.float16, variant="fp16", use_safetensors=True)

pipe = pipe.to(torch_device)
pipe.enable_xformers_memory_efficient_attention()

img_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png"
mask_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png"

image = load_image(img_url)
mask_image = load_image(mask_url)

prompt = "dslr photography of an empty bench, high quality"
# Use the selected device for the generator as well, so every stage agrees
# with `torch_device` instead of hard-coding "cuda".
generator = torch.Generator(device=torch_device).manual_seed(0)

# Stage 1: inpainting with the SD inpainting checkpoint.
image = pipe(
    prompt=prompt,
    image=image,
    mask_image=mask_image,
    guidance_scale=8.0,
    num_inference_steps=20,
    generator=generator,
).images[0]

# NOTE(review): only the image is resized; mask_image is passed on at its
# original size -- presumably the pipeline rescales it, confirm.
image = image.resize((1024, 1024))

# Stage 2: SDXL refiner checkpoint loaded as an inpainting pipeline.
pipe = AutoPipelineForInpainting.from_pretrained("stabilityai/stable-diffusion-xl-refiner-1.0", torch_dtype=torch.float16, variant="fp16", use_safetensors=True)
pipe.to(torch_device)

pipe.enable_xformers_memory_efficient_attention()

image = pipe(
    prompt=prompt,
    image=image,
    mask_image=mask_image,
    guidance_scale=8.0,
    num_inference_steps=100,
    strength=0.2,
    generator=generator,
).images[0]

# Stage 3: build an img2img pipeline from the stage-2 pipeline (no mask).
pipe = AutoPipelineForImage2Image.from_pipe(pipe)
pipe.enable_xformers_memory_efficient_attention()

image = pipe(
    prompt=prompt,
    image=image,
    guidance_scale=8.0,
    num_inference_steps=100,
    strength=0.2,
    generator=generator,
).images[0]

file_name = "aaa"

# Create the output directory up front so image.save() does not fail when it
# is missing.
output_dir = Path.home() / "images" / "ediffi_sdxl"
output_dir.mkdir(parents=True, exist_ok=True)
path = output_dir / f"{file_name}.png"
image.save(path)

api.upload_file(
    path_or_fileobj=str(path),
    # Path.name is the portable basename (no reliance on "/" separators).
    path_in_repo=path.name,
    repo_id="patrickvonplaten/images",
    repo_type="dataset",
)
print(f"https://huggingface.co/datasets/patrickvonplaten/images/blob/main/{file_name}.png")
|
train_unet.py
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
from diffusers import UNet2DConditionModel
|
3 |
+
import torch
|
4 |
+
|
5 |
+
# Run one forward/backward pass of the SDXL UNet (fp16, train mode, gradient
# checkpointing enabled) on random inputs and print the peak CUDA memory
# allocated on the UNet's device.
target_device = "cuda:1"
torch.cuda.set_per_process_memory_fraction(0.5, device=target_device)

unet = UNet2DConditionModel.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    subfolder="unet",
    variant="fp16",
    torch_dtype=torch.float16,
)
unet.train()
unet.enable_gradient_checkpointing()
unet = unet.to(target_device)

batch_size = 2

# Each input is created once with batch dimension 1, cast to half, moved to
# the UNet's device and then repeated along the batch dimension.
sample = torch.randn((1, 4, 128, 128)).half().to(unet.device).repeat(batch_size, 1, 1, 1)
time_ids = (torch.arange(6) / 6)[None, :].half().to(unet.device).repeat(batch_size, 1)
encoder_hidden_states = torch.randn((1, 77, 2048)).half().to(unet.device).repeat(batch_size, 1, 1)
text_embeds = torch.randn((1, 1280)).half().to(unet.device).repeat(batch_size, 1)

added_cond_kwargs = {"time_ids": time_ids, "text_embeds": text_embeds}
prediction = unet(
    sample,
    1.0,
    added_cond_kwargs=added_cond_kwargs,
    encoder_hidden_states=encoder_hidden_states,
)
out = prediction.sample

# Mean squared difference between the UNet output and its input sample.
squared_error = (out - sample) ** 2
loss = squared_error.mean()
loss.backward()

print(torch.cuda.max_memory_allocated(device=unet.device))
|