fix more tests

Browse files

Files changed (17) hide show

1 +26 -0
all_branches.txt +0 -40
check_for_branches.py +1 -1
collect_env.py +609 -0
init_image.png +0 -0
mask_image.png +0 -0
model_ids.txt +0 -0
new_scheduler.py +22 -0
prompt_weight.py +35 -0
run_bug_conv.py +63 -0
run_local_fuse_xl.py +38 -0
run_local_xl.py +4 -5
run_lora.py +43 -0
run_wuerst.py +37 -0
run_xl_lora.py +4 -1
sd_xl_inpaint.py +76 -0
train_unet.py +24 -0

1 ADDED Viewed

	@@ -0,0 +1,26 @@

+#!/usr/bin/env python3
+from diffusers import UNet2DConditionModel
+import torch
+# Load the SDXL UNet in half precision, switch it to training mode and turn on
+# gradient checkpointing before moving it to the second GPU.
+unet = UNet2DConditionModel.from_pretrained(
+    "stabilityai/stable-diffusion-xl-base-1.0",
+    subfolder="unet",
+    variant="fp16",
+    torch_dtype=torch.float16,
+)
+unet.train()
+unet.enable_gradient_checkpointing()
+unet = unet.to("cuda:1")
+batch_size = 8
+# Every dummy input is created with batch size 1 and then tiled along the
+# batch dimension, so all items of the batch are identical.
+sample = torch.randn((1, 4, 128, 128)).half().to(unet.device).repeat(batch_size, 1, 1, 1)
+time_ids = (torch.arange(6) / 6)[None, :].half().to(unet.device).repeat(batch_size, 1)
+encoder_hidden_states = torch.randn((1, 77, 2048)).half().to(unet.device).repeat(batch_size, 1, 1)
+text_embeds = torch.randn((1, 1280)).half().to(unet.device).repeat(batch_size, 1)
+added_cond_kwargs = {"time_ids": time_ids, "text_embeds": text_embeds}
+out = unet(
+    sample,
+    1.0,
+    added_cond_kwargs=added_cond_kwargs,
+    encoder_hidden_states=encoder_hidden_states,
+).sample
+# Dummy objective: it only exists to drive a backward pass through the UNet.
+loss = (out - sample).pow(2).mean()
+loss.backward()
+# Peak number of bytes allocated on the UNet's device so far.
+print(torch.cuda.max_memory_allocated(device=unet.device))
+# no gradient checkpointing: 12,276,695,552
+# curr gradient checkpointing: 10,862,276,096

all_branches.txt CHANGED Viewed

@@ -1,42 +1,2 @@
-CompVis/stable-diffusion-v1-3
-CompVis/stable-diffusion-v1-1
-CompVis/stable-diffusion-v1-2
 CompVis/stable-diffusion-v1-4
-hakurei/waifu-diffusion
-rinna/japanese-stable-diffusion
-CompVis/stable-diffusion-v1-5
-runwayml/stable-diffusion-inpainting
-fusing/sd-inpaint-temp
 runwayml/stable-diffusion-v1-5
-ckpt/sd15
-aarondotwork/sd-pokemon-diffusers
-technillogue/waifu-diffusion
-DGSpitzer/Cyberpunk-Anime-Diffusion
-microsoft/vq-diffusion-ithq
-fusing/rdm
-CompVis/ldm-super-resolution-4x-openimages
-BAAI/AltDiffusion
-fusing/test
-stabilityai/stable-diffusion-2
-stabilityai/stable-diffusion-2-base
-stabilityai/stable-diffusion-2-depth
-stabilityai/stable-diffusion-2-inpainting
-stabilityai/stable-diffusion-x4-upscaler
-jplumail/matthieu-v1-pipe
-stabilityai/stable-diffusion-2-1
-stabilityai/stable-diffusion-2-1-base
-jplumail/matthieu-v2-pipe
-timbrooks/instruct-pix2pix
-ruiruin/counmargemodel
-Nacholmo/AbyssOrangeMix2-hard-vae-swapped
-Nacholmo/Counterfeit-V2.5-vae-swapped
-Nacholmo/VOXO-v0-vtuber-diffusers
-p1atdev/pvc-v3
-Nacholmo/meinamixv7-diffusers
-gligen/diffusers-generation-text-box
-gligen/diffusers-inpainting-text-box
-zhg/deliberate
-philz1337/realism
-viktfb/patterngenai
-viktfb/patterngen-v1
-viktfb/style2.0





1	CompVis/stable-diffusion-v1-4





2	runwayml/stable-diffusion-v1-5

check_for_branches.py CHANGED Viewed

@@ -26,7 +26,7 @@ if __name__ == "__main__":
     api = HfApi()
     branches = main(api, model_id)
-    if "fp16" in branches:
         print(model_id)
 #
 #    if len(branches) > 0:

     api = HfApi()
     branches = main(api, model_id)
+    if "non-ema" in branches:
         print(model_id)
 #
 #    if len(branches) > 0:

collect_env.py ADDED Viewed

	@@ -0,0 +1,609 @@

+# Unlike the rest of PyTorch, this file must be Python 2 compliant.
+# This script outputs relevant system environment info.
+# Run it with `python collect_env.py`.
+import datetime
+import locale
+import re
+import subprocess
+import sys
+import os
+from collections import namedtuple
+try:
+    import torch
+    TORCH_AVAILABLE = True
+except (ImportError, NameError, AttributeError, OSError):
+    TORCH_AVAILABLE = False
+# System Environment Information
+# Record of every value gathered by this script. `get_env_info` fills it in and
+# `pretty_str` turns it into the printed report.
+SystemEnv = namedtuple('SystemEnv', [
+    'torch_version',
+    'is_debug_build',
+    'cuda_compiled_version',
+    'gcc_version',
+    'clang_version',
+    'cmake_version',
+    'os',
+    'libc_version',
+    'python_version',
+    'python_platform',
+    'is_cuda_available',
+    'cuda_runtime_version',
+    'cuda_module_loading',
+    'nvidia_driver_version',
+    'nvidia_gpu_models',
+    'cudnn_version',
+    'pip_version',  # 'pip' or 'pip3'
+    'pip_packages',
+    'conda_packages',
+    'hip_compiled_version',
+    'hip_runtime_version',
+    'miopen_runtime_version',
+    'caching_allocator_config',
+    'is_xnnpack_available',
+    'cpu_info',
+])
+def run(command):
+    """Execute `command` and return (return-code, stdout, stderr).
+    A `str` command is run through the shell; anything else is passed to
+    Popen as-is. Both streams are decoded with the 'oem' codec on win32 and
+    with the locale's preferred encoding elsewhere, then stripped.
+    """
+    use_shell = type(command) is str
+    proc = subprocess.Popen(command, shell=use_shell,
+                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    stdout_bytes, stderr_bytes = proc.communicate()
+    encoding = 'oem' if get_platform() == 'win32' else locale.getpreferredencoding()
+    stdout_text = stdout_bytes.decode(encoding)
+    stderr_text = stderr_bytes.decode(encoding)
+    return proc.returncode, stdout_text.strip(), stderr_text.strip()
+def run_and_read_all(run_lambda, command):
+    """Run `command` through `run_lambda`; return its full output, or None on a non-zero exit."""
+    returncode, stdout, _ = run_lambda(command)
+    return stdout if returncode == 0 else None
+def run_and_parse_first_match(run_lambda, command, regex):
+    """Run `command` through `run_lambda` and return group 1 of the first `regex` match.
+    Returns None when the command exits non-zero or the pattern does not match.
+    """
+    returncode, stdout, _ = run_lambda(command)
+    if returncode != 0:
+        return None
+    found = re.search(regex, stdout)
+    return found.group(1) if found is not None else None
+def run_and_return_first_line(run_lambda, command):
+    """Run `command` through `run_lambda` and return the first line of its output.
+    Returns None on a non-zero exit; empty output yields an empty string.
+    """
+    returncode, stdout, _ = run_lambda(command)
+    if returncode != 0:
+        return None
+    first_line, _, _ = stdout.partition('\n')
+    return first_line
+def get_conda_packages(run_lambda):
+    """Return the lines of `conda list` that mention a relevant package.
+    The conda executable is read from the CONDA_EXE environment variable
+    (default 'conda'). Returns None when the command fails.
+    """
+    conda_exe = os.environ.get('CONDA_EXE', 'conda')
+    listing = run_and_read_all(run_lambda, "{} list".format(conda_exe))
+    if listing is None:
+        return None
+    relevant_names = ("torch", "numpy", "cudatoolkit", "soumith", "mkl", "magma", "triton")
+    kept = []
+    for entry in listing.splitlines():
+        # Lines starting with '#' are never reported.
+        if entry.startswith("#"):
+            continue
+        if any(name in entry for name in relevant_names):
+            kept.append(entry)
+    return "\n".join(kept)
+def get_gcc_version(run_lambda):
+    """Return the text after 'gcc ' in the `gcc --version` output, or None on failure."""
+    command, pattern = 'gcc --version', r'gcc (.*)'
+    return run_and_parse_first_match(run_lambda, command, pattern)
+def get_clang_version(run_lambda):
+    """Return the text after 'clang version ' in the `clang --version` output, or None on failure."""
+    command, pattern = 'clang --version', r'clang version (.*)'
+    return run_and_parse_first_match(run_lambda, command, pattern)
+def get_cmake_version(run_lambda):
+    """Return the text after 'cmake ' in the `cmake --version` output, or None on failure."""
+    command, pattern = 'cmake --version', r'cmake (.*)'
+    return run_and_parse_first_match(run_lambda, command, pattern)
+def get_nvidia_driver_version(run_lambda):
+    """Return the NVIDIA driver version string, or None if it cannot be determined."""
+    if get_platform() != 'darwin':
+        # Everywhere except macOS the version is parsed from nvidia-smi's output.
+        return run_and_parse_first_match(run_lambda, get_nvidia_smi(),
+                                         r'Driver Version: (.*?) ')
+    # macOS: parse the CUDA entry of the kextstat listing instead.
+    return run_and_parse_first_match(run_lambda, 'kextstat | grep -i cuda',
+                                     r'com[.]nvidia[.]CUDA [(](.*?)[)]')
+def get_gpu_info(run_lambda):
+    """Return a description of the visible GPUs, or None if unavailable.
+    On macOS and on torch builds whose `torch.version.hip` is set, the device
+    name is queried through torch.cuda; otherwise the output of
+    `nvidia-smi -L` is returned with the GPU UUIDs removed.
+    """
+    hip_build = TORCH_AVAILABLE and getattr(torch.version, 'hip', None) is not None
+    if get_platform() == 'darwin' or hip_build:
+        if not (TORCH_AVAILABLE and torch.cuda.is_available()):
+            return None
+        return torch.cuda.get_device_name(None)
+    rc, listing, _ = run_lambda(get_nvidia_smi() + ' -L')
+    if rc != 0:
+        return None
+    # Anonymize GPUs by removing their UUID
+    return re.sub(r' \(UUID: .+?\)', '', listing)
+def get_running_cuda_version(run_lambda):
+    """Return the version following 'V' on the 'release' line of `nvcc --version`, or None on failure."""
+    command, pattern = 'nvcc --version', r'release .+ V(.*)'
+    return run_and_parse_first_match(run_lambda, command, pattern)
+def get_cudnn_version(run_lambda):
+    """Locate the installed cuDNN library file(s).
+    Returns a single path when exactly one library file is found, a
+    'Probably one of the following:' multi-line string when several are found
+    (it is hard to tell which one is being used), or None when nothing is
+    found. If the search command yields no output or an unexpected exit code,
+    the CUDNN_LIBRARY environment variable is used as a fallback.
+    """
+    if get_platform() == 'win32':
+        system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows')
+        cuda_path = os.environ.get('CUDA_PATH', "%CUDA_PATH%")
+        where_cmd = os.path.join(system_root, 'System32', 'where')
+        cudnn_cmd = '{} /R "{}\\bin" cudnn*.dll'.format(where_cmd, cuda_path)
+    elif get_platform() == 'darwin':
+        # CUDA libraries and drivers can be found in /usr/local/cuda/. See
+        # https://docs.nvidia.com/cuda/cuda-installation-guide-mac-os-x/index.html#install
+        # https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installmac
+        # Use CUDNN_LIBRARY when cudnn library is installed elsewhere.
+        cudnn_cmd = 'ls /usr/local/cuda/lib/libcudnn*'
+    else:
+        cudnn_cmd = 'ldconfig -p | grep libcudnn | rev | cut -d" " -f1 | rev'
+    rc, out, _ = run_lambda(cudnn_cmd)
+    # find will return 1 if there are permission errors or if not found
+    # NOTE(review): none of the commands above use `find`, so this remark is
+    # presumably a leftover; in effect exit codes 0 and 1 are both accepted.
+    if len(out) == 0 or (rc != 1 and rc != 0):
+        l = os.environ.get('CUDNN_LIBRARY')
+        if l is not None and os.path.isfile(l):
+            return os.path.realpath(l)
+        return None
+    # Deduplicate the reported paths after resolving symlinks.
+    files_set = set()
+    for fn in out.split('\n'):
+        fn = os.path.realpath(fn)  # eliminate symbolic links
+        if os.path.isfile(fn):
+            files_set.add(fn)
+    if not files_set:
+        return None
+    # Alphabetize the result because the order is non-deterministic otherwise
+    files = sorted(files_set)
+    if len(files) == 1:
+        return files[0]
+    result = '\n'.join(files)
+    return 'Probably one of the following:\n{}'.format(result)
+def get_nvidia_smi():
+    """Return the command used to invoke nvidia-smi.
+    On win32 the first existing candidate path is returned wrapped in double
+    quotes; if none exists, and on every other platform, the bare name
+    'nvidia-smi' is returned.
+    """
+    # Note: nvidia-smi is currently available only on Windows and Linux
+    executable = 'nvidia-smi'
+    if get_platform() != 'win32':
+        return executable
+    system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows')
+    program_files_root = os.environ.get('PROGRAMFILES', 'C:\\Program Files')
+    # The System32 location is tried first, then the legacy NVSMI directory.
+    candidates = (
+        os.path.join(system_root, 'System32', executable),
+        os.path.join(program_files_root, 'NVIDIA Corporation', 'NVSMI', executable),
+    )
+    for candidate in candidates:
+        if os.path.exists(candidate):
+            return '"{}"'.format(candidate)
+    return executable
+# Example CPU info output per platform
+#  * linux
+#    Architecture:            x86_64
+#      CPU op-mode(s):        32-bit, 64-bit
+#      Address sizes:         46 bits physical, 48 bits virtual
+#      Byte Order:            Little Endian
+#    CPU(s):                  128
+#      On-line CPU(s) list:   0-127
+#    Vendor ID:               GenuineIntel
+#      Model name:            Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz
+#        CPU family:          6
+#        Model:               106
+#        Thread(s) per core:  2
+#        Core(s) per socket:  32
+#        Socket(s):           2
+#        Stepping:            6
+#        BogoMIPS:            5799.78
+#        Flags:               fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr
+#                             sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon rep_good nopl
+#                             xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq monitor ssse3 fma cx16
+#                             pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand
+#                             hypervisor lahf_lm abm 3dnowprefetch invpcid_single ssbd ibrs ibpb stibp ibrs_enhanced
+#                             fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid avx512f avx512dq rdseed adx smap
+#                             avx512ifma clflushopt clwb avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1
+#                             xsaves wbnoinvd ida arat avx512vbmi pku ospke avx512_vbmi2 gfni vaes vpclmulqdq
+#                             avx512_vnni avx512_bitalg tme avx512_vpopcntdq rdpid md_clear flush_l1d arch_capabilities
+#    Virtualization features:
+#      Hypervisor vendor:     KVM
+#      Virtualization type:   full
+#    Caches (sum of all):
+#      L1d:                   3 MiB (64 instances)
+#      L1i:                   2 MiB (64 instances)
+#      L2:                    80 MiB (64 instances)
+#      L3:                    108 MiB (2 instances)
+#    NUMA:
+#      NUMA node(s):          2
+#      NUMA node0 CPU(s):     0-31,64-95
+#      NUMA node1 CPU(s):     32-63,96-127
+#    Vulnerabilities:
+#      Itlb multihit:         Not affected
+#      L1tf:                  Not affected
+#      Mds:                   Not affected
+#      Meltdown:              Not affected
+#      Mmio stale data:       Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown
+#      Retbleed:              Not affected
+#      Spec store bypass:     Mitigation; Speculative Store Bypass disabled via prctl and seccomp
+#      Spectre v1:            Mitigation; usercopy/swapgs barriers and __user pointer sanitization
+#      Spectre v2:            Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence
+#      Srbds:                 Not affected
+#      Tsx async abort:       Not affected
+#  * win32
+#    Architecture=9
+#    CurrentClockSpeed=2900
+#    DeviceID=CPU0
+#    Family=179
+#    L2CacheSize=40960
+#    L2CacheSpeed=
+#    Manufacturer=GenuineIntel
+#    MaxClockSpeed=2900
+#    Name=Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz
+#    ProcessorType=3
+#    Revision=27142
+#
+#    Architecture=9
+#    CurrentClockSpeed=2900
+#    DeviceID=CPU1
+#    Family=179
+#    L2CacheSize=40960
+#    L2CacheSpeed=
+#    Manufacturer=GenuineIntel
+#    MaxClockSpeed=2900
+#    Name=Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz
+#    ProcessorType=3
+#    Revision=27142
+def get_cpu_info(run_lambda):
+    """Return CPU details as text: the command's stdout on success, its stderr otherwise.
+    Uses `lscpu` on linux, `wmic cpu get ...` on win32 and `sysctl` on darwin.
+    On any other platform no command is run and an empty string is returned.
+    """
+    rc, out, err = 0, '', ''
+    platform_name = get_platform()
+    if platform_name == 'linux':
+        rc, out, err = run_lambda('lscpu')
+    elif platform_name == 'win32':
+        rc, out, err = run_lambda('wmic cpu get Name,Manufacturer,Family,Architecture,ProcessorType,DeviceID,\
+        CurrentClockSpeed,MaxClockSpeed,L2CacheSize,L2CacheSpeed,Revision /VALUE')
+    elif platform_name == 'darwin':
+        rc, out, err = run_lambda("sysctl -n machdep.cpu.brand_string")
+    return out if rc == 0 else err
+def get_platform():
+    """Return a normalized platform name.
+    Gives 'linux', 'win32', 'cygwin' or 'darwin' when sys.platform starts with
+    that prefix; any other value of sys.platform is returned unchanged.
+    """
+    for known in ('linux', 'win32', 'cygwin', 'darwin'):
+        if sys.platform.startswith(known):
+            return known
+    return sys.platform
+def get_mac_version(run_lambda):
+    """Return the first line printed by `sw_vers -productVersion`, or None on failure."""
+    command, pattern = 'sw_vers -productVersion', r'(.*)'
+    return run_and_parse_first_match(run_lambda, command, pattern)
+def get_windows_version(run_lambda):
+    """Return the output of `wmic os get Caption` without its 'Caption' lines, or None on failure."""
+    system32 = os.path.join(os.environ.get('SYSTEMROOT', 'C:\\Windows'), 'System32')
+    wmic_cmd = os.path.join(system32, 'Wbem', 'wmic')
+    findstr_cmd = os.path.join(system32, 'findstr')
+    # `findstr /v Caption` filters out the lines that contain "Caption".
+    command = '{} os get Caption | {} /v Caption'.format(wmic_cmd, findstr_cmd)
+    return run_and_read_all(run_lambda, command)
+def get_lsb_version(run_lambda):
+    """Return the 'Description:' value from `lsb_release -a`, or None on failure."""
+    command, pattern = 'lsb_release -a', r'Description:\t(.*)'
+    return run_and_parse_first_match(run_lambda, command, pattern)
+def check_release_file(run_lambda):
+    """Return the PRETTY_NAME value found in /etc/*-release, or None on failure."""
+    command, pattern = 'cat /etc/*-release', r'PRETTY_NAME="(.*)"'
+    return run_and_parse_first_match(run_lambda, command, pattern)
+def get_os(run_lambda):
+    """Return a human-readable description of the operating system.
+    Windows/cygwin: the Windows caption. macOS: 'macOS <version> (<machine>)'
+    or None when the version is unknown. Linux: the lsb_release description,
+    else the PRETTY_NAME from /etc/*-release, else 'linux', each followed by
+    the machine type. Any other platform: the platform name itself.
+    """
+    from platform import machine
+    plat = get_platform()
+    if plat in ('win32', 'cygwin'):
+        return get_windows_version(run_lambda)
+    if plat == 'darwin':
+        version = get_mac_version(run_lambda)
+        return None if version is None else 'macOS {} ({})'.format(version, machine())
+    if plat == 'linux':
+        # Try lsb_release first (Ubuntu/Debian based), then /etc/*-release;
+        # the second probe only runs when the first one found nothing.
+        for probe in (get_lsb_version, check_release_file):
+            desc = probe(run_lambda)
+            if desc is not None:
+                return '{} ({})'.format(desc, machine())
+        return '{} ({})'.format(plat, machine())
+    # Unknown platform
+    return plat
+def get_python_platform():
+    """Return the platform identification string from `platform.platform()`."""
+    from platform import platform as describe_platform
+    return describe_platform()
+def get_libc_version():
+    """Return the libc name and version joined by '-', or 'N/A' when not on Linux."""
+    if get_platform() == 'linux':
+        import platform
+        return '-'.join(platform.libc_ver())
+    return 'N/A'
+def get_pip_packages(run_lambda):
+    """Return `(pip_version, packages)` for the running interpreter.
+    `pip_version` is 'pip3' on Python 3 and 'pip' otherwise. `packages` holds
+    the lines of `python -mpip list --format=freeze` that mention a relevant
+    package, or None when pip could not be run. Note: this will also find
+    conda-installed pytorch and numpy packages.
+    """
+    # People generally have `pip` as `pip` or `pip3`
+    # But here it is invoked as `python -mpip`
+    def run_with_pip(pip):
+        out = run_and_read_all(run_lambda, pip + ["list", "--format=freeze"])
+        if out is None:
+            # The pip command failed. Propagate None instead of crashing on
+            # `None.splitlines()`, so the report can show the value as missing.
+            return None
+        relevant_names = ("torch", "numpy", "mypy", "flake8", "triton")
+        return "\n".join(
+            line
+            for line in out.splitlines()
+            if any(name in line for name in relevant_names)
+        )
+    pip_version = 'pip3' if sys.version[0] == '3' else 'pip'
+    out = run_with_pip([sys.executable, '-mpip'])
+    return pip_version, out
+def get_cachingallocator_config():
+    """Return the PYTORCH_CUDA_ALLOC_CONF environment variable, or '' when it is unset."""
+    return os.environ.get('PYTORCH_CUDA_ALLOC_CONF', '')
+def get_cuda_module_loading_config():
+    """Return the CUDA_MODULE_LOADING environment variable after initializing CUDA.
+    Returns "N/A" when torch is missing or CUDA is not available, and '' when
+    the variable is unset.
+    """
+    if not (TORCH_AVAILABLE and torch.cuda.is_available()):
+        return "N/A"
+    torch.cuda.init()
+    return os.environ.get('CUDA_MODULE_LOADING', '')
+def is_xnnpack_available():
+    """Return str(torch.backends.xnnpack.enabled), or "N/A" when torch is not installed."""
+    if not TORCH_AVAILABLE:
+        return "N/A"
+    import torch.backends.xnnpack
+    return str(torch.backends.xnnpack.enabled)  # type: ignore[attr-defined]
+def get_env_info():
+    """Collect all environment details and return them as a `SystemEnv`.
+    Torch-derived fields are set to 'N/A' when torch could not be imported;
+    the remaining fields come from the helper functions of this file, which
+    run external commands through `run`.
+    """
+    run_lambda = run
+    pip_version, pip_list_output = get_pip_packages(run_lambda)
+    if TORCH_AVAILABLE:
+        version_str = torch.__version__
+        debug_mode_str = str(torch.version.debug)
+        cuda_available_str = str(torch.cuda.is_available())
+        cuda_version_str = torch.version.cuda
+        if not hasattr(torch.version, 'hip') or torch.version.hip is None:  # cuda version
+            hip_compiled_version = hip_runtime_version = miopen_runtime_version = 'N/A'
+        else:  # HIP version
+            def get_version_or_na(cfg, prefix):
+                # Last whitespace-separated token of the first config line
+                # containing `prefix`, or 'N/A' when no line matches.
+                _lst = [s.rsplit(None, 1)[-1] for s in cfg if prefix in s]
+                return _lst[0] if _lst else 'N/A'
+            cfg = torch._C._show_config().split('\n')
+            hip_runtime_version = get_version_or_na(cfg, 'HIP Runtime')
+            miopen_runtime_version = get_version_or_na(cfg, 'MIOpen')
+            # On a HIP build the CUDA compile version is reported as 'N/A'.
+            cuda_version_str = 'N/A'
+            hip_compiled_version = torch.version.hip
+    else:
+        version_str = debug_mode_str = cuda_available_str = cuda_version_str = 'N/A'
+        hip_compiled_version = hip_runtime_version = miopen_runtime_version = 'N/A'
+    # sys.version may span several lines; flatten it to one.
+    sys_version = sys.version.replace("\n", " ")
+    return SystemEnv(
+        torch_version=version_str,
+        is_debug_build=debug_mode_str,
+        python_version='{} ({}-bit runtime)'.format(sys_version, sys.maxsize.bit_length() + 1),
+        python_platform=get_python_platform(),
+        is_cuda_available=cuda_available_str,
+        cuda_compiled_version=cuda_version_str,
+        cuda_runtime_version=get_running_cuda_version(run_lambda),
+        cuda_module_loading=get_cuda_module_loading_config(),
+        nvidia_gpu_models=get_gpu_info(run_lambda),
+        nvidia_driver_version=get_nvidia_driver_version(run_lambda),
+        cudnn_version=get_cudnn_version(run_lambda),
+        hip_compiled_version=hip_compiled_version,
+        hip_runtime_version=hip_runtime_version,
+        miopen_runtime_version=miopen_runtime_version,
+        pip_version=pip_version,
+        pip_packages=pip_list_output,
+        conda_packages=get_conda_packages(run_lambda),
+        os=get_os(run_lambda),
+        libc_version=get_libc_version(),
+        gcc_version=get_gcc_version(run_lambda),
+        clang_version=get_clang_version(run_lambda),
+        cmake_version=get_cmake_version(run_lambda),
+        caching_allocator_config=get_cachingallocator_config(),
+        is_xnnpack_available=is_xnnpack_available(),
+        cpu_info=get_cpu_info(run_lambda),
+    )
+# Report template. Each `{placeholder}` is the name of a `SystemEnv` field and
+# is filled in by `pretty_str` via `str.format`.
+env_info_fmt = """
+PyTorch version: {torch_version}
+Is debug build: {is_debug_build}
+CUDA used to build PyTorch: {cuda_compiled_version}
+ROCM used to build PyTorch: {hip_compiled_version}
+OS: {os}
+GCC version: {gcc_version}
+Clang version: {clang_version}
+CMake version: {cmake_version}
+Libc version: {libc_version}
+Python version: {python_version}
+Python platform: {python_platform}
+Is CUDA available: {is_cuda_available}
+CUDA runtime version: {cuda_runtime_version}
+CUDA_MODULE_LOADING set to: {cuda_module_loading}
+GPU models and configuration: {nvidia_gpu_models}
+Nvidia driver version: {nvidia_driver_version}
+cuDNN version: {cudnn_version}
+HIP runtime version: {hip_runtime_version}
+MIOpen runtime version: {miopen_runtime_version}
+Is XNNPACK available: {is_xnnpack_available}
+CPU:
+{cpu_info}
+Versions of relevant libraries:
+{pip_packages}
+{conda_packages}
+""".strip()
+def pretty_str(envinfo):
+    """Render a `SystemEnv` as the final report text using `env_info_fmt`.
+    Missing values (None) become 'Could not collect', booleans become
+    'Yes'/'No', empty package lists become 'No relevant packages', and every
+    package line is tagged with '[pip]'/'[pip3]' or '[conda]'.
+    """
+    def replace_nones(dct, replacement='Could not collect'):
+        # Replace every None value in `dct` in place and return `dct`.
+        for key in dct.keys():
+            if dct[key] is not None:
+                continue
+            dct[key] = replacement
+        return dct
+    def replace_bools(dct, true='Yes', false='No'):
+        # Replace the True/False singletons in `dct` in place and return `dct`.
+        for key in dct.keys():
+            if dct[key] is True:
+                dct[key] = true
+            elif dct[key] is False:
+                dct[key] = false
+        return dct
+    def prepend(text, tag='[prepend]'):
+        # Put `tag` in front of every line of `text`.
+        lines = text.split('\n')
+        updated_lines = [tag + line for line in lines]
+        return '\n'.join(updated_lines)
+    def replace_if_empty(text, replacement='No relevant packages'):
+        # Only an empty string is replaced; None is passed through unchanged.
+        if text is not None and len(text) == 0:
+            return replacement
+        return text
+    def maybe_start_on_next_line(string):
+        # If `string` is multiline, prepend a \n to it.
+        if string is not None and len(string.split('\n')) > 1:
+            return '\n{}\n'.format(string)
+        return string
+    mutable_dict = envinfo._asdict()
+    # If nvidia_gpu_models is multiline, start on the next line
+    mutable_dict['nvidia_gpu_models'] = \
+        maybe_start_on_next_line(envinfo.nvidia_gpu_models)
+    # If the machine doesn't have CUDA, report some fields as 'No CUDA'
+    dynamic_cuda_fields = [
+        'cuda_runtime_version',
+        'nvidia_gpu_models',
+        'nvidia_driver_version',
+    ]
+    all_cuda_fields = dynamic_cuda_fields + ['cudnn_version']
+    all_dynamic_cuda_fields_missing = all(
+        mutable_dict[field] is None for field in dynamic_cuda_fields)
+    if TORCH_AVAILABLE and not torch.cuda.is_available() and all_dynamic_cuda_fields_missing:
+        for field in all_cuda_fields:
+            mutable_dict[field] = 'No CUDA'
+        if envinfo.cuda_compiled_version is None:
+            mutable_dict['cuda_compiled_version'] = 'None'
+    # Replace True with Yes, False with No
+    mutable_dict = replace_bools(mutable_dict)
+    # Replace all None objects with 'Could not collect'
+    mutable_dict = replace_nones(mutable_dict)
+    # If either of these are '', replace with 'No relevant packages'
+    mutable_dict['pip_packages'] = replace_if_empty(mutable_dict['pip_packages'])
+    mutable_dict['conda_packages'] = replace_if_empty(mutable_dict['conda_packages'])
+    # Tag conda and pip packages with a prefix
+    # If they were previously None, they'll show up as ie '[conda] Could not collect'
+    if mutable_dict['pip_packages']:
+        mutable_dict['pip_packages'] = prepend(mutable_dict['pip_packages'],
+                                               '[{}] '.format(envinfo.pip_version))
+    if mutable_dict['conda_packages']:
+        mutable_dict['conda_packages'] = prepend(mutable_dict['conda_packages'],
+                                                 '[conda] ')
+    # cpu_info is restored from the original record, undoing any replacement
+    # made to it by the passes above.
+    mutable_dict['cpu_info'] = envinfo.cpu_info
+    return env_info_fmt.format(**mutable_dict)
+def get_pretty_env_info():
+    """Collect the environment details and return them as formatted report text."""
+    env = get_env_info()
+    return pretty_str(env)
+def main():
+    """Print the environment report to stdout.
+    When torch exposes a crash handler and its minidump directory exists on
+    Linux, also print a note about the most recently created minidump to
+    stderr.
+    """
+    print("Collecting environment information...")
+    output = get_pretty_env_info()
+    print(output)
+    if TORCH_AVAILABLE and hasattr(torch, 'utils') and hasattr(torch.utils, '_crash_handler'):
+        minidump_dir = torch.utils._crash_handler.DEFAULT_MINIDUMP_DIR
+        if sys.platform == "linux" and os.path.exists(minidump_dir):
+            dumps = [os.path.join(minidump_dir, dump) for dump in os.listdir(minidump_dir)]
+            # The directory can exist but be empty; max() on an empty list
+            # would raise ValueError, so only report when a dump is present.
+            if dumps:
+                latest = max(dumps, key=os.path.getctime)
+                ctime = os.path.getctime(latest)
+                creation_time = datetime.datetime.fromtimestamp(ctime).strftime('%Y-%m-%d %H:%M:%S')
+                msg = "\n*** Detected a minidump at {} created on {}, ".format(latest, creation_time) + \
+                      "if this is related to your bug please include it when you file a report ***"
+                print(msg, file=sys.stderr)
+if __name__ == '__main__':
+    main()

init_image.png ADDED Viewed

mask_image.png ADDED Viewed

model_ids.txt CHANGED Viewed

The diff for this file is too large to render. See raw diff

new_scheduler.py ADDED Viewed

	@@ -0,0 +1,22 @@

+#!/usr/bin/env python3
+from diffusers import DiffusionPipeline, EulerAncestralDiscreteScheduler
+import torch
+path = "runwayml/stable-diffusion-v1-5"
+run_compile = False  # Set True / False
+use_karras_sigmas = False
+# Load the pipeline in half precision and move it to the GPU.
+pipe = DiffusionPipeline.from_pretrained(path, torch_dtype=torch.float16)
+pipe = pipe.to("cuda")
+# Swap in the Euler-ancestral scheduler, built from the current scheduler's config.
+scheduler_config = pipe.scheduler.config
+pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(
+    scheduler_config,
+    use_karras_sigmas=use_karras_sigmas,
+)
+pipe.unet.to(memory_format=torch.channels_last)
+if run_compile:
+    print("Run torch compile")
+    pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+prompt = "ghibli style, a fantasy landscape with castles"
+# Run the pipeline three times; only the images of the last run are kept.
+for _ in range(3):
+    images = pipe(prompt=prompt).images

prompt_weight.py ADDED Viewed

	@@ -0,0 +1,35 @@

+#!/usr/bin/env python3
+import torch
+import os
+from compel import Compel, ReturnedEmbeddingsType
+from diffusers import DiffusionPipeline
+from huggingface_hub import HfApi
+from pathlib import Path
+api = HfApi()
+pipeline = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", variant="fp16", use_safetensors=True, torch_dtype=torch.float16).to("cuda")
+# Compel is given both SDXL tokenizers/text encoders; only the second one
+# supplies the pooled embedding (requires_pooled=[False, True]).
+compel = Compel(
+    tokenizer=[pipeline.tokenizer, pipeline.tokenizer_2],
+    text_encoder=[pipeline.text_encoder, pipeline.text_encoder_2],
+    returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
+    requires_pooled=[False, True],
+)
+# weight "ball" up (1.5) in the first prompt and down (0.6) in the second
+prompt = ["a red cat playing with a (ball)1.5", "a red cat playing with a (ball)0.6"]
+conditioning, pooled = compel(prompt)
+# generate image; one identically seeded generator per prompt
+generator = [torch.Generator().manual_seed(33) for _ in prompt]
+images = pipeline(prompt_embeds=conditioning, pooled_prompt_embeds=pooled, generator=generator, num_inference_steps=30).images
+# Make sure the output directory exists before saving into it.
+image_dir = os.path.join(Path.home(), "images")
+os.makedirs(image_dir, exist_ok=True)
+for i, image in enumerate(images):
+    file_name = f"bb_1_{i}"
+    path = os.path.join(image_dir, f"{file_name}.png")
+    image.save(path)
+    api.upload_file(
+        path_or_fileobj=path,
+        # basename() instead of split("/") so other path separators also work
+        path_in_repo=os.path.basename(path),
+        repo_id="patrickvonplaten/images",
+        repo_type="dataset",
+    )
+    print(f"https://huggingface.co/datasets/patrickvonplaten/images/blob/main/{file_name}.png")

run_bug_conv.py ADDED Viewed

	@@ -0,0 +1,63 @@

+#!/usr/bin/env python3
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+# NOTE(review): the file name suggests this is a minimal repro for a
+# torch.compile issue — confirm before restructuring any of this code.
+class SuperConv(nn.Conv2d):
+    """Conv2d whose output is shifted by a constant 3 when `is_lora` is set."""
+    def __init__(self, *args, is_lora=False, **kwargs):
+        # All positional/keyword arguments are forwarded to nn.Conv2d unchanged.
+        super().__init__(*args, **kwargs)
+        self.is_lora = is_lora
+    def forward(self, *args, **kwargs):
+        # The branch is chosen from a plain Python attribute on the module.
+        if self.is_lora:
+            return 3 + super().forward(*args, **kwargs)
+        else:
+            return super().forward(*args, **kwargs)
+# Define a simple Convolutional Neural Network
+class SimpleCNN(nn.Module):
+    """Two SuperConv + max-pool stages followed by three linear layers (10 outputs)."""
+    def __init__(self):
+        super(SimpleCNN, self).__init__()
+        self.conv1 = SuperConv(3, 6, 5) # Assuming input images are RGB, so 3 input channels
+        self.pool = nn.MaxPool2d(2, 2)
+        self.conv2 = SuperConv(6, 16, 5)
+        # 16 * 5 * 5 is the flattened feature size for the 32x32 inputs used
+        # by this script (32 -> 28 -> 14 -> 10 -> 5 per spatial dimension).
+        self.fc1 = nn.Linear(16 * 5 * 5, 120)
+        self.fc2 = nn.Linear(120, 84)
+        self.fc3 = nn.Linear(84, 10)
+    def forward(self, x):
+        x = self.pool(F.relu(self.conv1(x)))
+        x = self.pool(F.relu(self.conv2(x)))
+        # Flatten everything except the batch dimension.
+        x = x.view(-1, 16 * 5 * 5)
+        x = F.relu(self.fc1(x))
+        x = F.relu(self.fc2(x))
+        x = self.fc3(x)
+        return x
+# Create the network
+net = SimpleCNN()
+# Initialize weights with dummy values
+# (constant weights and biases, so the only randomness left is the input)
+for m in net.modules():
+    if isinstance(m, nn.Conv2d):
+        nn.init.constant_(m.weight, 0.1)
+        nn.init.constant_(m.bias, 0.1)
+    elif isinstance(m, nn.Linear):
+        nn.init.constant_(m.weight, 0.1)
+        nn.init.constant_(m.bias, 0.1)
+# Perform inference
+# NOTE(review): `input` shadows the Python builtin of the same name.
+input = torch.randn(1, 3, 32, 32).to("cuda")
+net = net.to("cuda")
+# First run the module eagerly and print the result ...
+output = net(input)
+print(output)
+# ... then compile it and run the same input again so both outputs can be compared.
+net = torch.compile(net, mode="reduce-overhead", fullgraph=True)
+output = net(input)
+print(output)

run_local_fuse_xl.py ADDED Viewed

	@@ -0,0 +1,38 @@

+#!/usr/bin/env python3
+from huggingface_hub import HfApi
+import torch
+from pathlib import Path
+import os
+import time
+api = HfApi()
+start_time = time.time()
+from diffusers import DiffusionPipeline
+import torch
+# Load SDXL base, load the offset-noise example LoRA from the same repo and
+# fuse it into the UNet before moving the pipeline to the GPU.
+pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
+pipe.load_lora_weights("stabilityai/stable-diffusion-xl-base-1.0", weight_name="sd_xl_offset_example-lora_1.0.safetensors")
+pipe.unet.fuse_lora()
+pipe.to(torch_dtype=torch.float16)
+pipe.to("cuda")
+torch.manual_seed(0)
+prompt = "beautiful scenery nature glass bottle landscape, , purple galaxy bottle"
+negative_prompt = "text, watermark"
+image = pipe(prompt, negative_prompt=negative_prompt, num_inference_steps=25).images[0]
+file_name = "aaa"
+# Make sure the output directory exists before saving into it.
+image_dir = os.path.join(Path.home(), "images", "ediffi_sdxl")
+os.makedirs(image_dir, exist_ok=True)
+path = os.path.join(image_dir, f"{file_name}.png")
+image.save(path)
+api.upload_file(
+    path_or_fileobj=path,
+    # basename() instead of split("/") so other path separators also work
+    path_in_repo=os.path.basename(path),
+    repo_id="patrickvonplaten/images",
+    repo_type="dataset",
+)
+print(f"https://huggingface.co/datasets/patrickvonplaten/images/blob/main/{file_name}.png")

run_local_xl.py CHANGED Viewed

@@ -19,15 +19,14 @@ start_time = time.time()
 # use_refiner = bool(int(sys.argv[1]))
 use_refiner = True
 use_diffusers = True
-path = "/home/patrick/sai/stable-diffusion-xl-base-1.0"
-refiner_path = "/home/patrick/sai/stable-diffusion-xl-refiner-1.0"
-vae_path = "/home/patrick/sai/stable-diffusion-xl-base-1.0/vae/"
-vae_path = "/home/patrick/sai/sdxl-vae"
 vae = AutoencoderKL.from_pretrained(vae_path, torch_dtype=torch.float16, force_upcast=True)
 if use_diffusers:
     # pipe = StableDiffusionXLPipeline.from_pretrained(path, vae=vae, torch_dtype=torch.float16, variant="fp16", use_safetensors=True, local_files_only=True)
-    pipe = StableDiffusionXLPipeline.from_pretrained(path, torch_dtype=torch.float16, vae=vae, variant="fp16", use_safetensors=True, local_files_only=True)
     print(time.time() - start_time)
     pipe.to("cuda")

 # use_refiner = bool(int(sys.argv[1]))
 use_refiner = True
 use_diffusers = True
+path = "stabilityai/stable-diffusion-xl-base-1.0"
+refiner_path = "stabilityai/stable-diffusion-xl-refiner-1.0"
+vae_path = "stabilityai/sdxl-vae"
 vae = AutoencoderKL.from_pretrained(vae_path, torch_dtype=torch.float16, force_upcast=True)
 if use_diffusers:
     # pipe = StableDiffusionXLPipeline.from_pretrained(path, vae=vae, torch_dtype=torch.float16, variant="fp16", use_safetensors=True, local_files_only=True)
+    pipe = StableDiffusionXLPipeline.from_pretrained(path, torch_dtype=torch.float16, vae=vae, variant="fp16", use_safetensors=True, local_files_only=True, add_watermarker=False)
     print(time.time() - start_time)
     pipe.to("cuda")

run_lora.py ADDED Viewed

	@@ -0,0 +1,43 @@

+#!/usr/bin/env python3
+from diffusers import StableDiffusionPipeline, KDPM2DiscreteScheduler, StableDiffusionImg2ImgPipeline, HeunDiscreteScheduler, KDPM2AncestralDiscreteScheduler, DDIMScheduler,  DPMSolverMultistepScheduler
+import time
+import os
+from huggingface_hub import HfApi
+# from compel import Compel
+import torch
+import sys
+from pathlib import Path
+import requests
+from PIL import Image
+from io import BytesIO
+# Text-to-image run of Stable Diffusion v1.5 with a LoRA loaded, followed by
+# an upload of every generated image to the Hub dataset repo.
+path = "runwayml/stable-diffusion-v1-5"
+lora_id = "takuma104/lora-test-text-encoder-lora-target"
+api = HfApi()
+start_time = time.time()
+pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float16)
+pipe.load_lora_weights(lora_id)
+pipe = pipe.to("cuda")
+prompt = "a red sks dog"
+images = pipe(prompt=prompt,
+    num_inference_steps=15,
+    cross_attention_kwargs={"scale": 0.5},
+    generator=torch.manual_seed(0)
+).images
+# Make sure the local output directory exists before the first save.
+output_dir = os.path.join(Path.home(), "images")
+os.makedirs(output_dir, exist_ok=True)
+for i, image in enumerate(images):
+    file_name = f"aa_{i}"
+    # Separate name so the model id held in `path` is not overwritten.
+    image_path = os.path.join(output_dir, f"{file_name}.png")
+    image.save(image_path)
+    api.upload_file(
+        path_or_fileobj=image_path,
+        # basename() instead of split("/") keeps this separator-agnostic.
+        path_in_repo=os.path.basename(image_path),
+        repo_id="patrickvonplaten/images",
+        repo_type="dataset",
+    )
+    print(f"https://huggingface.co/datasets/patrickvonplaten/images/blob/main/{file_name}.png")

run_wuerst.py ADDED Viewed

	@@ -0,0 +1,37 @@

+#!/usr/bin/env python3
+import torch
+from diffusers import AutoPipelineForText2Image
+from huggingface_hub import HfApi
+from pathlib import Path
+import os
+from PIL import Image
+import numpy as np
+# Generate one image per prompt with the Wuerstchen pipeline and upload each
+# result to the Hub dataset repo.
+api = HfApi()
+pipe = AutoPipelineForText2Image.from_pretrained("warp-diffusion/WuerstchenGeneratorPipeline", torch_dtype=torch.float16).to("cuda")
+prompt = [
+    "An old destroyed car standing on a cliff in norway, cinematic photography",
+    "Western movie, closeup cinematic photography",
+    "Pink nike shoe commercial, closeup cinematic photography",
+    "Croatia, closeup cinematic photography",
+    "South Tyrol mountains at sunset, closeup cinematic photography",
+]
+images = pipe(prompt, guidance_scale=8.0, width=1024, height=1024).images
+# Make sure the local output directory exists before the first save.
+output_dir = os.path.join(Path.home(), "images")
+os.makedirs(output_dir, exist_ok=True)
+for i, image in enumerate(images):
+    file_name = f"bb_1_{i}"
+    path = os.path.join(output_dir, f"{file_name}.png")
+    image.save(path)
+    api.upload_file(
+        path_or_fileobj=path,
+        # basename() instead of split("/") keeps this separator-agnostic.
+        path_in_repo=os.path.basename(path),
+        repo_id="patrickvonplaten/images",
+        repo_type="dataset",
+    )
+    print(f"https://huggingface.co/datasets/patrickvonplaten/images/blob/main/{file_name}.png")

run_xl_lora.py CHANGED Viewed

@@ -8,7 +8,10 @@ import os
 api = HfApi()
 pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
-pipe.load_lora_weights("./sd_xl_offset_example-lora_1.0.safetensors")
 pipe.to(torch_dtype=torch.float16)
 pipe.to("cuda")

 api = HfApi()
 pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
+pipe.load_lora_weights("stabilityai/stable-diffusion-xl-base-1.0", weight_name="sd_xl_offset_example-lora_1.0.safetensors")
+# pipe.unet.fuse_lora()
+# 7.8 it/s to beat
+#
 pipe.to(torch_dtype=torch.float16)
 pipe.to("cuda")

sd_xl_inpaint.py ADDED Viewed

	@@ -0,0 +1,76 @@

+#!/usr/bin/env python3
+from diffusers import AutoPipelineForInpainting, AutoPipelineForImage2Image
+from diffusers.utils import load_image
+import torch
+from pathlib import Path
+import os
+from huggingface_hub import HfApi
+# Three-stage experiment: (1) inpaint with the SD inpainting checkpoint,
+# (2) run the upscaled result through the SDXL refiner as an inpainting pass,
+# (3) run it once more through the same refiner as an image-to-image pass.
+# The final image is saved locally and uploaded to the Hub.
+torch.backends.cuda.matmul.allow_tf32 = True
+torch_device = "cuda" if torch.cuda.is_available() else "cpu"
+api = HfApi()
+pipe = AutoPipelineForInpainting.from_pretrained("runwayml/stable-diffusion-inpainting", torch_dtype=torch.float16, variant="fp16", use_safetensors=True)
+pipe = pipe.to(torch_device)
+pipe.enable_xformers_memory_efficient_attention()
+img_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png"
+mask_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png"
+image = load_image(img_url)
+mask_image = load_image(mask_url)
+prompt = "dslr photography of an empty bench, high quality"
+# Use the selected device everywhere instead of a hard-coded "cuda", so the
+# generator and both pipelines always live on the same device.
+generator = torch.Generator(device=torch_device).manual_seed(0)
+# Stage 1: base inpainting.
+image = pipe(
+    prompt=prompt,
+    image=image,
+    mask_image=mask_image,
+    guidance_scale=8.0,
+    num_inference_steps=20,
+    generator=generator,
+).images[0]
+# Upscale the stage-1 result to 1024x1024 before handing it to the refiner.
+image = image.resize((1024, 1024))
+# Stage 2: SDXL refiner used as an inpainting pipeline at low strength.
+pipe = AutoPipelineForInpainting.from_pretrained("stabilityai/stable-diffusion-xl-refiner-1.0", torch_dtype=torch.float16, variant="fp16", use_safetensors=True)
+pipe.to(torch_device)
+pipe.enable_xformers_memory_efficient_attention()
+image = pipe(
+    prompt=prompt,
+    image=image,
+    mask_image=mask_image,
+    guidance_scale=8.0,
+    num_inference_steps=100,
+    strength=0.2,
+    generator=generator,
+).images[0]
+# Stage 3: build an image-to-image pipeline from the refiner pipeline and run
+# one more low-strength pass over the whole image (no mask).
+pipe = AutoPipelineForImage2Image.from_pipe(pipe)
+pipe.enable_xformers_memory_efficient_attention()
+image = pipe(
+    prompt=prompt,
+    image=image,
+    guidance_scale=8.0,
+    num_inference_steps=100,
+    strength=0.2,
+    generator=generator,
+).images[0]
+file_name = "aaa"
+path = os.path.join(Path.home(), "images", "ediffi_sdxl", f"{file_name}.png")
+# Create the output directory up front so image.save() cannot fail on a fresh machine.
+os.makedirs(os.path.dirname(path), exist_ok=True)
+image.save(path)
+api.upload_file(
+    path_or_fileobj=path,
+    # basename() instead of split("/") keeps this separator-agnostic.
+    path_in_repo=os.path.basename(path),
+    repo_id="patrickvonplaten/images",
+    repo_type="dataset",
+)
+print(f"https://huggingface.co/datasets/patrickvonplaten/images/blob/main/{file_name}.png")

train_unet.py ADDED Viewed

	@@ -0,0 +1,24 @@

+#!/usr/bin/env python3
+from diffusers import UNet2DConditionModel
+import torch
+# Memory probe: one forward/backward pass of the SDXL UNet in fp16 with
+# gradient checkpointing enabled, then print the peak allocated CUDA memory.
+# The per-process memory fraction on cuda:1 is set to 0.5 before the model is moved there.
+torch.cuda.set_per_process_memory_fraction(0.5, device="cuda:1")
+unet = UNet2DConditionModel.from_pretrained(
+    "stabilityai/stable-diffusion-xl-base-1.0",
+    subfolder="unet",
+    variant="fp16",
+    torch_dtype=torch.float16,
+)
+unet.train()
+unet.enable_gradient_checkpointing()
+unet = unet.to("cuda:1")
+batch_size = 2
+def _tiled(tensor):
+    """Cast to fp16, move to the UNet's device and repeat batch_size times along dim 0."""
+    trailing_ones = (1,) * (tensor.dim() - 1)
+    return tensor.half().to(unet.device).repeat(batch_size, *trailing_ones)
+# Random tensors are drawn in the same order as before so seeded runs match.
+sample = _tiled(torch.randn((1, 4, 128, 128)))
+time_ids = _tiled((torch.arange(6) / 6)[None, :])
+encoder_hidden_states = _tiled(torch.randn((1, 77, 2048)))
+text_embeds = _tiled(torch.randn((1, 1280)))
+added_cond_kwargs = {"time_ids": time_ids, "text_embeds": text_embeds}
+out = unet(sample, 1.0, added_cond_kwargs=added_cond_kwargs, encoder_hidden_states=encoder_hidden_states).sample
+# Dummy MSE objective against the input, just to drive a backward pass.
+loss = ((out - sample) ** 2).mean()
+loss.backward()
+print(torch.cuda.max_memory_allocated(device=unet.device))