Spaces:

multimodalart
/

stable-video-diffusion

Running on Zero

App Files Files Community

diffusers-backend

by multimodalart HF staff - opened Nov 28, 2023

base: refs/heads/main

←

from: refs/pr/6

Discussion Files changed

+50

-16657

This view is limited to 50 files because it contains too many changes. See the raw diff here.

Files changed (50) hide show

.gitattributes +0 -38
CODEOWNERS +0 -1
LICENSE-CODE +0 -21
app.py +45 -211
assets/000.jpg +0 -0
assets/001_with_eval.png +0 -3
assets/test_image.png +0 -0
assets/tile.gif +0 -3
configs/.DS_Store +0 -0
configs/example_training/autoencoder/kl-f4/imagenet-attnfree-logvar.yaml +0 -104
configs/example_training/autoencoder/kl-f4/imagenet-kl_f8_8chn.yaml +0 -105
configs/example_training/imagenet-f8_cond.yaml +0 -185
configs/example_training/toy/cifar10_cond.yaml +0 -98
configs/example_training/toy/mnist.yaml +0 -79
configs/example_training/toy/mnist_cond.yaml +0 -98
configs/example_training/toy/mnist_cond_discrete_eps.yaml +0 -103
configs/example_training/toy/mnist_cond_l1_loss.yaml +0 -99
configs/example_training/toy/mnist_cond_with_ema.yaml +0 -100
configs/example_training/txt2img-clipl-legacy-ucg-training.yaml +0 -182
configs/example_training/txt2img-clipl.yaml +0 -184
configs/inference/sd_2_1.yaml +0 -60
configs/inference/sd_2_1_768.yaml +0 -60
configs/inference/sd_xl_base.yaml +0 -93
configs/inference/sd_xl_refiner.yaml +0 -86
configs/inference/svd.yaml +0 -131
configs/inference/svd_image_decoder.yaml +0 -114
data/DejaVuSans.ttf +0 -0
images/blink_meme.png +0 -0
images/confused2_meme.png +0 -0
images/confused_meme.png +0 -0
images/disaster_meme.png +0 -0
images/distracted_meme.png +0 -0
images/hide_meme.png +0 -0
images/nazare_meme.png +0 -0
images/success_meme.png +0 -0
images/willy_meme.png +0 -0
images/wink_meme.png +0 -0
main.py +0 -943
model_licenses/LICENSE-SDV +0 -31
model_licenses/LICENSE-SDXL0.9 +0 -75
model_licenses/LICENSE-SDXL1.0 +0 -175
pyproject.toml +0 -48
pytest.ini +0 -3
requirements.txt +5 -40
requirements/pt13.txt +0 -40
requirements/pt2.txt +0 -39
scripts/.DS_Store +0 -0
scripts/__init__.py +0 -0
scripts/demo/__init__.py +0 -0
scripts/demo/detect.py +0 -156

.gitattributes DELETED Viewed

@@ -1,38 +0,0 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text
-assets/001_with_eval.png filter=lfs diff=lfs merge=lfs -text
-assets/tile.gif filter=lfs diff=lfs merge=lfs -text
-outputs/000004.mp4 filter=lfs diff=lfs merge=lfs -text

CODEOWNERS DELETED Viewed

@@ -1 +0,0 @@
-.github @Stability-AI/infrastructure

LICENSE-CODE DELETED Viewed

@@ -1,21 +0,0 @@
-MIT License
-Copyright (c) 2023 Stability AI
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.

app.py CHANGED Viewed

@@ -1,243 +1,59 @@
-import math
 import os
 from glob import glob
 from pathlib import Path
 from typing import Optional
-import cv2
-import numpy as np
-import torch
-from einops import rearrange, repeat
-from fire import Fire
-from omegaconf import OmegaConf
 from PIL import Image
-from torchvision.transforms import ToTensor
-from scripts.util.detection.nsfw_and_watermark_dectection import \
-    DeepFloydDataFiltering
-from sgm.inference.helpers import embed_watermark
-from sgm.util import default, instantiate_from_config
-import gradio as gr
 import uuid
 import random
 from huggingface_hub import hf_hub_download
-hf_hub_download(repo_id="stabilityai/stable-video-diffusion-img2vid-xt", filename="svd_xt.safetensors", local_dir="checkpoints")
-version = "svd_xt"
-device = "cuda"
-max_64_bit_int = 2**63 - 1
-def load_model(
-    config: str,
-    device: str,
-    num_frames: int,
-    num_steps: int,
-):
-    config = OmegaConf.load(config)
-    if device == "cuda":
-        config.model.params.conditioner_config.params.emb_models[
-            0
-        ].params.open_clip_embedding_config.params.init_device = device
-    config.model.params.sampler_config.params.num_steps = num_steps
-    config.model.params.sampler_config.params.guider_config.params.num_frames = (
-        num_frames
-    )
-    if device == "cuda":
-        with torch.device(device):
-            model = instantiate_from_config(config.model).to(device).eval()
-    else:
-        model = instantiate_from_config(config.model).to(device).eval()
-    filter = DeepFloydDataFiltering(verbose=False, device=device)
-    return model, filter
-if version == "svd_xt":
-    num_frames = 25
-    num_steps = 30
-    model_config = "scripts/sampling/configs/svd_xt.yaml"
-else:
-    raise ValueError(f"Version {version} does not exist.")
-model, filter = load_model(
-    model_config,
-    device,
-    num_frames,
-    num_steps,
 )
 def sample(
     image: Image,
-    seed: Optional[int] = None,
     randomize_seed: bool = True,
     motion_bucket_id: int = 127,
     fps_id: int = 6,
     version: str = "svd_xt",
     cond_aug: float = 0.02,
-    decoding_t: int = 5,  # Number of frames decoded at a time! This eats most VRAM. Reduce if necessary.
     device: str = "cuda",
     output_folder: str = "outputs",
-    progress=gr.Progress(track_tqdm=True)
 ):
     if(randomize_seed):
         seed = random.randint(0, max_64_bit_int)
-    torch.manual_seed(seed)
-    if image.mode == "RGBA":
-        image = image.convert("RGB")
-    w, h = image.size
-    if h % 64 != 0 or w % 64 != 0:
-        width, height = map(lambda x: x - x % 64, (w, h))
-        image = image.resize((width, height))
-        print(
-            f"WARNING: Your image is of size {h}x{w} which is not divisible by 64. We are resizing to {height}x{width}!"
-        )
-    image = ToTensor()(image)
-    image = image * 2.0 - 1.0
-    image = image.unsqueeze(0).to(device)
-    H, W = image.shape[2:]
-    assert image.shape[1] == 3
-    F = 8
-    C = 4
-    shape = (num_frames, C, H // F, W // F)
-    if (H, W) != (576, 1024):
-        print(
-            "WARNING: The conditioning frame you provided is not 576x1024. This leads to suboptimal performance as model was only trained on 576x1024. Consider increasing `cond_aug`."
-        )
-    if motion_bucket_id > 255:
-        print(
-            "WARNING: High motion bucket! This may lead to suboptimal performance."
-        )
-    if fps_id < 5:
-        print("WARNING: Small fps value! This may lead to suboptimal performance.")
-    if fps_id > 30:
-        print("WARNING: Large fps value! This may lead to suboptimal performance.")
-    value_dict = {}
-    value_dict["motion_bucket_id"] = motion_bucket_id
-    value_dict["fps_id"] = fps_id
-    value_dict["cond_aug"] = cond_aug
-    value_dict["cond_frames_without_noise"] = image
-    value_dict["cond_frames"] = image + cond_aug * torch.randn_like(image)
-    value_dict["cond_aug"] = cond_aug
-    with torch.no_grad():
-        with torch.autocast(device):
-            batch, batch_uc = get_batch(
-                get_unique_embedder_keys_from_conditioner(model.conditioner),
-                value_dict,
-                [1, num_frames],
-                T=num_frames,
-                device=device,
-            )
-            c, uc = model.conditioner.get_unconditional_conditioning(
-                batch,
-                batch_uc=batch_uc,
-                force_uc_zero_embeddings=[
-                    "cond_frames",
-                    "cond_frames_without_noise",
-                ],
-            )
-            for k in ["crossattn", "concat"]:
-                uc[k] = repeat(uc[k], "b ... -> b t ...", t=num_frames)
-                uc[k] = rearrange(uc[k], "b t ... -> (b t) ...", t=num_frames)
-                c[k] = repeat(c[k], "b ... -> b t ...", t=num_frames)
-                c[k] = rearrange(c[k], "b t ... -> (b t) ...", t=num_frames)
-            randn = torch.randn(shape, device=device)
-            additional_model_inputs = {}
-            additional_model_inputs["image_only_indicator"] = torch.zeros(
-                2, num_frames
-            ).to(device)
-            additional_model_inputs["num_video_frames"] = batch["num_video_frames"]
-            def denoiser(input, sigma, c):
-                return model.denoiser(
-                    model.model, input, sigma, c, **additional_model_inputs
-                )
-            samples_z = model.sampler(denoiser, randn, cond=c, uc=uc)
-            model.en_and_decode_n_samples_a_time = decoding_t
-            samples_x = model.decode_first_stage(samples_z)
-            samples = torch.clamp((samples_x + 1.0) / 2.0, min=0.0, max=1.0)
-            os.makedirs(output_folder, exist_ok=True)
-            base_count = len(glob(os.path.join(output_folder, "*.mp4")))
-            video_path = os.path.join(output_folder, f"{base_count:06d}.mp4")
-            writer = cv2.VideoWriter(
-                video_path,
-                cv2.VideoWriter_fourcc(*"mp4v"),
-                fps_id + 1,
-                (samples.shape[-1], samples.shape[-2]),
-            )
-            samples = embed_watermark(samples)
-            samples = filter(samples)
-            vid = (
-                (rearrange(samples, "t c h w -> t h w c") * 255)
-                .cpu()
-                .numpy()
-                .astype(np.uint8)
-            )
-            for frame in vid:
-                frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
-                writer.write(frame)
-            writer.release()
     return video_path, seed
-def get_unique_embedder_keys_from_conditioner(conditioner):
-    return list(set([x.input_key for x in conditioner.embedders]))
-def get_batch(keys, value_dict, N, T, device):
-    batch = {}
-    batch_uc = {}
-    for key in keys:
-        if key == "fps_id":
-            batch[key] = (
-                torch.tensor([value_dict["fps_id"]])
-                .to(device)
-                .repeat(int(math.prod(N)))
-            )
-        elif key == "motion_bucket_id":
-            batch[key] = (
-                torch.tensor([value_dict["motion_bucket_id"]])
-                .to(device)
-                .repeat(int(math.prod(N)))
-            )
-        elif key == "cond_aug":
-            batch[key] = repeat(
-                torch.tensor([value_dict["cond_aug"]]).to(device),
-                "1 -> b",
-                b=math.prod(N),
-            )
-        elif key == "cond_frames":
-            batch[key] = repeat(value_dict["cond_frames"], "1 ... -> b ...", b=N[0])
-        elif key == "cond_frames_without_noise":
-            batch[key] = repeat(
-                value_dict["cond_frames_without_noise"], "1 ... -> b ...", b=N[0]
-            )
-        else:
-            batch[key] = value_dict[key]
-    if T is not None:
-        batch["num_video_frames"] = T
-    for key in batch.keys():
-        if key not in batch_uc and isinstance(batch[key], torch.Tensor):
-            batch_uc[key] = torch.clone(batch[key])
-    return batch, batch_uc
 def resize_image(image, output_size=(1024, 576)):
     # Calculate aspect ratios
     target_aspect = output_size[0] / output_size[1]  # Aspect ratio of the desired size
@@ -286,7 +102,25 @@ with gr.Blocks() as demo:
   image.upload(fn=resize_image, inputs=image, outputs=image, queue=False)
   generate_btn.click(fn=sample, inputs=[image, seed, randomize_seed, motion_bucket_id, fps_id], outputs=[video, seed], api_name="video")
 if __name__ == "__main__":
     demo.queue(max_size=20)
     demo.launch(share=True)

+import gradio as gr
+import gradio.helpers
+import torch
 import os
 from glob import glob
 from pathlib import Path
 from typing import Optional
+from diffusers import StableVideoDiffusionPipeline
+from diffusers.utils import load_image, export_to_video
 from PIL import Image
 import uuid
 import random
 from huggingface_hub import hf_hub_download
+gradio.helpers.CACHED_FOLDER = '/data/cache'
+pipe = StableVideoDiffusionPipeline.from_pretrained(
+    "stabilityai/stable-video-diffusion-img2vid-xt", torch_dtype=torch.float16, variant="fp16"
 )
+pipe.to("cuda")
+pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=True)
+max_64_bit_int = 2**63 - 1
 def sample(
     image: Image,
+    seed: Optional[int] = 42,
     randomize_seed: bool = True,
     motion_bucket_id: int = 127,
     fps_id: int = 6,
     version: str = "svd_xt",
     cond_aug: float = 0.02,
+    decoding_t: int = 3,  # Number of frames decoded at a time! This eats most VRAM. Reduce if necessary.
     device: str = "cuda",
     output_folder: str = "outputs",
 ):
+    if image.mode == "RGBA":
+        image = image.convert("RGB")
     if(randomize_seed):
         seed = random.randint(0, max_64_bit_int)
+    generator = torch.manual_seed(seed)
+    os.makedirs(output_folder, exist_ok=True)
+    base_count = len(glob(os.path.join(output_folder, "*.mp4")))
+    video_path = os.path.join(output_folder, f"{base_count:06d}.mp4")
+    frames = pipe(image, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=0.1).frames[0]
+    export_to_video(frames, video_path, fps=fps_id)
+    torch.manual_seed(seed)
     return video_path, seed
 def resize_image(image, output_size=(1024, 576)):
     # Calculate aspect ratios
     target_aspect = output_size[0] / output_size[1]  # Aspect ratio of the desired size
   image.upload(fn=resize_image, inputs=image, outputs=image, queue=False)
   generate_btn.click(fn=sample, inputs=[image, seed, randomize_seed, motion_bucket_id, fps_id], outputs=[video, seed], api_name="video")
+  gr.Examples(
+    examples=[
+        "images/blink_meme.png",
+        "images/confused2_meme.png",
+        "images/confused_meme.png",
+        "images/disaster_meme.png",
+        "images/distracted_meme.png",
+        "images/hide_meme.png",
+        "images/nazare_meme.png",
+        "images/success_meme.png",
+        "images/willy_meme.png",
+        "images/wink_meme.png"
+    ],
+    inputs=image,
+    outputs=[video, seed],
+    fn=sample,
+    cache_examples=True,
+  )
 if __name__ == "__main__":
     demo.queue(max_size=20)
     demo.launch(share=True)

assets/000.jpg DELETED Viewed

Binary file (728 kB)

assets/001_with_eval.png DELETED Viewed

Git LFS Details

SHA256: 026fa14e30098729064a00fb7fcec41bb57dcddb33b36b548d553f601bc53634
Pointer size: 132 Bytes
Size of remote file: 4.19 MB

assets/test_image.png DELETED Viewed

Binary file (494 kB)

assets/tile.gif DELETED Viewed

Git LFS Details

SHA256: 2340a9809e36fa9634633c7cc5fd256737c620ba47151726c85173512dc5c8ff
Pointer size: 133 Bytes
Size of remote file: 18.6 MB

configs/.DS_Store DELETED Viewed

Binary file (6.15 kB)

configs/example_training/autoencoder/kl-f4/imagenet-attnfree-logvar.yaml DELETED Viewed

@@ -1,104 +0,0 @@
-model:
-  base_learning_rate: 4.5e-6
-  target: sgm.models.autoencoder.AutoencodingEngine
-  params:
-    input_key: jpg
-    monitor: val/rec_loss
-    loss_config:
-      target: sgm.modules.autoencoding.losses.GeneralLPIPSWithDiscriminator
-      params:
-        perceptual_weight: 0.25
-        disc_start: 20001
-        disc_weight: 0.5
-        learn_logvar: True
-        regularization_weights:
-          kl_loss: 1.0
-    regularizer_config:
-      target: sgm.modules.autoencoding.regularizers.DiagonalGaussianRegularizer
-    encoder_config:
-      target: sgm.modules.diffusionmodules.model.Encoder
-      params:
-        attn_type: none
-        double_z: True
-        z_channels: 4
-        resolution: 256
-        in_channels: 3
-        out_ch: 3
-        ch: 128
-        ch_mult: [1, 2, 4]
-        num_res_blocks: 4
-        attn_resolutions: []
-        dropout: 0.0
-    decoder_config:
-      target: sgm.modules.diffusionmodules.model.Decoder
-      params: ${model.params.encoder_config.params}
-data:
-  target: sgm.data.dataset.StableDataModuleFromConfig
-  params:
-    train:
-      datapipeline:
-        urls:
-          - DATA-PATH
-        pipeline_config:
-          shardshuffle: 10000
-          sample_shuffle: 10000
-        decoders:
-          - pil
-        postprocessors:
-          - target: sdata.mappers.TorchVisionImageTransforms
-            params:
-              key: jpg
-              transforms:
-                - target: torchvision.transforms.Resize
-                  params:
-                    size: 256
-                    interpolation: 3
-                - target: torchvision.transforms.ToTensor
-          - target: sdata.mappers.Rescaler
-          - target: sdata.mappers.AddOriginalImageSizeAsTupleAndCropToSquare
-            params:
-              h_key: height
-              w_key: width
-      loader:
-        batch_size: 8
-        num_workers: 4
-lightning:
-  strategy:
-    target: pytorch_lightning.strategies.DDPStrategy
-    params:
-      find_unused_parameters: True
-  modelcheckpoint:
-    params:
-      every_n_train_steps: 5000
-  callbacks:
-    metrics_over_trainsteps_checkpoint:
-      params:
-        every_n_train_steps: 50000
-    image_logger:
-      target: main.ImageLogger
-      params:
-        enable_autocast: False
-        batch_frequency: 1000
-        max_images: 8
-        increase_log_steps: True
-  trainer:
-    devices: 0,
-    limit_val_batches: 50
-    benchmark: True
-    accumulate_grad_batches: 1
-    val_check_interval: 10000

configs/example_training/autoencoder/kl-f4/imagenet-kl_f8_8chn.yaml DELETED Viewed

@@ -1,105 +0,0 @@
-model:
-  base_learning_rate: 4.5e-6
-  target: sgm.models.autoencoder.AutoencodingEngine
-  params:
-    input_key: jpg
-    monitor: val/loss/rec
-    disc_start_iter: 0
-    encoder_config:
-      target: sgm.modules.diffusionmodules.model.Encoder
-      params:
-        attn_type: vanilla-xformers
-        double_z: true
-        z_channels: 8
-        resolution: 256
-        in_channels: 3
-        out_ch: 3
-        ch: 128
-        ch_mult: [1, 2, 4, 4]
-        num_res_blocks: 2
-        attn_resolutions: []
-        dropout: 0.0
-    decoder_config:
-      target: sgm.modules.diffusionmodules.model.Decoder
-      params: ${model.params.encoder_config.params}
-    regularizer_config:
-      target: sgm.modules.autoencoding.regularizers.DiagonalGaussianRegularizer
-    loss_config:
-      target: sgm.modules.autoencoding.losses.GeneralLPIPSWithDiscriminator
-      params:
-        perceptual_weight: 0.25
-        disc_start: 20001
-        disc_weight: 0.5
-        learn_logvar: True
-        regularization_weights:
-          kl_loss: 1.0
-data:
-  target: sgm.data.dataset.StableDataModuleFromConfig
-  params:
-    train:
-      datapipeline:
-        urls:
-          - DATA-PATH
-        pipeline_config:
-          shardshuffle: 10000
-          sample_shuffle: 10000
-        decoders:
-          - pil
-        postprocessors:
-          - target: sdata.mappers.TorchVisionImageTransforms
-            params:
-              key: jpg
-              transforms:
-                - target: torchvision.transforms.Resize
-                  params:
-                    size: 256
-                    interpolation: 3
-                - target: torchvision.transforms.ToTensor
-          - target: sdata.mappers.Rescaler
-          - target: sdata.mappers.AddOriginalImageSizeAsTupleAndCropToSquare
-            params:
-              h_key: height
-              w_key: width
-      loader:
-        batch_size: 8
-        num_workers: 4
-lightning:
-  strategy:
-    target: pytorch_lightning.strategies.DDPStrategy
-    params:
-      find_unused_parameters: True
-  modelcheckpoint:
-    params:
-      every_n_train_steps: 5000
-  callbacks:
-    metrics_over_trainsteps_checkpoint:
-      params:
-        every_n_train_steps: 50000
-    image_logger:
-      target: main.ImageLogger
-      params:
-        enable_autocast: False
-        batch_frequency: 1000
-        max_images: 8
-        increase_log_steps: True
-  trainer:
-    devices: 0,
-    limit_val_batches: 50
-    benchmark: True
-    accumulate_grad_batches: 1
-    val_check_interval: 10000

configs/example_training/imagenet-f8_cond.yaml DELETED Viewed

@@ -1,185 +0,0 @@
-model:
-  base_learning_rate: 1.0e-4
-  target: sgm.models.diffusion.DiffusionEngine
-  params:
-    scale_factor: 0.13025
-    disable_first_stage_autocast: True
-    log_keys:
-      - cls
-    scheduler_config:
-      target: sgm.lr_scheduler.LambdaLinearScheduler
-      params:
-        warm_up_steps: [10000]
-        cycle_lengths: [10000000000000]
-        f_start: [1.e-6]
-        f_max: [1.]
-        f_min: [1.]
-    denoiser_config:
-      target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser
-      params:
-        num_idx: 1000
-        scaling_config:
-          target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling
-        discretization_config:
-          target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
-    network_config:
-      target: sgm.modules.diffusionmodules.openaimodel.UNetModel
-      params:
-        use_checkpoint: True
-        in_channels: 4
-        out_channels: 4
-        model_channels: 256
-        attention_resolutions: [1, 2, 4]
-        num_res_blocks: 2
-        channel_mult: [1, 2, 4]
-        num_head_channels: 64
-        num_classes: sequential
-        adm_in_channels: 1024
-        transformer_depth: 1
-        context_dim: 1024
-        spatial_transformer_attn_type: softmax-xformers
-    conditioner_config:
-      target: sgm.modules.GeneralConditioner
-      params:
-        emb_models:
-          - is_trainable: True
-            input_key: cls
-            ucg_rate: 0.2
-            target: sgm.modules.encoders.modules.ClassEmbedder
-            params:
-              add_sequence_dim: True
-              embed_dim: 1024
-              n_classes: 1000
-          - is_trainable: False
-            ucg_rate: 0.2
-            input_key: original_size_as_tuple
-            target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
-            params:
-              outdim: 256
-          - is_trainable: False
-            input_key: crop_coords_top_left
-            ucg_rate: 0.2
-            target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
-            params:
-              outdim: 256
-    first_stage_config:
-      target: sgm.models.autoencoder.AutoencoderKL
-      params:
-        ckpt_path: CKPT_PATH
-        embed_dim: 4
-        monitor: val/rec_loss
-        ddconfig:
-          attn_type: vanilla-xformers
-          double_z: true
-          z_channels: 4
-          resolution: 256
-          in_channels: 3
-          out_ch: 3
-          ch: 128
-          ch_mult: [1, 2, 4, 4]
-          num_res_blocks: 2
-          attn_resolutions: []
-          dropout: 0.0
-        lossconfig:
-          target: torch.nn.Identity
-    loss_fn_config:
-      target: sgm.modules.diffusionmodules.loss.StandardDiffusionLoss
-      params:
-        loss_weighting_config:
-          target: sgm.modules.diffusionmodules.loss_weighting.EpsWeighting
-        sigma_sampler_config:
-          target: sgm.modules.diffusionmodules.sigma_sampling.DiscreteSampling
-          params:
-            num_idx: 1000
-            discretization_config:
-              target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
-    sampler_config:
-      target: sgm.modules.diffusionmodules.sampling.EulerEDMSampler
-      params:
-        num_steps: 50
-        discretization_config:
-          target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
-        guider_config:
-          target: sgm.modules.diffusionmodules.guiders.VanillaCFG
-          params:
-            scale: 5.0
-data:
-  target: sgm.data.dataset.StableDataModuleFromConfig
-  params:
-    train:
-      datapipeline:
-        urls:
-          # USER: adapt this path the root of your custom dataset
-          - DATA_PATH
-        pipeline_config:
-          shardshuffle: 10000
-          sample_shuffle: 10000 # USER: you might wanna adapt depending on your available RAM
-        decoders:
-          - pil
-        postprocessors:
-          - target: sdata.mappers.TorchVisionImageTransforms
-            params:
-              key: jpg # USER: you might wanna adapt this for your custom dataset
-              transforms:
-                - target: torchvision.transforms.Resize
-                  params:
-                    size: 256
-                    interpolation: 3
-                - target: torchvision.transforms.ToTensor
-          - target: sdata.mappers.Rescaler
-          - target: sdata.mappers.AddOriginalImageSizeAsTupleAndCropToSquare
-            params:
-              h_key: height # USER: you might wanna adapt this for your custom dataset
-              w_key: width # USER: you might wanna adapt this for your custom dataset
-      loader:
-        batch_size: 64
-        num_workers: 6
-lightning:
-  modelcheckpoint:
-    params:
-      every_n_train_steps: 5000
-  callbacks:
-    metrics_over_trainsteps_checkpoint:
-      params:
-        every_n_train_steps: 25000
-    image_logger:
-      target: main.ImageLogger
-      params:
-        disabled: False
-        enable_autocast: False
-        batch_frequency: 1000
-        max_images: 8
-        increase_log_steps: True
-        log_first_step: False
-        log_images_kwargs:
-          use_ema_scope: False
-          N: 8
-          n_rows: 2
-  trainer:
-    devices: 0,
-    benchmark: True
-    num_sanity_val_steps: 0
-    accumulate_grad_batches: 1
-    max_epochs: 1000

configs/example_training/toy/cifar10_cond.yaml DELETED Viewed

@@ -1,98 +0,0 @@
-model:
-  base_learning_rate: 1.0e-4
-  target: sgm.models.diffusion.DiffusionEngine
-  params:
-    denoiser_config:
-      target: sgm.modules.diffusionmodules.denoiser.Denoiser
-      params:
-        scaling_config:
-          target: sgm.modules.diffusionmodules.denoiser_scaling.EDMScaling
-          params:
-            sigma_data: 1.0
-    network_config:
-      target: sgm.modules.diffusionmodules.openaimodel.UNetModel
-      params:
-        in_channels: 3
-        out_channels: 3
-        model_channels: 32
-        attention_resolutions: []
-        num_res_blocks: 4
-        channel_mult: [1, 2, 2]
-        num_head_channels: 32
-        num_classes: sequential
-        adm_in_channels: 128
-    conditioner_config:
-      target: sgm.modules.GeneralConditioner
-      params:
-        emb_models:
-          - is_trainable: True
-            input_key: cls
-            ucg_rate: 0.2
-            target: sgm.modules.encoders.modules.ClassEmbedder
-            params:
-              embed_dim: 128
-              n_classes: 10
-    first_stage_config:
-      target: sgm.models.autoencoder.IdentityFirstStage
-    loss_fn_config:
-      target: sgm.modules.diffusionmodules.loss.StandardDiffusionLoss
-      params:
-        loss_weighting_config:
-          target: sgm.modules.diffusionmodules.loss_weighting.EDMWeighting
-          params:
-            sigma_data: 1.0
-        sigma_sampler_config:
-          target: sgm.modules.diffusionmodules.sigma_sampling.EDMSampling
-    sampler_config:
-      target: sgm.modules.diffusionmodules.sampling.EulerEDMSampler
-      params:
-        num_steps: 50
-        discretization_config:
-          target: sgm.modules.diffusionmodules.discretizer.EDMDiscretization
-        guider_config:
-          target: sgm.modules.diffusionmodules.guiders.VanillaCFG
-          params:
-            scale: 3.0
-data:
-  target: sgm.data.cifar10.CIFAR10Loader
-  params:
-    batch_size: 512
-    num_workers: 1
-lightning:
-  modelcheckpoint:
-    params:
-      every_n_train_steps: 5000
-  callbacks:
-    metrics_over_trainsteps_checkpoint:
-      params:
-        every_n_train_steps: 25000
-    image_logger:
-      target: main.ImageLogger
-      params:
-        disabled: False
-        batch_frequency: 1000
-        max_images: 64
-        increase_log_steps: True
-        log_first_step: False
-        log_images_kwargs:
-          use_ema_scope: False
-          N: 64
-          n_rows: 8
-  trainer:
-    devices: 0,
-    benchmark: True
-    num_sanity_val_steps: 0
-    accumulate_grad_batches: 1
-    max_epochs: 20

configs/example_training/toy/mnist.yaml DELETED Viewed

@@ -1,79 +0,0 @@
-model:
-  base_learning_rate: 1.0e-4
-  target: sgm.models.diffusion.DiffusionEngine
-  params:
-    denoiser_config:
-      target: sgm.modules.diffusionmodules.denoiser.Denoiser
-      params:
-        scaling_config:
-          target: sgm.modules.diffusionmodules.denoiser_scaling.EDMScaling
-          params:
-            sigma_data: 1.0
-    network_config:
-      target: sgm.modules.diffusionmodules.openaimodel.UNetModel
-      params:
-        in_channels: 1
-        out_channels: 1
-        model_channels: 32
-        attention_resolutions: []
-        num_res_blocks: 4
-        channel_mult: [1, 2, 2]
-        num_head_channels: 32
-    first_stage_config:
-      target: sgm.models.autoencoder.IdentityFirstStage
-    loss_fn_config:
-      target: sgm.modules.diffusionmodules.loss.StandardDiffusionLoss
-      params:
-        loss_weighting_config:
-          target: sgm.modules.diffusionmodules.loss_weighting.EDMWeighting
-          params:
-            sigma_data: 1.0
-        sigma_sampler_config:
-          target: sgm.modules.diffusionmodules.sigma_sampling.EDMSampling
-    sampler_config:
-      target: sgm.modules.diffusionmodules.sampling.EulerEDMSampler
-      params:
-        num_steps: 50
-        discretization_config:
-          target: sgm.modules.diffusionmodules.discretizer.EDMDiscretization
-data:
-  target: sgm.data.mnist.MNISTLoader
-  params:
-    batch_size: 512
-    num_workers: 1
-lightning:
-  modelcheckpoint:
-    params:
-      every_n_train_steps: 5000
-  callbacks:
-    metrics_over_trainsteps_checkpoint:
-      params:
-        every_n_train_steps: 25000
-    image_logger:
-      target: main.ImageLogger
-      params:
-        disabled: False
-        batch_frequency: 1000
-        max_images: 64
-        increase_log_steps: False
-        log_first_step: False
-        log_images_kwargs:
-          use_ema_scope: False
-          N: 64
-          n_rows: 8
-  trainer:
-    devices: 0,
-    benchmark: True
-    num_sanity_val_steps: 0
-    accumulate_grad_batches: 1
-    max_epochs: 10

configs/example_training/toy/mnist_cond.yaml DELETED Viewed

@@ -1,98 +0,0 @@
-model:
-  base_learning_rate: 1.0e-4
-  target: sgm.models.diffusion.DiffusionEngine
-  params:
-    denoiser_config:
-      target: sgm.modules.diffusionmodules.denoiser.Denoiser
-      params:
-        scaling_config:
-          target: sgm.modules.diffusionmodules.denoiser_scaling.EDMScaling
-          params:
-            sigma_data: 1.0
-    network_config:
-      target: sgm.modules.diffusionmodules.openaimodel.UNetModel
-      params:
-        in_channels: 1
-        out_channels: 1
-        model_channels: 32
-        attention_resolutions: []
-        num_res_blocks: 4
-        channel_mult: [1, 2, 2]
-        num_head_channels: 32
-        num_classes: sequential
-        adm_in_channels: 128
-    conditioner_config:
-      target: sgm.modules.GeneralConditioner
-      params:
-        emb_models:
-          - is_trainable: True
-            input_key: cls
-            ucg_rate: 0.2
-            target: sgm.modules.encoders.modules.ClassEmbedder
-            params:
-              embed_dim: 128
-              n_classes: 10
-    first_stage_config:
-      target: sgm.models.autoencoder.IdentityFirstStage
-    loss_fn_config:
-      target: sgm.modules.diffusionmodules.loss.StandardDiffusionLoss
-      params:
-        loss_weighting_config:
-          target: sgm.modules.diffusionmodules.loss_weighting.EDMWeighting
-          params:
-            sigma_data: 1.0
-        sigma_sampler_config:
-          target: sgm.modules.diffusionmodules.sigma_sampling.EDMSampling
-    sampler_config:
-      target: sgm.modules.diffusionmodules.sampling.EulerEDMSampler
-      params:
-        num_steps: 50
-        discretization_config:
-          target: sgm.modules.diffusionmodules.discretizer.EDMDiscretization
-        guider_config:
-          target: sgm.modules.diffusionmodules.guiders.VanillaCFG
-          params:
-            scale: 3.0
-data:
-  target: sgm.data.mnist.MNISTLoader
-  params:
-    batch_size: 512
-    num_workers: 1
-lightning:
-  modelcheckpoint:
-    params:
-      every_n_train_steps: 5000
-  callbacks:
-    metrics_over_trainsteps_checkpoint:
-      params:
-        every_n_train_steps: 25000
-    image_logger:
-      target: main.ImageLogger
-      params:
-        disabled: False
-        batch_frequency: 1000
-        max_images: 16
-        increase_log_steps: True
-        log_first_step: False
-        log_images_kwargs:
-          use_ema_scope: False
-          N: 16
-          n_rows: 4
-  trainer:
-    devices: 0,
-    benchmark: True
-    num_sanity_val_steps: 0
-    accumulate_grad_batches: 1
-    max_epochs: 20

configs/example_training/toy/mnist_cond_discrete_eps.yaml DELETED Viewed

@@ -1,103 +0,0 @@
-model:
-  base_learning_rate: 1.0e-4
-  target: sgm.models.diffusion.DiffusionEngine
-  params:
-    denoiser_config:
-      target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser
-      params:
-        num_idx: 1000
-        scaling_config:
-          target: sgm.modules.diffusionmodules.denoiser_scaling.EDMScaling
-        discretization_config:
-          target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
-    network_config:
-      target: sgm.modules.diffusionmodules.openaimodel.UNetModel
-      params:
-        in_channels: 1
-        out_channels: 1
-        model_channels: 32
-        attention_resolutions: []
-        num_res_blocks: 4
-        channel_mult: [1, 2, 2]
-        num_head_channels: 32
-        num_classes: sequential
-        adm_in_channels: 128
-    conditioner_config:
-      target: sgm.modules.GeneralConditioner
-      params:
-        emb_models:
-          - is_trainable: True
-            input_key: cls
-            ucg_rate: 0.2
-            target: sgm.modules.encoders.modules.ClassEmbedder
-            params:
-              embed_dim: 128
-              n_classes: 10
-    first_stage_config:
-      target: sgm.models.autoencoder.IdentityFirstStage
-    loss_fn_config:
-      target: sgm.modules.diffusionmodules.loss.StandardDiffusionLoss
-      params:
-        loss_weighting_config:
-          target: sgm.modules.diffusionmodules.loss_weighting.EDMWeighting
-        sigma_sampler_config:
-          target: sgm.modules.diffusionmodules.sigma_sampling.DiscreteSampling
-          params:
-            num_idx: 1000
-            discretization_config:
-              target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
-    sampler_config:
-      target: sgm.modules.diffusionmodules.sampling.EulerEDMSampler
-      params:
-        num_steps: 50
-        discretization_config:
-          target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
-        guider_config:
-          target: sgm.modules.diffusionmodules.guiders.VanillaCFG
-          params:
-            scale: 5.0
-data:
-  target: sgm.data.mnist.MNISTLoader
-  params:
-    batch_size: 512
-    num_workers: 1
-lightning:
-  modelcheckpoint:
-    params:
-      every_n_train_steps: 5000
-  callbacks:
-    metrics_over_trainsteps_checkpoint:
-      params:
-        every_n_train_steps: 25000
-    image_logger:
-      target: main.ImageLogger
-      params:
-        disabled: False
-        batch_frequency: 1000
-        max_images: 16
-        increase_log_steps: True
-        log_first_step: False
-        log_images_kwargs:
-          use_ema_scope: False
-          N: 16
-          n_rows: 4
-  trainer:
-    devices: 0,
-    benchmark: True
-    num_sanity_val_steps: 0
-    accumulate_grad_batches: 1
-    max_epochs: 20

configs/example_training/toy/mnist_cond_l1_loss.yaml DELETED Viewed

@@ -1,99 +0,0 @@
-model:
-  base_learning_rate: 1.0e-4
-  target: sgm.models.diffusion.DiffusionEngine
-  params:
-    denoiser_config:
-      target: sgm.modules.diffusionmodules.denoiser.Denoiser
-      params:
-        scaling_config:
-          target: sgm.modules.diffusionmodules.denoiser_scaling.EDMScaling
-          params:
-            sigma_data: 1.0
-    network_config:
-      target: sgm.modules.diffusionmodules.openaimodel.UNetModel
-      params:
-        in_channels: 1
-        out_channels: 1
-        model_channels: 32
-        attention_resolutions: []
-        num_res_blocks: 4
-        channel_mult: [1, 2, 2]
-        num_head_channels: 32
-        num_classes: sequential
-        adm_in_channels: 128
-    conditioner_config:
-      target: sgm.modules.GeneralConditioner
-      params:
-        emb_models:
-          - is_trainable: True
-            input_key: cls
-            ucg_rate: 0.2
-            target: sgm.modules.encoders.modules.ClassEmbedder
-            params:
-              embed_dim: 128
-              n_classes: 10
-    first_stage_config:
-      target: sgm.models.autoencoder.IdentityFirstStage
-    loss_fn_config:
-      target: sgm.modules.diffusionmodules.loss.StandardDiffusionLoss
-      params:
-        loss_type: l1
-        loss_weighting_config:
-          target: sgm.modules.diffusionmodules.loss_weighting.EDMWeighting
-          params:
-            sigma_data: 1.0
-        sigma_sampler_config:
-          target: sgm.modules.diffusionmodules.sigma_sampling.EDMSampling
-    sampler_config:
-      target: sgm.modules.diffusionmodules.sampling.EulerEDMSampler
-      params:
-        num_steps: 50
-        discretization_config:
-          target: sgm.modules.diffusionmodules.discretizer.EDMDiscretization
-        guider_config:
-          target: sgm.modules.diffusionmodules.guiders.VanillaCFG
-          params:
-            scale: 3.0
-data:
-  target: sgm.data.mnist.MNISTLoader
-  params:
-    batch_size: 512
-    num_workers: 1
-lightning:
-  modelcheckpoint:
-    params:
-      every_n_train_steps: 5000
-  callbacks:
-    metrics_over_trainsteps_checkpoint:
-      params:
-        every_n_train_steps: 25000
-    image_logger:
-      target: main.ImageLogger
-      params:
-        disabled: False
-        batch_frequency: 1000
-        max_images: 64
-        increase_log_steps: True
-        log_first_step: False
-        log_images_kwargs:
-          use_ema_scope: False
-          N: 64
-          n_rows: 8
-  trainer:
-    devices: 0,
-    benchmark: True
-    num_sanity_val_steps: 0
-    accumulate_grad_batches: 1
-    max_epochs: 20

configs/example_training/toy/mnist_cond_with_ema.yaml DELETED Viewed

@@ -1,100 +0,0 @@
-model:
-  base_learning_rate: 1.0e-4
-  target: sgm.models.diffusion.DiffusionEngine
-  params:
-    use_ema: True
-    denoiser_config:
-      target: sgm.modules.diffusionmodules.denoiser.Denoiser
-      params:
-        scaling_config:
-          target: sgm.modules.diffusionmodules.denoiser_scaling.EDMScaling
-          params:
-            sigma_data: 1.0
-    network_config:
-      target: sgm.modules.diffusionmodules.openaimodel.UNetModel
-      params:
-        in_channels: 1
-        out_channels: 1
-        model_channels: 32
-        attention_resolutions: []
-        num_res_blocks: 4
-        channel_mult: [1, 2, 2]
-        num_head_channels: 32
-        num_classes: sequential
-        adm_in_channels: 128
-    conditioner_config:
-      target: sgm.modules.GeneralConditioner
-      params:
-        emb_models:
-          - is_trainable: True
-            input_key: cls
-            ucg_rate: 0.2
-            target: sgm.modules.encoders.modules.ClassEmbedder
-            params:
-              embed_dim: 128
-              n_classes: 10
-    first_stage_config:
-      target: sgm.models.autoencoder.IdentityFirstStage
-    loss_fn_config:
-      target: sgm.modules.diffusionmodules.loss.StandardDiffusionLoss
-      params:
-        loss_weighting_config:
-          target: sgm.modules.diffusionmodules.loss_weighting.EDMWeighting
-          params:
-            sigma_data: 1.0
-        sigma_sampler_config:
-          target: sgm.modules.diffusionmodules.sigma_sampling.EDMSampling
-    sampler_config:
-      target: sgm.modules.diffusionmodules.sampling.EulerEDMSampler
-      params:
-        num_steps: 50
-        discretization_config:
-          target: sgm.modules.diffusionmodules.discretizer.EDMDiscretization
-        guider_config:
-          target: sgm.modules.diffusionmodules.guiders.VanillaCFG
-          params:
-            scale: 3.0
-data:
-  target: sgm.data.mnist.MNISTLoader
-  params:
-    batch_size: 512
-    num_workers: 1
-lightning:
-  modelcheckpoint:
-    params:
-      every_n_train_steps: 5000
-  callbacks:
-    metrics_over_trainsteps_checkpoint:
-      params:
-        every_n_train_steps: 25000
-    image_logger:
-      target: main.ImageLogger
-      params:
-        disabled: False
-        batch_frequency: 1000
-        max_images: 64
-        increase_log_steps: True
-        log_first_step: False
-        log_images_kwargs:
-          use_ema_scope: False
-          N: 64
-          n_rows: 8
-  trainer:
-    devices: 0,
-    benchmark: True
-    num_sanity_val_steps: 0
-    accumulate_grad_batches: 1
-    max_epochs: 20

configs/example_training/txt2img-clipl-legacy-ucg-training.yaml DELETED Viewed

@@ -1,182 +0,0 @@
-model:
-  base_learning_rate: 1.0e-4
-  target: sgm.models.diffusion.DiffusionEngine
-  params:
-    scale_factor: 0.13025
-    disable_first_stage_autocast: True
-    log_keys:
-      - txt
-    scheduler_config:
-      target: sgm.lr_scheduler.LambdaLinearScheduler
-      params:
-        warm_up_steps: [10000]
-        cycle_lengths: [10000000000000]
-        f_start: [1.e-6]
-        f_max: [1.]
-        f_min: [1.]
-    denoiser_config:
-      target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser
-      params:
-        num_idx: 1000
-        scaling_config:
-          target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling
-        discretization_config:
-          target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
-    network_config:
-      target: sgm.modules.diffusionmodules.openaimodel.UNetModel
-      params:
-        use_checkpoint: True
-        in_channels: 4
-        out_channels: 4
-        model_channels: 320
-        attention_resolutions: [1, 2, 4]
-        num_res_blocks: 2
-        channel_mult: [1, 2, 4, 4]
-        num_head_channels: 64
-        num_classes: sequential
-        adm_in_channels: 1792
-        num_heads: 1
-        transformer_depth: 1
-        context_dim: 768
-        spatial_transformer_attn_type: softmax-xformers
-    conditioner_config:
-      target: sgm.modules.GeneralConditioner
-      params:
-        emb_models:
-          - is_trainable: True
-            input_key: txt
-            ucg_rate: 0.1
-            legacy_ucg_value: ""
-            target: sgm.modules.encoders.modules.FrozenCLIPEmbedder
-            params:
-              always_return_pooled: True
-          - is_trainable: False
-            ucg_rate: 0.1
-            input_key: original_size_as_tuple
-            target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
-            params:
-              outdim: 256
-          - is_trainable: False
-            input_key: crop_coords_top_left
-            ucg_rate: 0.1
-            target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
-            params:
-              outdim: 256
-    first_stage_config:
-      target: sgm.models.autoencoder.AutoencoderKL
-      params:
-        ckpt_path: CKPT_PATH
-        embed_dim: 4
-        monitor: val/rec_loss
-        ddconfig:
-          attn_type: vanilla-xformers
-          double_z: true
-          z_channels: 4
-          resolution: 256
-          in_channels: 3
-          out_ch: 3
-          ch: 128
-          ch_mult: [ 1, 2, 4, 4 ]
-          num_res_blocks: 2
-          attn_resolutions: [ ]
-          dropout: 0.0
-        lossconfig:
-          target: torch.nn.Identity
-    loss_fn_config:
-      target: sgm.modules.diffusionmodules.loss.StandardDiffusionLoss
-      params:
-        loss_weighting_config:
-          target: sgm.modules.diffusionmodules.loss_weighting.EpsWeighting
-        sigma_sampler_config:
-          target: sgm.modules.diffusionmodules.sigma_sampling.DiscreteSampling
-          params:
-            num_idx: 1000
-            discretization_config:
-              target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
-    sampler_config:
-      target: sgm.modules.diffusionmodules.sampling.EulerEDMSampler
-      params:
-        num_steps: 50
-        discretization_config:
-          target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
-        guider_config:
-          target: sgm.modules.diffusionmodules.guiders.VanillaCFG
-          params:
-            scale: 7.5
-data:
-  target: sgm.data.dataset.StableDataModuleFromConfig
-  params:
-    train:
-      datapipeline:
-        urls:
-          # USER: adapt this path to the root of your custom dataset
-          - DATA_PATH
-        pipeline_config:
-          shardshuffle: 10000
-          sample_shuffle: 10000 # USER: you might wanna adapt depending on your available RAM
-        decoders:
-          - pil
-        postprocessors:
-          - target: sdata.mappers.TorchVisionImageTransforms
-            params:
-              key: jpg # USER: you might wanna adapt this for your custom dataset
-              transforms:
-                - target: torchvision.transforms.Resize
-                  params:
-                    size: 256
-                    interpolation: 3
-                - target: torchvision.transforms.ToTensor
-          - target: sdata.mappers.Rescaler
-          - target: sdata.mappers.AddOriginalImageSizeAsTupleAndCropToSquare
-            # USER: you might wanna use non-default parameters due to your custom dataset
-      loader:
-        batch_size: 64
-        num_workers: 6
-lightning:
-  modelcheckpoint:
-    params:
-      every_n_train_steps: 5000
-  callbacks:
-    metrics_over_trainsteps_checkpoint:
-      params:
-        every_n_train_steps: 25000
-    image_logger:
-      target: main.ImageLogger
-      params:
-        disabled: False
-        enable_autocast: False
-        batch_frequency: 1000
-        max_images: 8
-        increase_log_steps: True
-        log_first_step: False
-        log_images_kwargs:
-          use_ema_scope: False
-          N: 8
-          n_rows: 2
-  trainer:
-    devices: 0,
-    benchmark: True
-    num_sanity_val_steps: 0
-    accumulate_grad_batches: 1
-    max_epochs: 1000

configs/example_training/txt2img-clipl.yaml DELETED Viewed

@@ -1,184 +0,0 @@
-model:
-  base_learning_rate: 1.0e-4
-  target: sgm.models.diffusion.DiffusionEngine
-  params:
-    scale_factor: 0.13025
-    disable_first_stage_autocast: True
-    log_keys:
-      - txt
-    scheduler_config:
-      target: sgm.lr_scheduler.LambdaLinearScheduler
-      params:
-        warm_up_steps: [10000]
-        cycle_lengths: [10000000000000]
-        f_start: [1.e-6]
-        f_max: [1.]
-        f_min: [1.]
-    denoiser_config:
-      target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser
-      params:
-        num_idx: 1000
-        scaling_config:
-          target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling
-        discretization_config:
-          target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
-    network_config:
-      target: sgm.modules.diffusionmodules.openaimodel.UNetModel
-      params:
-        use_checkpoint: True
-        in_channels: 4
-        out_channels: 4
-        model_channels: 320
-        attention_resolutions: [1, 2, 4]
-        num_res_blocks: 2
-        channel_mult: [1, 2, 4, 4]
-        num_head_channels: 64
-        num_classes: sequential
-        adm_in_channels: 1792
-        num_heads: 1
-        transformer_depth: 1
-        context_dim: 768
-        spatial_transformer_attn_type: softmax-xformers
-    conditioner_config:
-      target: sgm.modules.GeneralConditioner
-      params:
-        emb_models:
-          - is_trainable: True
-            input_key: txt
-            ucg_rate: 0.1
-            legacy_ucg_value: ""
-            target: sgm.modules.encoders.modules.FrozenCLIPEmbedder
-            params:
-              always_return_pooled: True
-          - is_trainable: False
-            ucg_rate: 0.1
-            input_key: original_size_as_tuple
-            target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
-            params:
-              outdim: 256
-          - is_trainable: False
-            input_key: crop_coords_top_left
-            ucg_rate: 0.1
-            target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
-            params:
-              outdim: 256
-    first_stage_config:
-      target: sgm.models.autoencoder.AutoencoderKL
-      params:
-        ckpt_path: CKPT_PATH
-        embed_dim: 4
-        monitor: val/rec_loss
-        ddconfig:
-          attn_type: vanilla-xformers
-          double_z: true
-          z_channels: 4
-          resolution: 256
-          in_channels: 3
-          out_ch: 3
-          ch: 128
-          ch_mult: [1, 2, 4, 4]
-          num_res_blocks: 2
-          attn_resolutions: []
-          dropout: 0.0
-        lossconfig:
-          target: torch.nn.Identity
-    loss_fn_config:
-      target: sgm.modules.diffusionmodules.loss.StandardDiffusionLoss
-      params:
-        loss_weighting_config:
-          target: sgm.modules.diffusionmodules.loss_weighting.EpsWeighting
-        sigma_sampler_config:
-          target: sgm.modules.diffusionmodules.sigma_sampling.DiscreteSampling
-          params:
-            num_idx: 1000
-            discretization_config:
-              target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
-    sampler_config:
-      target: sgm.modules.diffusionmodules.sampling.EulerEDMSampler
-      params:
-        num_steps: 50
-        discretization_config:
-          target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
-        guider_config:
-          target: sgm.modules.diffusionmodules.guiders.VanillaCFG
-          params:
-            scale: 7.5
-data:
-  target: sgm.data.dataset.StableDataModuleFromConfig
-  params:
-    train:
-      datapipeline:
-        urls:
-          # USER: adapt this path to the root of your custom dataset
-          - DATA_PATH
-        pipeline_config:
-          shardshuffle: 10000
-          sample_shuffle: 10000
-        decoders:
-          - pil
-        postprocessors:
-          - target: sdata.mappers.TorchVisionImageTransforms
-            params:
-              key: jpg # USER: you might wanna adapt this for your custom dataset
-              transforms:
-                - target: torchvision.transforms.Resize
-                  params:
-                    size: 256
-                    interpolation: 3
-                - target: torchvision.transforms.ToTensor
-          - target: sdata.mappers.Rescaler
-            # USER: you might wanna use non-default parameters due to your custom dataset
-          - target: sdata.mappers.AddOriginalImageSizeAsTupleAndCropToSquare
-            # USER: you might wanna use non-default parameters due to your custom dataset
-      loader:
-        batch_size: 64
-        num_workers: 6
-lightning:
-  modelcheckpoint:
-    params:
-      every_n_train_steps: 5000
-  callbacks:
-    metrics_over_trainsteps_checkpoint:
-      params:
-        every_n_train_steps: 25000
-    image_logger:
-      target: main.ImageLogger
-      params:
-        disabled: False
-        enable_autocast: False
-        batch_frequency: 1000
-        max_images: 8
-        increase_log_steps: True
-        log_first_step: False
-        log_images_kwargs:
-          use_ema_scope: False
-          N: 8
-          n_rows: 2
-  trainer:
-    devices: 0,
-    benchmark: True
-    num_sanity_val_steps: 0
-    accumulate_grad_batches: 1
-    max_epochs: 1000

configs/inference/sd_2_1.yaml DELETED Viewed

@@ -1,60 +0,0 @@
-model:
-  target: sgm.models.diffusion.DiffusionEngine
-  params:
-    scale_factor: 0.18215
-    disable_first_stage_autocast: True
-    denoiser_config:
-      target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser
-      params:
-        num_idx: 1000
-        scaling_config:
-          target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling
-        discretization_config:
-          target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
-    network_config:
-      target: sgm.modules.diffusionmodules.openaimodel.UNetModel
-      params:
-        use_checkpoint: True
-        in_channels: 4
-        out_channels: 4
-        model_channels: 320
-        attention_resolutions: [4, 2, 1]
-        num_res_blocks: 2
-        channel_mult: [1, 2, 4, 4]
-        num_head_channels: 64
-        use_linear_in_transformer: True
-        transformer_depth: 1
-        context_dim: 1024
-    conditioner_config:
-      target: sgm.modules.GeneralConditioner
-      params:
-        emb_models:
-          - is_trainable: False
-            input_key: txt
-            target: sgm.modules.encoders.modules.FrozenOpenCLIPEmbedder
-            params:
-              freeze: true
-              layer: penultimate
-    first_stage_config:
-      target: sgm.models.autoencoder.AutoencoderKL
-      params:
-        embed_dim: 4
-        monitor: val/rec_loss
-        ddconfig:
-          double_z: true
-          z_channels: 4
-          resolution: 256
-          in_channels: 3
-          out_ch: 3
-          ch: 128
-          ch_mult: [1, 2, 4, 4]
-          num_res_blocks: 2
-          attn_resolutions: []
-          dropout: 0.0
-        lossconfig:
-          target: torch.nn.Identity

configs/inference/sd_2_1_768.yaml DELETED Viewed

@@ -1,60 +0,0 @@
-model:
-  target: sgm.models.diffusion.DiffusionEngine
-  params:
-    scale_factor: 0.18215
-    disable_first_stage_autocast: True
-    denoiser_config:
-      target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser
-      params:
-        num_idx: 1000
-        scaling_config:
-          target: sgm.modules.diffusionmodules.denoiser_scaling.VScaling
-        discretization_config:
-          target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
-    network_config:
-      target: sgm.modules.diffusionmodules.openaimodel.UNetModel
-      params:
-        use_checkpoint: True
-        in_channels: 4
-        out_channels: 4
-        model_channels: 320
-        attention_resolutions: [4, 2, 1]
-        num_res_blocks: 2
-        channel_mult: [1, 2, 4, 4]
-        num_head_channels: 64
-        use_linear_in_transformer: True
-        transformer_depth: 1
-        context_dim: 1024
-    conditioner_config:
-      target: sgm.modules.GeneralConditioner
-      params:
-        emb_models:
-          - is_trainable: False
-            input_key: txt
-            target: sgm.modules.encoders.modules.FrozenOpenCLIPEmbedder
-            params:
-              freeze: true
-              layer: penultimate
-    first_stage_config:
-      target: sgm.models.autoencoder.AutoencoderKL
-      params:
-        embed_dim: 4
-        monitor: val/rec_loss
-        ddconfig:
-          double_z: true
-          z_channels: 4
-          resolution: 256
-          in_channels: 3
-          out_ch: 3
-          ch: 128
-          ch_mult: [1, 2, 4, 4]
-          num_res_blocks: 2
-          attn_resolutions: []
-          dropout: 0.0
-        lossconfig:
-          target: torch.nn.Identity

configs/inference/sd_xl_base.yaml DELETED Viewed

@@ -1,93 +0,0 @@
-model:
-  target: sgm.models.diffusion.DiffusionEngine
-  params:
-    scale_factor: 0.13025
-    disable_first_stage_autocast: True
-    denoiser_config:
-      target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser
-      params:
-        num_idx: 1000
-        scaling_config:
-          target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling
-        discretization_config:
-          target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
-    network_config:
-      target: sgm.modules.diffusionmodules.openaimodel.UNetModel
-      params:
-        adm_in_channels: 2816
-        num_classes: sequential
-        use_checkpoint: True
-        in_channels: 4
-        out_channels: 4
-        model_channels: 320
-        attention_resolutions: [4, 2]
-        num_res_blocks: 2
-        channel_mult: [1, 2, 4]
-        num_head_channels: 64
-        use_linear_in_transformer: True
-        transformer_depth: [1, 2, 10]
-        context_dim: 2048
-        spatial_transformer_attn_type: softmax-xformers
-    conditioner_config:
-      target: sgm.modules.GeneralConditioner
-      params:
-        emb_models:
-          - is_trainable: False
-            input_key: txt
-            target: sgm.modules.encoders.modules.FrozenCLIPEmbedder
-            params:
-              layer: hidden
-              layer_idx: 11
-          - is_trainable: False
-            input_key: txt
-            target: sgm.modules.encoders.modules.FrozenOpenCLIPEmbedder2
-            params:
-              arch: ViT-bigG-14
-              version: laion2b_s39b_b160k
-              freeze: True
-              layer: penultimate
-              always_return_pooled: True
-              legacy: False
-          - is_trainable: False
-            input_key: original_size_as_tuple
-            target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
-            params:
-              outdim: 256
-          - is_trainable: False
-            input_key: crop_coords_top_left
-            target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
-            params:
-              outdim: 256
-          - is_trainable: False
-            input_key: target_size_as_tuple
-            target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
-            params:
-              outdim: 256
-    first_stage_config:
-      target: sgm.models.autoencoder.AutoencoderKL
-      params:
-        embed_dim: 4
-        monitor: val/rec_loss
-        ddconfig:
-          attn_type: vanilla-xformers
-          double_z: true
-          z_channels: 4
-          resolution: 256
-          in_channels: 3
-          out_ch: 3
-          ch: 128
-          ch_mult: [1, 2, 4, 4]
-          num_res_blocks: 2
-          attn_resolutions: []
-          dropout: 0.0
-        lossconfig:
-          target: torch.nn.Identity

configs/inference/sd_xl_refiner.yaml DELETED Viewed

@@ -1,86 +0,0 @@
-model:
-  target: sgm.models.diffusion.DiffusionEngine
-  params:
-    scale_factor: 0.13025
-    disable_first_stage_autocast: True
-    denoiser_config:
-      target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser
-      params:
-        num_idx: 1000
-        scaling_config:
-          target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling
-        discretization_config:
-          target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
-    network_config:
-      target: sgm.modules.diffusionmodules.openaimodel.UNetModel
-      params:
-        adm_in_channels: 2560
-        num_classes: sequential
-        use_checkpoint: True
-        in_channels: 4
-        out_channels: 4
-        model_channels: 384
-        attention_resolutions: [4, 2]
-        num_res_blocks: 2
-        channel_mult: [1, 2, 4, 4]
-        num_head_channels: 64
-        use_linear_in_transformer: True
-        transformer_depth: 4
-        context_dim: [1280, 1280, 1280, 1280]
-        spatial_transformer_attn_type: softmax-xformers
-    conditioner_config:
-      target: sgm.modules.GeneralConditioner
-      params:
-        emb_models:
-          - is_trainable: False
-            input_key: txt
-            target: sgm.modules.encoders.modules.FrozenOpenCLIPEmbedder2
-            params:
-              arch: ViT-bigG-14
-              version: laion2b_s39b_b160k
-              legacy: False
-              freeze: True
-              layer: penultimate
-              always_return_pooled: True
-          - is_trainable: False
-            input_key: original_size_as_tuple
-            target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
-            params:
-              outdim: 256
-          - is_trainable: False
-            input_key: crop_coords_top_left
-            target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
-            params:
-              outdim: 256
-          - is_trainable: False
-            input_key: aesthetic_score
-            target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
-            params:
-              outdim: 256
-    first_stage_config:
-      target: sgm.models.autoencoder.AutoencoderKL
-      params:
-        embed_dim: 4
-        monitor: val/rec_loss
-        ddconfig:
-          attn_type: vanilla-xformers
-          double_z: true
-          z_channels: 4
-          resolution: 256
-          in_channels: 3
-          out_ch: 3
-          ch: 128
-          ch_mult: [1, 2, 4, 4]
-          num_res_blocks: 2
-          attn_resolutions: []
-          dropout: 0.0
-        lossconfig:
-          target: torch.nn.Identity

configs/inference/svd.yaml DELETED Viewed

@@ -1,131 +0,0 @@
-# Inference config: a DiffusionEngine assembled from four sub-configs
-# (denoiser_config, network_config, conditioner_config, first_stage_config).
-# Every `target` is a dotted import path; `params` are passed to that target.
-model:
-  target: sgm.models.diffusion.DiffusionEngine
-  params:
-    scale_factor: 0.18215
-    disable_first_stage_autocast: True
-    # Denoiser wrapper and the scaling rule it is configured with.
-    denoiser_config:
-      target: sgm.modules.diffusionmodules.denoiser.Denoiser
-      params:
-        scaling_config:
-          target: sgm.modules.diffusionmodules.denoiser_scaling.VScalingWithEDMcNoise
-    # Denoising network: VideoUNet, 8 input channels and 4 output channels.
-    network_config:
-      target: sgm.modules.diffusionmodules.video_model.VideoUNet
-      params:
-        adm_in_channels: 768
-        num_classes: sequential
-        use_checkpoint: True
-        in_channels: 8
-        out_channels: 4
-        model_channels: 320
-        attention_resolutions: [4, 2, 1]
-        num_res_blocks: 2
-        channel_mult: [1, 2, 4, 4]
-        num_head_channels: 64
-        use_linear_in_transformer: True
-        transformer_depth: 1
-        context_dim: 1024
-        spatial_transformer_attn_type: softmax-xformers
-        extra_ff_mix_layer: True
-        use_spatial_context: True
-        merge_strategy: learned_with_images
-        video_kernel_size: [3, 1, 1]
-    # Conditioner: five embedders, all with is_trainable: False, selected by input_key
-    # (cond_frames_without_noise, fps_id, motion_bucket_id, cond_frames, cond_aug).
-    conditioner_config:
-      target: sgm.modules.GeneralConditioner
-      params:
-        emb_models:
-        - is_trainable: False
-          input_key: cond_frames_without_noise
-          target: sgm.modules.encoders.modules.FrozenOpenCLIPImagePredictionEmbedder
-          params:
-            n_cond_frames: 1
-            n_copies: 1
-            open_clip_embedding_config:
-              target: sgm.modules.encoders.modules.FrozenOpenCLIPImageEmbedder
-              params:
-                freeze: True
-        - input_key: fps_id
-          is_trainable: False
-          target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
-          params:
-            outdim: 256
-        - input_key: motion_bucket_id
-          is_trainable: False
-          target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
-          params:
-            outdim: 256
-        - input_key: cond_frames
-          is_trainable: False
-          target: sgm.modules.encoders.modules.VideoPredictionEmbedderWithEncoder
-          params:
-            disable_encoder_autocast: True
-            n_cond_frames: 1
-            n_copies: 1
-            is_ae: True
-            # NOTE(review): this nested encoder sets attn_type vanilla-xformers while
-            # first_stage_config below uses vanilla; confirm the difference is intended.
-            encoder_config:
-              target: sgm.models.autoencoder.AutoencoderKLModeOnly
-              params:
-                embed_dim: 4
-                monitor: val/rec_loss
-                ddconfig:
-                  attn_type: vanilla-xformers
-                  double_z: True
-                  z_channels: 4
-                  resolution: 256
-                  in_channels: 3
-                  out_ch: 3
-                  ch: 128
-                  ch_mult: [1, 2, 4, 4]
-                  num_res_blocks: 2
-                  attn_resolutions: []
-                  dropout: 0.0
-                lossconfig:
-                  target: torch.nn.Identity
-        - input_key: cond_aug
-          is_trainable: False
-          target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
-          params:
-            outdim: 256
-    # First stage: AutoencodingEngine pairing an Encoder with a VideoDecoder.
-    # Both receive the same params; the decoder additionally sets video_kernel_size.
-    first_stage_config:
-      target: sgm.models.autoencoder.AutoencodingEngine
-      params:
-        loss_config:
-          target: torch.nn.Identity
-        regularizer_config:
-          target: sgm.modules.autoencoding.regularizers.DiagonalGaussianRegularizer
-        encoder_config:
-          target: sgm.modules.diffusionmodules.model.Encoder
-          params:
-            attn_type: vanilla
-            double_z: True
-            z_channels: 4
-            resolution: 256
-            in_channels: 3
-            out_ch: 3
-            ch: 128
-            ch_mult: [1, 2, 4, 4]
-            num_res_blocks: 2
-            attn_resolutions: []
-            dropout: 0.0
-        decoder_config:
-          target: sgm.modules.autoencoding.temporal_ae.VideoDecoder
-          params:
-            attn_type: vanilla
-            double_z: True
-            z_channels: 4
-            resolution: 256
-            in_channels: 3
-            out_ch: 3
-            ch: 128
-            ch_mult: [1, 2, 4, 4]
-            num_res_blocks: 2
-            attn_resolutions: []
-            dropout: 0.0
-            video_kernel_size: [3, 1, 1]

configs/inference/svd_image_decoder.yaml DELETED Viewed

@@ -1,114 +0,0 @@
-# Inference config: a DiffusionEngine assembled from four sub-configs
-# (denoiser_config, network_config, conditioner_config, first_stage_config).
-# Every `target` is a dotted import path; `params` are passed to that target.
-model:
-  target: sgm.models.diffusion.DiffusionEngine
-  params:
-    scale_factor: 0.18215
-    disable_first_stage_autocast: True
-    # Denoiser wrapper and the scaling rule it is configured with.
-    denoiser_config:
-      target: sgm.modules.diffusionmodules.denoiser.Denoiser
-      params:
-        scaling_config:
-          target: sgm.modules.diffusionmodules.denoiser_scaling.VScalingWithEDMcNoise
-    # Denoising network: VideoUNet, 8 input channels and 4 output channels.
-    network_config:
-      target: sgm.modules.diffusionmodules.video_model.VideoUNet
-      params:
-        adm_in_channels: 768
-        num_classes: sequential
-        use_checkpoint: True
-        in_channels: 8
-        out_channels: 4
-        model_channels: 320
-        attention_resolutions: [4, 2, 1]
-        num_res_blocks: 2
-        channel_mult: [1, 2, 4, 4]
-        num_head_channels: 64
-        use_linear_in_transformer: True
-        transformer_depth: 1
-        context_dim: 1024
-        spatial_transformer_attn_type: softmax-xformers
-        extra_ff_mix_layer: True
-        use_spatial_context: True
-        merge_strategy: learned_with_images
-        video_kernel_size: [3, 1, 1]
-    # Conditioner: five embedders, all with is_trainable: False, selected by input_key
-    # (cond_frames_without_noise, fps_id, motion_bucket_id, cond_frames, cond_aug).
-    conditioner_config:
-      target: sgm.modules.GeneralConditioner
-      params:
-        emb_models:
-        - is_trainable: False
-          input_key: cond_frames_without_noise
-          target: sgm.modules.encoders.modules.FrozenOpenCLIPImagePredictionEmbedder
-          params:
-            n_cond_frames: 1
-            n_copies: 1
-            open_clip_embedding_config:
-              target: sgm.modules.encoders.modules.FrozenOpenCLIPImageEmbedder
-              params:
-                freeze: True
-        - input_key: fps_id
-          is_trainable: False
-          target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
-          params:
-            outdim: 256
-        - input_key: motion_bucket_id
-          is_trainable: False
-          target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
-          params:
-            outdim: 256
-        - input_key: cond_frames
-          is_trainable: False
-          target: sgm.modules.encoders.modules.VideoPredictionEmbedderWithEncoder
-          params:
-            disable_encoder_autocast: True
-            n_cond_frames: 1
-            n_copies: 1
-            is_ae: True
-            encoder_config:
-              target: sgm.models.autoencoder.AutoencoderKLModeOnly
-              params:
-                embed_dim: 4
-                monitor: val/rec_loss
-                ddconfig:
-                  attn_type: vanilla-xformers
-                  double_z: True
-                  z_channels: 4
-                  resolution: 256
-                  in_channels: 3
-                  out_ch: 3
-                  ch: 128
-                  ch_mult: [1, 2, 4, 4]
-                  num_res_blocks: 2
-                  attn_resolutions: []
-                  dropout: 0.0
-                lossconfig:
-                  target: torch.nn.Identity
-        - input_key: cond_aug
-          is_trainable: False
-          target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
-          params:
-            outdim: 256
-    # First stage: a single AutoencoderKL (no separate encoder/decoder entries).
-    # Its ddconfig repeats the values of the conditioner's nested encoder_config.ddconfig.
-    first_stage_config:
-      target: sgm.models.autoencoder.AutoencoderKL
-      params:
-        embed_dim: 4
-        monitor: val/rec_loss
-        ddconfig:
-          attn_type: vanilla-xformers
-          double_z: True
-          z_channels: 4
-          resolution: 256
-          in_channels: 3
-          out_ch: 3
-          ch: 128
-          ch_mult: [1, 2, 4, 4]
-          num_res_blocks: 2
-          attn_resolutions: []
-          dropout: 0.0
-        lossconfig:
-          target: torch.nn.Identity

data/DejaVuSans.ttf DELETED Viewed

Binary file (757 kB)

images/blink_meme.png ADDED Viewed

images/confused2_meme.png ADDED Viewed

images/confused_meme.png ADDED Viewed

images/disaster_meme.png ADDED Viewed

images/distracted_meme.png ADDED Viewed

images/hide_meme.png ADDED Viewed

images/nazare_meme.png ADDED Viewed

images/success_meme.png ADDED Viewed

images/willy_meme.png ADDED Viewed

images/wink_meme.png ADDED Viewed

main.py DELETED Viewed

@@ -1,943 +0,0 @@
-import argparse
-import datetime
-import glob
-import inspect
-import os
-import sys
-from inspect import Parameter
-from typing import Union
-import numpy as np
-import pytorch_lightning as pl
-import torch
-import torchvision
-import wandb
-from matplotlib import pyplot as plt
-from natsort import natsorted
-from omegaconf import OmegaConf
-from packaging import version
-from PIL import Image
-from pytorch_lightning import seed_everything
-from pytorch_lightning.callbacks import Callback
-from pytorch_lightning.loggers import WandbLogger
-from pytorch_lightning.trainer import Trainer
-from pytorch_lightning.utilities import rank_zero_only
-from sgm.util import exists, instantiate_from_config, isheatmap
-# Switch read by SetupCallback.on_fit_start: when true, global rank 0 sleeps 5 s
-# before saving the project config, and the other ranks leave the log directory
-# in place instead of moving it under "child_runs".
-MULTINODE_HACKS = True
-def default_trainer_args():
-    """Return the declared default of every ``Trainer.__init__`` parameter.
-
-    Returns:
-        dict mapping parameter name to its default value. ``self`` and any
-        parameter that declares no default are left out.
-    """
-    argspec = dict(inspect.signature(Trainer.__init__).parameters)
-    argspec.pop("self")
-    # Test the parameter's *default* against the sentinel. The previous check
-    # compared the Parameter object itself with ``Parameter.empty``, which is
-    # always unequal, so nothing was ever filtered out and ``Parameter.empty``
-    # could end up as a "default" value.
-    return {
-        name: param.default
-        for name, param in argspec.items()
-        if param.default is not Parameter.empty
-    }
-def get_parser(**parser_kwargs):
-    """Build the command-line parser of the training script.
-
-    Args:
-        **parser_kwargs: forwarded unchanged to ``argparse.ArgumentParser``.
-
-    Returns:
-        An ``argparse.ArgumentParser`` with the script's own options plus one
-        ``--<name>`` option for every entry of ``default_trainer_args()``.
-    """
-
-    def str2bool(v):
-        """Turn a yes/no style command-line token into a bool."""
-        if isinstance(v, bool):
-            return v
-        token = v.lower()
-        if token in ("yes", "true", "t", "y", "1"):
-            return True
-        if token in ("no", "false", "f", "n", "0"):
-            return False
-        raise argparse.ArgumentTypeError("Boolean value expected.")
-
-    parser = argparse.ArgumentParser(**parser_kwargs)
-
-    def add_bool_flag(*flags, default, help_text):
-        """Register a flag that is True when given bare, else parsed by str2bool."""
-        parser.add_argument(
-            *flags,
-            type=str2bool,
-            nargs="?",
-            const=True,
-            default=default,
-            help=help_text,
-        )
-
-    # NOTE(review): with nargs="?" a bare ``-n`` / ``-r`` stores const=True (a
-    # bool) although type=str; confirm the consumers of opt.name / opt.resume
-    # are prepared for that.
-    parser.add_argument(
-        "-n",
-        "--name",
-        type=str,
-        const=True,
-        default="",
-        nargs="?",
-        help="postfix for logdir",
-    )
-    add_bool_flag(
-        "--no_date",
-        default=False,
-        help_text="if True, skip date generation for logdir and only use naming via opt.base or opt.name (+ opt.postfix, optionally)",
-    )
-    parser.add_argument(
-        "-r",
-        "--resume",
-        type=str,
-        const=True,
-        default="",
-        nargs="?",
-        help="resume from logdir or checkpoint in logdir",
-    )
-    parser.add_argument(
-        "-b",
-        "--base",
-        nargs="*",
-        metavar="base_config.yaml",
-        help="paths to base configs. Loaded from left-to-right. "
-        "Parameters can be overwritten or added with command-line options of the form `--key value`.",
-        default=[],
-    )
-    add_bool_flag("-t", "--train", default=True, help_text="train")
-    add_bool_flag("--no-test", default=False, help_text="disable test")
-    parser.add_argument(
-        "-p", "--project", help="name of new or path to existing project"
-    )
-    add_bool_flag(
-        "-d", "--debug", default=False, help_text="enable post-mortem debugging"
-    )
-    parser.add_argument(
-        "-s",
-        "--seed",
-        type=int,
-        default=23,
-        help="seed for seed_everything",
-    )
-    parser.add_argument(
-        "-f",
-        "--postfix",
-        type=str,
-        default="",
-        help="post-postfix for default name",
-    )
-    parser.add_argument(
-        "--projectname",
-        type=str,
-        default="stablediffusion",
-    )
-    parser.add_argument(
-        "-l",
-        "--logdir",
-        type=str,
-        default="logs",
-        help="directory for logging dat shit",
-    )
-    add_bool_flag(
-        "--scale_lr",
-        default=False,
-        help_text="scale base-lr by ngpu * batch_size * n_accumulate",
-    )
-    add_bool_flag(
-        "--legacy_naming",
-        default=False,
-        help_text="name run based on config file name if true, else by whole path",
-    )
-    add_bool_flag(
-        "--enable_tf32",
-        default=False,
-        help_text="enables the TensorFloat32 format both for matmuls and cuDNN for pytorch 1.12",
-    )
-    parser.add_argument(
-        "--startup",
-        type=str,
-        default=None,
-        help="Startuptime from distributed script",
-    )
-    add_bool_flag(
-        "--wandb",
-        default=False,  # TODO: later default to True
-        help_text="log to wandb",
-    )
-    # The help text here used to be a copy of the --wandb one ("log to wandb").
-    add_bool_flag(
-        "--no_base_name",
-        default=False,  # TODO: later default to True
-        help_text="if True, do not derive the run name from the first base config",
-    )
-    # Only registered explicitly on torch >= 2.0.0.
-    if version.parse(torch.__version__) >= version.parse("2.0.0"):
-        parser.add_argument(
-            "--resume_from_checkpoint",
-            type=str,
-            default=None,
-            help="single checkpoint file to resume from",
-        )
-    # Expose every Trainer default as an overridable ``--<name>`` option.
-    for key, value in default_trainer_args().items():
-        parser.add_argument("--" + key, default=value)
-    return parser
-def get_checkpoint_name(logdir):
-    """Select the checkpoint to resume from and name the next "last" checkpoint.
-
-    Args:
-        logdir: run directory that contains a ``checkpoints`` sub-directory.
-
-    Returns:
-        Tuple ``(ckpt, melk_ckpt_name)``: the path of the chosen ``last*.ckpt``
-        file and a file name of the form ``last-v<N>.ckpt``.
-
-    Raises:
-        FileNotFoundError: if no ``last*.ckpt`` file exists under ``logdir``.
-    """
-    pattern = os.path.join(logdir, "checkpoints", "last**.ckpt")
-    candidates = natsorted(glob.glob(pattern))
-    print('available "last" checkpoints:')
-    print(candidates)
-    if not candidates:
-        # Used to fall through to ``ckpt[0]`` and fail with a bare IndexError.
-        raise FileNotFoundError(f'no "last" checkpoint matching {pattern}')
-    if len(candidates) > 1:
-        print("got most recent checkpoint")
-        ckpt = sorted(candidates, key=os.path.getmtime)[-1]
-        print(f"Most recent ckpt is {ckpt}")
-        with open(os.path.join(logdir, "most_recent_ckpt.txt"), "w") as f:
-            f.write(ckpt + "\n")
-        try:
-            # "last-v3.ckpt" -> 3; a plain "last.ckpt" has no numeric suffix.
-            # Named ckpt_version so the imported ``version`` module is not shadowed.
-            ckpt_version = int(
-                os.path.basename(ckpt).split("-v")[-1].split(".")[0]
-            )
-        except ValueError as e:
-            print("version confusion but not bad")
-            print(e)
-            ckpt_version = 1
-    else:
-        # in this case, we only have one "last.ckpt"
-        ckpt = candidates[0]
-        ckpt_version = 1
-    melk_ckpt_name = f"last-v{ckpt_version}.ckpt"
-    print(f"Current melk ckpt name: {melk_ckpt_name}")
-    return ckpt, melk_ckpt_name
-class SetupCallback(Callback):
-    """Lightning callback that prepares the run directories and stores the configs.
-
-    On fit start, global rank 0 creates the log, checkpoint and config
-    directories and writes the project and lightning configs as YAML. On an
-    exception, global rank 0 saves a checkpoint unless ``debug`` is set.
-    """
-
-    def __init__(
-        self,
-        resume,
-        now,
-        logdir,
-        ckptdir,
-        cfgdir,
-        config,
-        lightning_config,
-        debug,
-        ckpt_name=None,
-    ):
-        super().__init__()
-        self.resume, self.now = resume, now
-        self.logdir, self.ckptdir, self.cfgdir = logdir, ckptdir, cfgdir
-        self.config, self.lightning_config = config, lightning_config
-        self.debug = debug
-        self.ckpt_name = ckpt_name
-
-    def on_exception(self, trainer: pl.Trainer, pl_module, exception):
-        """Save a checkpoint from global rank 0 when not debugging."""
-        if self.debug or trainer.global_rank != 0:
-            return
-        print("Summoning checkpoint.")
-        file_name = "last.ckpt" if self.ckpt_name is None else self.ckpt_name
-        trainer.save_checkpoint(os.path.join(self.ckptdir, file_name))
-
-    def on_fit_start(self, trainer, pl_module):
-        """Create the run directories and dump both configs (global rank 0 only)."""
-        if trainer.global_rank != 0:
-            # ModelCheckpoint callback created log directory --- remove it
-            if MULTINODE_HACKS or self.resume or not os.path.exists(self.logdir):
-                return
-            parent, run_name = os.path.split(self.logdir)
-            target = os.path.join(parent, "child_runs", run_name)
-            os.makedirs(os.path.split(target)[0], exist_ok=True)
-            try:
-                os.rename(self.logdir, target)
-            except FileNotFoundError:
-                pass
-            return
-
-        # Create logdirs and save configs
-        for directory in (self.logdir, self.ckptdir, self.cfgdir):
-            os.makedirs(directory, exist_ok=True)
-        wants_trainstep_dir = (
-            "callbacks" in self.lightning_config
-            and "metrics_over_trainsteps_checkpoint"
-            in self.lightning_config["callbacks"]
-        )
-        if wants_trainstep_dir:
-            os.makedirs(
-                os.path.join(self.ckptdir, "trainstep_checkpoints"),
-                exist_ok=True,
-            )
-        print("Project config")
-        print(OmegaConf.to_yaml(self.config))
-        if MULTINODE_HACKS:
-            import time
-
-            time.sleep(5)
-        project_yaml = os.path.join(self.cfgdir, f"{self.now}-project.yaml")
-        OmegaConf.save(self.config, project_yaml)
-        print("Lightning config")
-        print(OmegaConf.to_yaml(self.lightning_config))
-        lightning_yaml = os.path.join(self.cfgdir, f"{self.now}-lightning.yaml")
-        OmegaConf.save(
-            OmegaConf.create({"lightning": self.lightning_config}),
-            lightning_yaml,
-        )
-class ImageLogger(Callback):
-    """Callback that periodically saves ``pl_module.log_images`` output as PNG files.
-
-    Files are written below ``<logger.save_dir>/images/<split>``. When the
-    module's logger is a ``WandbLogger`` the non-heatmap images are also sent
-    to it.
-    """
-
-    def __init__(
-        self,
-        batch_frequency,
-        max_images,
-        clamp=True,
-        increase_log_steps=True,
-        rescale=True,
-        disabled=False,
-        log_on_batch_idx=False,
-        log_first_step=False,
-        log_images_kwargs=None,
-        log_before_first_step=False,
-        enable_autocast=True,
-    ):
-        super().__init__()
-        self.enable_autocast = enable_autocast
-        self.rescale = rescale
-        self.batch_freq = batch_frequency
-        self.max_images = max_images
-        # Extra early logging steps: powers of two up to batch_freq.
-        self.log_steps = [2**n for n in range(int(np.log2(self.batch_freq)) + 1)]
-        if not increase_log_steps:
-            self.log_steps = [self.batch_freq]
-        self.clamp = clamp
-        self.disabled = disabled
-        self.log_on_batch_idx = log_on_batch_idx
-        self.log_images_kwargs = log_images_kwargs if log_images_kwargs else {}
-        self.log_first_step = log_first_step
-        self.log_before_first_step = log_before_first_step
-
-    @rank_zero_only
-    def log_local(
-        self,
-        save_dir,
-        split,
-        images,
-        global_step,
-        current_epoch,
-        batch_idx,
-        pl_module: Union[None, pl.LightningModule] = None,
-    ):
-        """Write every entry of ``images`` to disk; forward to wandb if ``pl_module`` is given."""
-        root = os.path.join(save_dir, "images", split)
-        for k in images:
-            filename = "{}_gs-{:06}_e-{:06}_b-{:06}.png".format(
-                k, global_step, current_epoch, batch_idx
-            )
-            path = os.path.join(root, filename)
-            if isheatmap(images[k]):
-                _, ax = plt.subplots()
-                heat = ax.matshow(
-                    images[k].cpu().numpy(), cmap="hot", interpolation="lanczos"
-                )
-                plt.colorbar(heat)
-                plt.axis("off")
-                os.makedirs(root, exist_ok=True)
-                plt.savefig(path)
-                plt.close()
-                # TODO: support wandb
-            else:
-                grid = torchvision.utils.make_grid(images[k], nrow=4)
-                if self.rescale:
-                    grid = (grid + 1.0) / 2.0  # -1,1 -> 0,1; c,h,w
-                grid = grid.transpose(0, 1).transpose(1, 2).squeeze(-1)
-                grid = grid.numpy()
-                grid = (grid * 255).astype(np.uint8)
-                os.makedirs(os.path.split(path)[0], exist_ok=True)
-                img = Image.fromarray(grid)
-                img.save(path)
-                if exists(pl_module):
-                    assert isinstance(
-                        pl_module.logger, WandbLogger
-                    ), "logger_log_image only supports WandbLogger currently"
-                    pl_module.logger.log_image(
-                        key=f"{split}/{k}",
-                        images=[
-                            img,
-                        ],
-                        step=pl_module.global_step,
-                    )
-
-    @rank_zero_only
-    def log_img(self, pl_module, batch, batch_idx, split="train"):
-        """Run ``pl_module.log_images`` on ``batch`` and store the result if logging is due."""
-        check_idx = batch_idx if self.log_on_batch_idx else pl_module.global_step
-        if not (
-            self.check_frequency(check_idx)
-            and hasattr(pl_module, "log_images")
-            and callable(pl_module.log_images)
-            and self.max_images > 0
-        ):
-            return
-        is_train = pl_module.training
-        if is_train:
-            pl_module.eval()
-        try:
-            gpu_autocast_kwargs = {
-                "enabled": self.enable_autocast,  # torch.is_autocast_enabled(),
-                "dtype": torch.get_autocast_gpu_dtype(),
-                "cache_enabled": torch.is_autocast_cache_enabled(),
-            }
-            with torch.no_grad(), torch.cuda.amp.autocast(**gpu_autocast_kwargs):
-                images = pl_module.log_images(
-                    batch, split=split, **self.log_images_kwargs
-                )
-            for k in images:
-                N = min(images[k].shape[0], self.max_images)
-                if not isheatmap(images[k]):
-                    images[k] = images[k][:N]
-                if isinstance(images[k], torch.Tensor):
-                    images[k] = images[k].detach().float().cpu()
-                    if self.clamp and not isheatmap(images[k]):
-                        images[k] = torch.clamp(images[k], -1.0, 1.0)
-            self.log_local(
-                pl_module.logger.save_dir,
-                split,
-                images,
-                pl_module.global_step,
-                pl_module.current_epoch,
-                batch_idx,
-                pl_module=pl_module
-                if isinstance(pl_module.logger, WandbLogger)
-                else None,
-            )
-        finally:
-            # Restore training mode even when image logging raises; before, a
-            # failure here left the module stuck in eval mode.
-            if is_train:
-                pl_module.train()
-
-    def check_frequency(self, check_idx):
-        """Return True when ``check_idx`` is a logging step; consumes one ``log_steps`` entry."""
-        due = (check_idx % self.batch_freq) == 0 or (check_idx in self.log_steps)
-        if due and (check_idx > 0 or self.log_first_step):
-            # Pop only while entries remain, instead of catching and printing
-            # an IndexError on every logging step once the list is empty.
-            if self.log_steps:
-                self.log_steps.pop(0)
-            return True
-        return False
-
-    @rank_zero_only
-    def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx):
-        if not self.disabled and (pl_module.global_step > 0 or self.log_first_step):
-            self.log_img(pl_module, batch, batch_idx, split="train")
-
-    @rank_zero_only
-    def on_train_batch_start(self, trainer, pl_module, batch, batch_idx):
-        if self.log_before_first_step and pl_module.global_step == 0:
-            print(f"{self.__class__.__name__}: logging before training")
-            self.log_img(pl_module, batch, batch_idx, split="train")
-
-    @rank_zero_only
-    def on_validation_batch_end(
-        self, trainer, pl_module, outputs, batch, batch_idx, *args, **kwargs
-    ):
-        if not self.disabled and pl_module.global_step > 0:
-            self.log_img(pl_module, batch, batch_idx, split="val")
-        if hasattr(pl_module, "calibrate_grad_norm"):
-            if (
-                pl_module.calibrate_grad_norm and batch_idx % 25 == 0
-            ) and batch_idx > 0:
-                # This class defines no ``log_gradients``; call it only when a
-                # subclass provides one instead of raising AttributeError.
-                log_gradients = getattr(self, "log_gradients", None)
-                if callable(log_gradients):
-                    log_gradients(trainer, pl_module, batch_idx=batch_idx)
-@rank_zero_only
-def init_wandb(save_dir, opt, config, group_name, name_str):
-    """Point WANDB_DIR at ``save_dir`` and start a wandb run.
-
-    With ``opt.debug`` set the run is started in offline mode with only the
-    project and group; otherwise the config, code directory and run name are
-    passed as well.
-    """
-    print(f"setting WANDB_DIR to {save_dir}")
-    os.makedirs(save_dir, exist_ok=True)
-    os.environ["WANDB_DIR"] = save_dir
-    init_kwargs = {"project": opt.projectname, "group": group_name}
-    if opt.debug:
-        init_kwargs["mode"] = "offline"
-    else:
-        init_kwargs.update(
-            config=config,
-            settings=wandb.Settings(code_dir="./sgm"),
-            name=name_str,
-        )
-    wandb.init(**init_kwargs)
-if __name__ == "__main__":
-    # custom parser to specify config files, train, test and debug mode,
-    # postfix, resume.
-    # `--key value` arguments are interpreted as arguments to the trainer.
-    # `nested.key=value` arguments are interpreted as config parameters.
-    # configs are merged from left-to-right followed by command line parameters.
-    # model:
-    #   base_learning_rate: float
-    #   target: path to lightning module
-    #   params:
-    #       key: value
-    # data:
-    #   target: main.DataModuleFromConfig
-    #   params:
-    #      batch_size: int
-    #      wrap: bool
-    #      train:
-    #          target: path to train dataset
-    #          params:
-    #              key: value
-    #      validation:
-    #          target: path to validation dataset
-    #          params:
-    #              key: value
-    #      test:
-    #          target: path to test dataset
-    #          params:
-    #              key: value
-    # lightning: (optional, has sane defaults and can be specified on cmdline)
-    #   trainer:
-    #       additional arguments to trainer
-    #   logger:
-    #       logger to instantiate
-    #   modelcheckpoint:
-    #       modelcheckpoint to instantiate
-    #   callbacks:
-    #       callback1:
-    #           target: importpath
-    #           params:
-    #               key: value
-    now = datetime.datetime.now().strftime("%Y-%m-%dT%H-%M-%S")
-    # add cwd for convenience and to make classes in this file available when
-    # running as `python main.py`
-    # (in particular `main.DataModuleFromConfig`)
-    sys.path.append(os.getcwd())
-    parser = get_parser()
-    opt, unknown = parser.parse_known_args()
-    if opt.name and opt.resume:
-        raise ValueError(
-            "-n/--name and -r/--resume cannot be specified both."
-            "If you want to resume training in a new log folder, "
-            "use -n/--name in combination with --resume_from_checkpoint"
-        )
-    melk_ckpt_name = None
-    name = None
-    if opt.resume:
-        if not os.path.exists(opt.resume):
-            raise ValueError("Cannot find {}".format(opt.resume))
-        if os.path.isfile(opt.resume):
-            paths = opt.resume.split("/")
-            # idx = len(paths)-paths[::-1].index("logs")+1
-            # logdir = "/".join(paths[:idx])
-            logdir = "/".join(paths[:-2])
-            ckpt = opt.resume
-            _, melk_ckpt_name = get_checkpoint_name(logdir)
-        else:
-            assert os.path.isdir(opt.resume), opt.resume
-            logdir = opt.resume.rstrip("/")
-            ckpt, melk_ckpt_name = get_checkpoint_name(logdir)
-        print("#" * 100)
-        print(f'Resuming from checkpoint "{ckpt}"')
-        print("#" * 100)
-        opt.resume_from_checkpoint = ckpt
-        base_configs = sorted(glob.glob(os.path.join(logdir, "configs/*.yaml")))
-        opt.base = base_configs + opt.base
-        _tmp = logdir.split("/")
-        nowname = _tmp[-1]
-    else:
-        if opt.name:
-            name = "_" + opt.name
-        elif opt.base:
-            if opt.no_base_name:
-                name = ""
-            else:
-                if opt.legacy_naming:
-                    cfg_fname = os.path.split(opt.base[0])[-1]
-                    cfg_name = os.path.splitext(cfg_fname)[0]
-                else:
-                    assert "configs" in os.path.split(opt.base[0])[0], os.path.split(
-                        opt.base[0]
-                    )[0]
-                    cfg_path = os.path.split(opt.base[0])[0].split(os.sep)[
-                        os.path.split(opt.base[0])[0].split(os.sep).index("configs")
-                        + 1 :
-                    ]  # cut away the first one (we assert all configs are in "configs")
-                    cfg_name = os.path.splitext(os.path.split(opt.base[0])[-1])[0]
-                    cfg_name = "-".join(cfg_path) + f"-{cfg_name}"
-                name = "_" + cfg_name
-        else:
-            name = ""
-        if not opt.no_date:
-            nowname = now + name + opt.postfix
-        else:
-            nowname = name + opt.postfix
-            if nowname.startswith("_"):
-                nowname = nowname[1:]
-        logdir = os.path.join(opt.logdir, nowname)
-        print(f"LOGDIR: {logdir}")
-    ckptdir = os.path.join(logdir, "checkpoints")
-    cfgdir = os.path.join(logdir, "configs")
-    seed_everything(opt.seed, workers=True)
-    # move before model init, in case a torch.compile(...) is called somewhere
-    if opt.enable_tf32:
-        # pt_version = version.parse(torch.__version__)
-        torch.backends.cuda.matmul.allow_tf32 = True
-        torch.backends.cudnn.allow_tf32 = True
-        print(f"Enabling TF32 for PyTorch {torch.__version__}")
-    else:
-        print(f"Using default TF32 settings for PyTorch {torch.__version__}:")
-        print(
-            f"torch.backends.cuda.matmul.allow_tf32={torch.backends.cuda.matmul.allow_tf32}"
-        )
-        print(f"torch.backends.cudnn.allow_tf32={torch.backends.cudnn.allow_tf32}")
-    try:
-        # init and save configs
-        configs = [OmegaConf.load(cfg) for cfg in opt.base]
-        cli = OmegaConf.from_dotlist(unknown)
-        config = OmegaConf.merge(*configs, cli)
-        lightning_config = config.pop("lightning", OmegaConf.create())
-        # merge trainer cli with config
-        trainer_config = lightning_config.get("trainer", OmegaConf.create())
-        # default to gpu
-        trainer_config["accelerator"] = "gpu"
-        #
-        standard_args = default_trainer_args()
-        for k in standard_args:
-            if getattr(opt, k) != standard_args[k]:
-                trainer_config[k] = getattr(opt, k)
-        ckpt_resume_path = opt.resume_from_checkpoint
-        if not "devices" in trainer_config and trainer_config["accelerator"] != "gpu":
-            del trainer_config["accelerator"]
-            cpu = True
-        else:
-            gpuinfo = trainer_config["devices"]
-            print(f"Running on GPUs {gpuinfo}")
-            cpu = False
-        trainer_opt = argparse.Namespace(**trainer_config)
-        lightning_config.trainer = trainer_config
-        # model
-        model = instantiate_from_config(config.model)
-        # trainer and callbacks
-        trainer_kwargs = dict()
-        # default logger configs
-        default_logger_cfgs = {
-            "wandb": {
-                "target": "pytorch_lightning.loggers.WandbLogger",
-                "params": {
-                    "name": nowname,
-                    # "save_dir": logdir,
-                    "offline": opt.debug,
-                    "id": nowname,
-                    "project": opt.projectname,
-                    "log_model": False,
-                    # "dir": logdir,
-                },
-            },
-            "csv": {
-                "target": "pytorch_lightning.loggers.CSVLogger",
-                "params": {
-                    "name": "testtube",  # hack for sbord fanatics
-                    "save_dir": logdir,
-                },
-            },
-        }
-        default_logger_cfg = default_logger_cfgs["wandb" if opt.wandb else "csv"]
-        if opt.wandb:
-            # TODO change once leaving "swiffer" config directory
-            try:
-                group_name = nowname.split(now)[-1].split("-")[1]
-            except:
-                group_name = nowname
-            default_logger_cfg["params"]["group"] = group_name
-            init_wandb(
-                os.path.join(os.getcwd(), logdir),
-                opt=opt,
-                group_name=group_name,
-                config=config,
-                name_str=nowname,
-            )
-        if "logger" in lightning_config:
-            logger_cfg = lightning_config.logger
-        else:
-            logger_cfg = OmegaConf.create()
-        logger_cfg = OmegaConf.merge(default_logger_cfg, logger_cfg)
-        trainer_kwargs["logger"] = instantiate_from_config(logger_cfg)
-        # modelcheckpoint - use TrainResult/EvalResult(checkpoint_on=metric) to
-        # specify which metric is used to determine best models
-        default_modelckpt_cfg = {
-            "target": "pytorch_lightning.callbacks.ModelCheckpoint",
-            "params": {
-                "dirpath": ckptdir,
-                "filename": "{epoch:06}",
-                "verbose": True,
-                "save_last": True,
-            },
-        }
-        if hasattr(model, "monitor"):
-            print(f"Monitoring {model.monitor} as checkpoint metric.")
-            default_modelckpt_cfg["params"]["monitor"] = model.monitor
-            default_modelckpt_cfg["params"]["save_top_k"] = 3
-        if "modelcheckpoint" in lightning_config:
-            modelckpt_cfg = lightning_config.modelcheckpoint
-        else:
-            modelckpt_cfg = OmegaConf.create()
-        modelckpt_cfg = OmegaConf.merge(default_modelckpt_cfg, modelckpt_cfg)
-        print(f"Merged modelckpt-cfg: \n{modelckpt_cfg}")
-        # https://pytorch-lightning.readthedocs.io/en/stable/extensions/strategy.html
-        # default to ddp if not further specified
-        default_strategy_config = {"target": "pytorch_lightning.strategies.DDPStrategy"}
-        if "strategy" in lightning_config:
-            strategy_cfg = lightning_config.strategy
-        else:
-            strategy_cfg = OmegaConf.create()
-            default_strategy_config["params"] = {
-                "find_unused_parameters": False,
-                # "static_graph": True,
-                # "ddp_comm_hook": default.fp16_compress_hook  # TODO: experiment with this, also for DDPSharded
-            }
-        strategy_cfg = OmegaConf.merge(default_strategy_config, strategy_cfg)
-        print(
-            f"strategy config: \n ++++++++++++++ \n {strategy_cfg} \n ++++++++++++++ "
-        )
-        trainer_kwargs["strategy"] = instantiate_from_config(strategy_cfg)
-        # add callback which sets up log directory
-        default_callbacks_cfg = {
-            "setup_callback": {
-                "target": "main.SetupCallback",
-                "params": {
-                    "resume": opt.resume,
-                    "now": now,
-                    "logdir": logdir,
-                    "ckptdir": ckptdir,
-                    "cfgdir": cfgdir,
-                    "config": config,
-                    "lightning_config": lightning_config,
-                    "debug": opt.debug,
-                    "ckpt_name": melk_ckpt_name,
-                },
-            },
-            "image_logger": {
-                "target": "main.ImageLogger",
-                "params": {"batch_frequency": 1000, "max_images": 4, "clamp": True},
-            },
-            "learning_rate_logger": {
-                "target": "pytorch_lightning.callbacks.LearningRateMonitor",
-                "params": {
-                    "logging_interval": "step",
-                    # "log_momentum": True
-                },
-            },
-        }
-        if version.parse(pl.__version__) >= version.parse("1.4.0"):
-            default_callbacks_cfg.update({"checkpoint_callback": modelckpt_cfg})
-        if "callbacks" in lightning_config:
-            callbacks_cfg = lightning_config.callbacks
-        else:
-            callbacks_cfg = OmegaConf.create()
-        if "metrics_over_trainsteps_checkpoint" in callbacks_cfg:
-            print(
-                "Caution: Saving checkpoints every n train steps without deleting. This might require some free space."
-            )
-            default_metrics_over_trainsteps_ckpt_dict = {
-                "metrics_over_trainsteps_checkpoint": {
-                    "target": "pytorch_lightning.callbacks.ModelCheckpoint",
-                    "params": {
-                        "dirpath": os.path.join(ckptdir, "trainstep_checkpoints"),
-                        "filename": "{epoch:06}-{step:09}",
-                        "verbose": True,
-                        "save_top_k": -1,
-                        "every_n_train_steps": 10000,
-                        "save_weights_only": True,
-                    },
-                }
-            }
-            default_callbacks_cfg.update(default_metrics_over_trainsteps_ckpt_dict)
-        callbacks_cfg = OmegaConf.merge(default_callbacks_cfg, callbacks_cfg)
-        if "ignore_keys_callback" in callbacks_cfg and ckpt_resume_path is not None:
-            callbacks_cfg.ignore_keys_callback.params["ckpt_path"] = ckpt_resume_path
-        elif "ignore_keys_callback" in callbacks_cfg:
-            del callbacks_cfg["ignore_keys_callback"]
-        trainer_kwargs["callbacks"] = [
-            instantiate_from_config(callbacks_cfg[k]) for k in callbacks_cfg
-        ]
-        if not "plugins" in trainer_kwargs:
-            trainer_kwargs["plugins"] = list()
-        # cmd line trainer args (which are in trainer_opt) have always priority over config-trainer-args (which are in trainer_kwargs)
-        trainer_opt = vars(trainer_opt)
-        trainer_kwargs = {
-            key: val for key, val in trainer_kwargs.items() if key not in trainer_opt
-        }
-        trainer = Trainer(**trainer_opt, **trainer_kwargs)
-        trainer.logdir = logdir  ###
-        # data
-        data = instantiate_from_config(config.data)
-        # NOTE according to https://pytorch-lightning.readthedocs.io/en/latest/datamodules.html
-        # calling these ourselves should not be necessary but it is.
-        # lightning still takes care of proper multiprocessing though
-        data.prepare_data()
-        # data.setup()
-        print("#### Data #####")
-        try:
-            for k in data.datasets:
-                print(
-                    f"{k}, {data.datasets[k].__class__.__name__}, {len(data.datasets[k])}"
-                )
-        except:
-            print("datasets not yet initialized.")
-        # configure learning rate
-        if "batch_size" in config.data.params:
-            bs, base_lr = config.data.params.batch_size, config.model.base_learning_rate
-        else:
-            bs, base_lr = (
-                config.data.params.train.loader.batch_size,
-                config.model.base_learning_rate,
-            )
-        if not cpu:
-            ngpu = len(lightning_config.trainer.devices.strip(",").split(","))
-        else:
-            ngpu = 1
-        if "accumulate_grad_batches" in lightning_config.trainer:
-            accumulate_grad_batches = lightning_config.trainer.accumulate_grad_batches
-        else:
-            accumulate_grad_batches = 1
-        print(f"accumulate_grad_batches = {accumulate_grad_batches}")
-        lightning_config.trainer.accumulate_grad_batches = accumulate_grad_batches
-        if opt.scale_lr:
-            model.learning_rate = accumulate_grad_batches * ngpu * bs * base_lr
-            print(
-                "Setting learning rate to {:.2e} = {} (accumulate_grad_batches) * {} (num_gpus) * {} (batchsize) * {:.2e} (base_lr)".format(
-                    model.learning_rate, accumulate_grad_batches, ngpu, bs, base_lr
-                )
-            )
-        else:
-            model.learning_rate = base_lr
-            print("++++ NOT USING LR SCALING ++++")
-            print(f"Setting learning rate to {model.learning_rate:.2e}")
-        # allow checkpointing via USR1
-        def melk(*args, **kwargs):
-            # run all checkpoint hooks
-            if trainer.global_rank == 0:
-                print("Summoning checkpoint.")
-                if melk_ckpt_name is None:
-                    ckpt_path = os.path.join(ckptdir, "last.ckpt")
-                else:
-                    ckpt_path = os.path.join(ckptdir, melk_ckpt_name)
-                trainer.save_checkpoint(ckpt_path)
-        def divein(*args, **kwargs):
-            if trainer.global_rank == 0:
-                import pudb
-                pudb.set_trace()
-        import signal
-        signal.signal(signal.SIGUSR1, melk)
-        signal.signal(signal.SIGUSR2, divein)
-        # run
-        if opt.train:
-            try:
-                trainer.fit(model, data, ckpt_path=ckpt_resume_path)
-            except Exception:
-                if not opt.debug:
-                    melk()
-                raise
-        if not opt.no_test and not trainer.interrupted:
-            trainer.test(model, data)
-    except RuntimeError as err:
-        if MULTINODE_HACKS:
-            import datetime
-            import os
-            import socket
-            import requests
-            device = os.environ.get("CUDA_VISIBLE_DEVICES", "?")
-            hostname = socket.gethostname()
-            ts = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
-            resp = requests.get("http://169.254.169.254/latest/meta-data/instance-id")
-            print(
-                f"ERROR at {ts} on {hostname}/{resp.text} (CUDA_VISIBLE_DEVICES={device}): {type(err).__name__}: {err}",
-                flush=True,
-            )
-        raise err
-    except Exception:
-        if opt.debug and trainer.global_rank == 0:
-            try:
-                import pudb as debugger
-            except ImportError:
-                import pdb as debugger
-            debugger.post_mortem()
-        raise
-    finally:
-        # move newly created debug project to debug_runs
-        if opt.debug and not opt.resume and trainer.global_rank == 0:
-            dst, name = os.path.split(logdir)
-            dst = os.path.join(dst, "debug_runs", name)
-            os.makedirs(os.path.split(dst)[0], exist_ok=True)
-            os.rename(logdir, dst)
-        if opt.wandb:
-            wandb.finish()
-        # if trainer.global_rank == 0:
-        #    print(trainer.profiler.summary())

model_licenses/LICENSE-SDV DELETED Viewed

@@ -1,31 +0,0 @@
-STABLE VIDEO DIFFUSION NON-COMMERCIAL COMMUNITY LICENSE AGREEMENT
-Dated: November 21, 2023
-“AUP” means the Stability AI Acceptable Use Policy available at https://stability.ai/use-policy, as may be updated from time to time.
-"Agreement" means the terms and conditions for use, reproduction, distribution and modification of the Software Products set forth herein.
-"Derivative Work(s)” means (a) any derivative work of the Software Products as recognized by U.S. copyright laws and (b) any modifications to a Model, and any other model created which is based on or derived from the Model or the Model’s output. For clarity, Derivative Works do not include the output of any Model.
-“Documentation” means any specifications, manuals, documentation, and other written information provided by Stability AI related to the Software.
-"Licensee" or "you" means you, or your employer or any other person or entity (if you are entering into this Agreement on such person or entity's behalf), of the age required under applicable laws, rules or regulations to provide legal consent and that has legal authority to bind your employer or such other person or entity if you are entering in this Agreement on their behalf.
-"Stability AI" or "we" means Stability AI Ltd.
-"Software" means, collectively, Stability AI’s proprietary StableCode made available under this Agreement.
-“Software Products” means Software and Documentation.
-By using or distributing any portion or element of the Software Products, you agree to be bound by this Agreement.
-License Rights and Redistribution.
-Subject to your compliance with this Agreement, the AUP (which is hereby incorporated herein by reference), and the Documentation, Stability AI grants you a non-exclusive, worldwide, non-transferable, non-sublicensable, revocable, royalty free and limited license under Stability AI’s intellectual property or other rights owned by Stability AI embodied in the Software Products to reproduce, distribute, and create Derivative Works of the Software Products for purposes other than commercial or production use.
-b.	If you distribute or make the Software Products, or any Derivative Works thereof, available to a third party, the Software Products, Derivative Works, or any portion thereof, respectively, will remain subject to this Agreement and you must (i) provide a copy of this Agreement to such third party, and (ii) retain the following attribution notice within a "Notice" text file distributed as a part of such copies: "Stable Video Diffusion is licensed under the Stable Video Diffusion Research License, Copyright (c) Stability AI Ltd. All Rights Reserved.” If you create a Derivative Work of a Software Product, you may add your own attribution notices to the Notice file included with the Software Product, provided that you clearly indicate which attributions apply to the Software Product and you must state in the NOTICE file that you changed the Software Product and how it was modified.
-2. 	  Disclaimer of Warranty. UNLESS REQUIRED BY APPLICABLE LAW, THE SOFTWARE PRODUCTS  AND ANY OUTPUT AND RESULTS THEREFROM ARE PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. YOU ARE SOLELY RESPONSIBLE FOR DETERMINING THE APPROPRIATENESS OF USING OR REDISTRIBUTING THE SOFTWARE PRODUCTS AND ASSUME ANY RISKS ASSOCIATED WITH YOUR USE OF THE SOFTWARE PRODUCTS AND ANY OUTPUT AND RESULTS.
-3.   Limitation of Liability. IN NO EVENT WILL STABILITY AI OR ITS AFFILIATES BE LIABLE UNDER ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, TORT, NEGLIGENCE, PRODUCTS LIABILITY, OR OTHERWISE, ARISING OUT OF THIS AGREEMENT, FOR ANY LOST PROFITS OR ANY INDIRECT, SPECIAL, CONSEQUENTIAL, INCIDENTAL, EXEMPLARY OR PUNITIVE DAMAGES, EVEN IF STABILITY AI OR ITS AFFILIATES HAVE BEEN ADVISED OF THE POSSIBILITY OF ANY OF THE FOREGOING.
-3.   Intellectual Property.
-a. 	No trademark licenses are granted under this Agreement, and in connection with the Software Products, neither Stability AI nor Licensee may use any name or mark owned by or associated with the other or any of its affiliates, except as required for reasonable and customary use in describing and redistributing the Software Products.
-Subject to Stability AI’s ownership of the Software Products and Derivative Works made by or for Stability AI, with respect to any Derivative Works that are made by you, as between you and Stability AI, you are and will be the owner of such Derivative Works.
-If you institute litigation or other proceedings against Stability AI (including a cross-claim or counterclaim in a lawsuit) alleging that the Software Products or associated outputs or results, or any portion of any of the foregoing, constitutes infringement of intellectual property or other rights owned or licensable by you, then any licenses granted to you under this Agreement shall terminate as of the date such litigation or claim is filed or instituted. You will indemnify and hold harmless Stability AI from and against any claim by any third party arising out of or related to your use or distribution of the Software Products in violation of this Agreement.
-4.   Term and Termination. The term of this Agreement will commence upon your acceptance of this Agreement or access to the Software Products and will continue in full force and effect until terminated in accordance with the terms and conditions herein. Stability AI may terminate this Agreement if you are in breach of any term or condition of this Agreement. Upon termination of this Agreement, you shall delete and cease use of the Software Products. Sections 2-4 shall survive the termination of this Agreement.

model_licenses/LICENSE-SDXL0.9 DELETED Viewed

@@ -1,75 +0,0 @@
-SDXL 0.9 RESEARCH LICENSE AGREEMENT
-Copyright (c) Stability AI Ltd.
-This License Agreement (as may be amended in accordance with this License Agreement, “License”), between you, or your employer or other entity (if you are entering into this agreement on behalf of your employer or other entity) (“Licensee” or “you”) and Stability AI Ltd. (“Stability AI” or “we”) applies to your use of any computer program, algorithm, source code, object code, or software that is made available by Stability AI under this License (“Software”) and any specifications, manuals, documentation, and other written information provided by Stability AI related to the Software (“Documentation”).
-By clicking “I Accept” below or by using the Software, you agree to the terms of this License. If you do not agree to this License, then you do not have any rights to use the Software or Documentation (collectively, the “Software Products”), and you must immediately cease using the Software Products. If you are agreeing to be bound by the terms of this License on behalf of your employer or other entity, you represent and warrant to Stability AI that you have full legal authority to bind your employer or such entity to this License. If you do not have the requisite authority, you may not accept the License or access the Software Products on behalf of your employer or other entity.
-1. LICENSE GRANT
-a. Subject to your compliance with the Documentation and Sections 2, 3, and 5, Stability AI grants you a non-exclusive, worldwide, non-transferable, non-sublicensable, revocable, royalty free and limited license under Stability AI’s copyright interests to reproduce, distribute, and create derivative works of the Software solely for your non-commercial research purposes. The foregoing license is personal to you, and you may not assign or sublicense this License or any other rights or obligations under this License without Stability AI’s prior written consent; any such assignment or sublicense will be void and will automatically and immediately terminate this License.
-b. You may make a reasonable number of copies of the Documentation solely for use in connection with the license to the Software granted above.
-c. The grant of rights expressly set forth in this Section 1 (License Grant) are the complete grant of rights to you in the Software Products, and no other licenses are granted, whether by waiver, estoppel, implication, equity or otherwise. Stability AI and its licensors reserve all rights not expressly granted by this License.
-2. RESTRICTIONS
-You will not, and will not permit, assist or cause any third party to:
-a. use, modify, copy, reproduce, create derivative works of, or distribute the Software Products (or any derivative works thereof, works incorporating the Software Products, or any data produced by the Software), in whole or in part, for (i) any commercial or production purposes, (ii) military purposes or in the service of nuclear technology, (iii) purposes of surveillance, including any research or development relating to surveillance, (iv) biometric processing, (v) in any manner that infringes, misappropriates, or otherwise violates any third-party rights, or (vi) in any manner that violates any applicable law and violating any privacy or security laws, rules, regulations, directives, or governmental requirements (including the General Data Privacy Regulation (Regulation (EU) 2016/679), the California Consumer Privacy Act, and any and all laws governing the processing of biometric information), as well as all amendments and successor laws to any of the foregoing;
-b. alter or remove copyright and other proprietary notices which appear on or in the Software Products;
-c. utilize any equipment, device, software, or other means to circumvent or remove any security or protection used by Stability AI in connection with the Software, or to circumvent or remove any usage restrictions, or to enable functionality disabled by Stability AI; or
-d. offer or impose any terms on the Software Products that alter, restrict, or are inconsistent with the terms of this License.
-e. 1) violate any applicable U.S. and non-U.S. export control and trade sanctions laws (“Export Laws”); 2) directly or indirectly export, re-export, provide, or otherwise transfer Software Products: (a) to any individual, entity, or country prohibited by Export Laws; (b) to anyone on U.S. or non-U.S. government restricted parties lists; or (c) for any purpose prohibited by Export Laws, including nuclear, chemical or biological weapons, or missile technology applications; 3) use or download Software Products if you or they are: (a) located in a comprehensively sanctioned jurisdiction, (b) currently listed on any U.S. or non-U.S. restricted parties list, or (c) for any purpose prohibited by Export Laws; and (4) will not disguise your location through IP proxying or other methods.
-3. ATTRIBUTION
-Together with any copies of the Software Products (as well as derivative works thereof or works incorporating the Software Products) that you distribute, you must provide (i) a copy of this License, and (ii) the following attribution notice: “SDXL 0.9 is licensed under the SDXL Research License, Copyright (c) Stability AI Ltd. All Rights Reserved.”
-4. DISCLAIMERS
-THE SOFTWARE PRODUCTS ARE PROVIDED “AS IS” AND “WITH ALL FAULTS” WITH NO WARRANTY OF ANY KIND, EXPRESS OR IMPLIED. STABILITY AI EXPRESSLY DISCLAIMS ALL REPRESENTATIONS AND WARRANTIES, EXPRESS OR IMPLIED, WHETHER BY STATUTE, CUSTOM, USAGE OR OTHERWISE AS TO ANY MATTERS RELATED TO THE SOFTWARE PRODUCTS, INCLUDING BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE, SATISFACTORY QUALITY, OR NON-INFRINGEMENT. STABILITY AI MAKES NO WARRANTIES OR REPRESENTATIONS THAT THE SOFTWARE PRODUCTS WILL BE ERROR FREE OR FREE OF VIRUSES OR OTHER HARMFUL COMPONENTS, OR PRODUCE ANY PARTICULAR RESULTS.
-5. LIMITATION OF LIABILITY
-TO THE FULLEST EXTENT PERMITTED BY LAW, IN NO EVENT WILL STABILITY AI BE LIABLE TO YOU (A) UNDER ANY THEORY OF LIABILITY, WHETHER BASED IN CONTRACT, TORT, NEGLIGENCE, STRICT LIABILITY, WARRANTY, OR OTHERWISE UNDER THIS LICENSE, OR (B) FOR ANY INDIRECT, CONSEQUENTIAL, EXEMPLARY, INCIDENTAL, PUNITIVE OR SPECIAL DAMAGES OR LOST PROFITS, EVEN IF STABILITY AI HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. THE SOFTWARE PRODUCTS, THEIR CONSTITUENT COMPONENTS, AND ANY OUTPUT (COLLECTIVELY, “SOFTWARE MATERIALS”) ARE NOT DESIGNED OR INTENDED FOR USE IN ANY APPLICATION OR SITUATION WHERE FAILURE OR FAULT OF THE SOFTWARE MATERIALS COULD REASONABLY BE ANTICIPATED TO LEAD TO SERIOUS INJURY OF ANY PERSON, INCLUDING POTENTIAL DISCRIMINATION OR VIOLATION OF AN INDIVIDUAL’S PRIVACY RIGHTS, OR TO SEVERE PHYSICAL, PROPERTY, OR ENVIRONMENTAL DAMAGE (EACH, A “HIGH-RISK USE”). IF YOU ELECT TO USE ANY OF THE SOFTWARE MATERIALS FOR A HIGH-RISK USE, YOU DO SO AT YOUR OWN RISK. YOU AGREE TO DESIGN AND IMPLEMENT APPROPRIATE DECISION-MAKING AND RISK-MITIGATION PROCEDURES AND POLICIES IN CONNECTION WITH A HIGH-RISK USE SUCH THAT EVEN IF THERE IS A FAILURE OR FAULT IN ANY OF THE SOFTWARE MATERIALS, THE SAFETY OF PERSONS OR PROPERTY AFFECTED BY THE ACTIVITY STAYS AT A LEVEL THAT IS REASONABLE, APPROPRIATE, AND LAWFUL FOR THE FIELD OF THE HIGH-RISK USE.
-6. INDEMNIFICATION
-You will indemnify, defend and hold harmless Stability AI and our subsidiaries and affiliates, and each of our respective shareholders, directors, officers, employees, agents, successors, and assigns (collectively, the “Stability AI Parties”) from and against any losses, liabilities, damages, fines, penalties, and expenses (including reasonable attorneys’ fees) incurred by any Stability AI Party in connection with any claim, demand, allegation, lawsuit, proceeding, or investigation (collectively, “Claims”) arising out of or related to: (a) your access to or use of the Software Products (as well as any results or data generated from such access or use), including any High-Risk Use (defined below); (b) your violation of this License; or (c) your violation, misappropriation or infringement of any rights of another (including intellectual property or other proprietary rights and privacy rights). You will promptly notify the Stability AI Parties of any such Claims, and cooperate with Stability AI Parties in defending such Claims. You will also grant the Stability AI Parties sole control of the defense or settlement, at Stability AI’s sole option, of any Claims. This indemnity is in addition to, and not in lieu of, any other indemnities or remedies set forth in a written agreement between you and Stability AI or the other Stability AI Parties.
-7. TERMINATION; SURVIVAL
-a. This License will automatically terminate upon any breach by you of the terms of this License.
-b. We may terminate this License, in whole or in part, at any time upon notice (including electronic) to you.
-c. The following sections survive termination of this License: 2 (Restrictions), 3 (Attribution), 4 (Disclaimers), 5 (Limitation on Liability), 6 (Indemnification), 7 (Termination; Survival), 8 (Third Party Materials), 9 (Trademarks), 10 (Applicable Law; Dispute Resolution), and 11 (Miscellaneous).
-8. THIRD PARTY MATERIALS
-The Software Products may contain third-party software or other components (including free and open source software) (all of the foregoing, “Third Party Materials”), which are subject to the license terms of the respective third-party licensors. Your dealings or correspondence with third parties and your use of or interaction with any Third Party Materials are solely between you and the third party. Stability AI does not control or endorse, and makes no representations or warranties regarding, any Third Party Materials, and your access to and use of such Third Party Materials are at your own risk.
-9. TRADEMARKS
-Licensee has not been granted any trademark license as part of this License and may not use any name or mark associated with Stability AI without the prior written permission of Stability AI, except to the extent necessary to make the reference required by the “ATTRIBUTION” section of this Agreement.
-10. APPLICABLE LAW; DISPUTE RESOLUTION
-This License will be governed and construed under the laws of the State of California without regard to conflicts of law provisions. Any suit or proceeding arising out of or relating to this License will be brought in the federal or state courts, as applicable, in San Mateo County, California, and each party irrevocably submits to the jurisdiction and venue of such courts.
-11. MISCELLANEOUS
-If any provision or part of a provision of this License is unlawful, void or unenforceable, that provision or part of the provision is deemed severed from this License, and will not affect the validity and enforceability of any remaining provisions. The failure of Stability AI to exercise or enforce any right or provision of this License will not operate as a waiver of such right or provision. This License does not confer any third-party beneficiary rights upon any other person or entity. This License, together with the Documentation, contains the entire understanding between you and Stability AI regarding the subject matter of this License, and supersedes all other written or oral agreements and understandings between you and Stability AI regarding such subject matter. No change or addition to any provision of this License will be binding unless it is in writing and signed by an authorized representative of both you and Stability AI.

model_licenses/LICENSE-SDXL1.0 DELETED Viewed

@@ -1,175 +0,0 @@
-Copyright (c) 2023 Stability AI CreativeML Open RAIL++-M License dated July 26, 2023
-Section I: PREAMBLE Multimodal generative models are being widely adopted and used, and
-have the potential to transform the way artists, among other individuals, conceive and
-benefit from AI or ML technologies as a tool for content creation. Notwithstanding the
-current and potential benefits that these artifacts can bring to society at large, there
-are also concerns about potential misuses of them, either due to their technical
-limitations or ethical considerations. In short, this license strives for both the open
-and responsible downstream use of the accompanying model. When it comes to the open
-character, we took inspiration from open source permissive licenses regarding the grant
-of IP rights. Referring to the downstream responsible use, we added use-based
-restrictions not permitting the use of the model in very specific scenarios, in order
-for the licensor to be able to enforce the license in case potential misuses of the
-Model may occur. At the same time, we strive to promote open and responsible research on
-generative models for art and content generation. Even though downstream derivative
-versions of the model could be released under different licensing terms, the latter will
-always have to include - at minimum - the same use-based restrictions as the ones in the
-original license (this license). We believe in the intersection between open and
-responsible AI development; thus, this agreement aims to strike a balance between both
-in order to enable responsible open-science in the field of AI. This CreativeML Open
-RAIL++-M License governs the use of the model (and its derivatives) and is informed by
-the model card associated with the model. NOW THEREFORE, You and Licensor agree as
-follows: Definitions "License" means the terms and conditions for use, reproduction, and
-Distribution as defined in this document. "Data" means a collection of information
-and/or content extracted from the dataset used with the Model, including to train,
-pretrain, or otherwise evaluate the Model. The Data is not licensed under this License.
-"Output" means the results of operating a Model as embodied in informational content
-resulting therefrom. "Model" means any accompanying machine-learning based assemblies
-(including checkpoints), consisting of learnt weights, parameters (including optimizer
-states), corresponding to the model architecture as embodied in the Complementary
-Material, that have been trained or tuned, in whole or in part on the Data, using the
-Complementary Material. "Derivatives of the Model" means all modifications to the Model,
-works based on the Model, or any other model which is created or initialized by transfer
-of patterns of the weights, parameters, activations or output of the Model, to the other
-model, in order to cause the other model to perform similarly to the Model, including -
-but not limited to - distillation methods entailing the use of intermediate data
-representations or methods based on the generation of synthetic data by the Model for
-training the other model. "Complementary Material" means the accompanying source code
-and scripts used to define, run, load, benchmark or evaluate the Model, and used to
-prepare data for training or evaluation, if any. This includes any accompanying
-documentation, tutorials, examples, etc, if any. "Distribution" means any transmission,
-reproduction, publication or other sharing of the Model or Derivatives of the Model to a
-third party, including providing the Model as a hosted service made available by
-electronic or other remote means - e.g. API-based or web access. "Licensor" means the
-copyright owner or entity authorized by the copyright owner that is granting the
-License, including the persons or entities that may have rights in the Model and/or
-distributing the Model. "You" (or "Your") means an individual or Legal Entity exercising
-permissions granted by this License and/or making use of the Model for whichever purpose
-and in any field of use, including usage of the Model in an end-use application - e.g.
-chatbot, translator, image generator. "Third Parties" means individuals or legal
-entities that are not under common control with Licensor or You. "Contribution" means
-any work of authorship, including the original version of the Model and any
-modifications or additions to that Model or Derivatives of the Model thereof, that is
-intentionally submitted to Licensor for inclusion in the Model by the copyright owner or
-by an individual or Legal Entity authorized to submit on behalf of the copyright owner.
-For the purposes of this definition, "submitted" means any form of electronic, verbal,
-or written communication sent to the Licensor or its representatives, including but not
-limited to communication on electronic mailing lists, source code control systems, and
-issue tracking systems that are managed by, or on behalf of, the Licensor for the
-purpose of discussing and improving the Model, but excluding communication that is
-conspicuously marked or otherwise designated in writing by the copyright owner as "Not a
-Contribution." "Contributor" means Licensor and any individual or Legal Entity on behalf
-of whom a Contribution has been received by Licensor and subsequently incorporated
-within the Model.
-Section II: INTELLECTUAL PROPERTY RIGHTS Both copyright and patent grants apply to the
-Model, Derivatives of the Model and Complementary Material. The Model and Derivatives of
-the Model are subject to additional terms as described in
-Section III. Grant of Copyright License. Subject to the terms and conditions of this
-License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive,
-no-charge, royalty-free, irrevocable copyright license to reproduce, prepare, publicly
-display, publicly perform, sublicense, and distribute the Complementary Material, the
-Model, and Derivatives of the Model. Grant of Patent License. Subject to the terms and
-conditions of this License and where and as applicable, each Contributor hereby grants
-to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-(except as stated in this paragraph) patent license to make, have made, use, offer to
-sell, sell, import, and otherwise transfer the Model and the Complementary Material,
-where such license applies only to those patent claims licensable by such Contributor
-that are necessarily infringed by their Contribution(s) alone or by combination of their
-Contribution(s) with the Model to which such Contribution(s) was submitted. If You
-institute patent litigation against any entity (including a cross-claim or counterclaim
-in a lawsuit) alleging that the Model and/or Complementary Material or a Contribution
-incorporated within the Model and/or Complementary Material constitutes direct or
-contributory patent infringement, then any patent licenses granted to You under this
-License for the Model and/or Work shall terminate as of the date such litigation is
-asserted or filed. Section III: CONDITIONS OF USAGE, DISTRIBUTION AND REDISTRIBUTION
-Distribution and Redistribution. You may host for Third Party remote access purposes
-(e.g. software-as-a-service), reproduce and distribute copies of the Model or
-Derivatives of the Model thereof in any medium, with or without modifications, provided
-that You meet the following conditions: Use-based restrictions as referenced in
-paragraph 5 MUST be included as an enforceable provision by You in any type of legal
-agreement (e.g. a license) governing the use and/or distribution of the Model or
-Derivatives of the Model, and You shall give notice to subsequent users You Distribute
-to, that the Model or Derivatives of the Model are subject to paragraph 5. This
-provision does not apply to the use of Complementary Material. You must give any Third
-Party recipients of the Model or Derivatives of the Model a copy of this License; You
-must cause any modified files to carry prominent notices stating that You changed the
-files; You must retain all copyright, patent, trademark, and attribution notices
-excluding those notices that do not pertain to any part of the Model, Derivatives of the
-Model. You may add Your own copyright statement to Your modifications and may provide
-additional or different license terms and conditions - respecting paragraph 4.a. - for
-use, reproduction, or Distribution of Your modifications, or for any such Derivatives of
-the Model as a whole, provided Your use, reproduction, and Distribution of the Model
-otherwise complies with the conditions stated in this License. Use-based restrictions.
-The restrictions set forth in Attachment A are considered Use-based restrictions.
-Therefore You cannot use the Model and the Derivatives of the Model for the specified
-restricted uses. You may use the Model subject to this License, including only for
-lawful purposes and in accordance with the License. Use may include creating any content
-with, finetuning, updating, running, training, evaluating and/or reparametrizing the
-Model. You shall require all of Your users who use the Model or a Derivative of the
-Model to comply with the terms of this paragraph (paragraph 5). The Output You Generate.
-Except as set forth herein, Licensor claims no rights in the Output You generate using
-the Model. You are accountable for the Output you generate and its subsequent uses. No
-use of the output can contravene any provision as stated in the License.
-Section IV: OTHER PROVISIONS Updates and Runtime Restrictions. To the maximum extent
-permitted by law, Licensor reserves the right to restrict (remotely or otherwise) usage
-of the Model in violation of this License. Trademarks and related. Nothing in this
-License permits You to make use of Licensors’ trademarks, trade names, logos or to
-otherwise suggest endorsement or misrepresent the relationship between the parties; and
-any rights not expressly granted herein are reserved by the Licensors. Disclaimer of
-Warranty. Unless required by applicable law or agreed to in writing, Licensor provides
-the Model and the Complementary Material (and each Contributor provides its
-Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
-express or implied, including, without limitation, any warranties or conditions of
-TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are
-solely responsible for determining the appropriateness of using or redistributing the
-Model, Derivatives of the Model, and the Complementary Material and assume any risks
-associated with Your exercise of permissions under this License. Limitation of
-Liability. In no event and under no legal theory, whether in tort (including
-negligence), contract, or otherwise, unless required by applicable law (such as
-deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be
-liable to You for damages, including any direct, indirect, special, incidental, or
-consequential damages of any character arising as a result of this License or out of the
-use or inability to use the Model and the Complementary Material (including but not
-limited to damages for loss of goodwill, work stoppage, computer failure or malfunction,
-or any and all other commercial damages or losses), even if such Contributor has been
-advised of the possibility of such damages. Accepting Warranty or Additional Liability.
-While redistributing the Model, Derivatives of the Model and the Complementary Material
-thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty,
-indemnity, or other liability obligations and/or rights consistent with this License.
-However, in accepting such obligations, You may act only on Your own behalf and on Your
-sole responsibility, not on behalf of any other Contributor, and only if You agree to
-indemnify, defend, and hold each Contributor harmless for any liability incurred by, or
-claims asserted against, such Contributor by reason of your accepting any such warranty
-or additional liability. If any provision of this License is held to be invalid, illegal
-or unenforceable, the remaining provisions shall be unaffected thereby and remain valid
-as if such provision had not been set forth herein.
-END OF TERMS AND CONDITIONS
-Attachment A Use Restrictions
-You agree not to use the Model or Derivatives of the Model:
-In any way that violates any applicable national, federal, state, local or
-international law or regulation; For the purpose of exploiting, harming or attempting to
-exploit or harm minors in any way; To generate or disseminate verifiably false
-information and/or content with the purpose of harming others; To generate or
-disseminate personal identifiable information that can be used to harm an individual; To
-defame, disparage or otherwise harass others; For fully automated decision making that
-adversely impacts an individual’s legal rights or otherwise creates or modifies a
-binding, enforceable obligation; For any use intended to or which has the effect of
-discriminating against or harming individuals or groups based on online or offline
-social behavior or known or predicted personal or personality characteristics; To
-exploit any of the vulnerabilities of a specific group of persons based on their age,
-social, physical or mental characteristics, in order to materially distort the behavior
-of a person pertaining to that group in a manner that causes or is likely to cause that
-person or another person physical or psychological harm; For any use intended to or
-which has the effect of discriminating against individuals or groups based on legally
-protected characteristics or categories; To provide medical advice and medical results
-interpretation; To generate or disseminate information for the purpose to be used for
-administration of justice, law enforcement, immigration or asylum processes, such as
-predicting an individual will commit fraud/crime commitment (e.g. by text profiling,
-drawing causal relationships between assertions made in documents, indiscriminate and
-arbitrarily-targeted use).

pyproject.toml DELETED Viewed

@@ -1,48 +0,0 @@
-[build-system]
-requires = ["hatchling"]
-build-backend = "hatchling.build"
-[project]
-name = "sgm"
-dynamic = ["version"]
-description = "Stability Generative Models"
-readme = "README.md"
-license-files = { paths = ["LICENSE-CODE"] }
-requires-python = ">=3.8"
-[project.urls]
-Homepage = "https://github.com/Stability-AI/generative-models"
-[tool.hatch.version]
-path = "sgm/__init__.py"
-[tool.hatch.build]
-# This needs to be explicitly set so the configuration files
-# grafted into the `sgm` directory get included in the wheel's
-# RECORD file.
-include = [
-    "sgm",
-]
-# The force-include configurations below make Hatch copy
-# the configs/ directory (containing the various YAML files required
-# to generatively model) into the source distribution and the wheel.
-[tool.hatch.build.targets.sdist.force-include]
-"./configs" = "sgm/configs"
-[tool.hatch.build.targets.wheel.force-include]
-"./configs" = "sgm/configs"
-[tool.hatch.envs.ci]
-skip-install = false
-dependencies = [
-    "pytest"
-]
-[tool.hatch.envs.ci.scripts]
-test-inference = [
-    "pip install torch==2.0.1+cu118 torchvision==0.15.2+cu118 torchaudio==2.0.2+cu118 --index-url https://download.pytorch.org/whl/cu118",
-    "pip install -r requirements/pt2.txt",
-    "pytest -v tests/inference/test_inference.py {args}",
-]

pytest.ini DELETED Viewed

@@ -1,3 +0,0 @@
-[pytest]
-markers =
-  inference: mark as inference test (deselect with '-m "not inference"')

requirements.txt CHANGED Viewed

@@ -1,42 +1,7 @@
 https://gradio-builds.s3.amazonaws.com/756e3431d65172df986a7e335dce8136206a293a/gradio-4.7.1-py3-none-any.whl
-black==23.7.0
-chardet==5.1.0
-clip @ git+https://github.com/openai/CLIP.git
-einops>=0.6.1
-fairscale>=0.4.13
-fire>=0.5.0
-fsspec>=2023.6.0
-invisible-watermark>=0.2.0
-kornia==0.6.9
-matplotlib>=3.7.2
-natsort>=8.4.0
-ninja>=1.11.1
-numpy>=1.24.4
-omegaconf>=2.3.0
-open-clip-torch>=2.20.0
-opencv-python==4.6.0.66
-pandas>=2.0.3
-pillow>=9.5.0
-pudb>=2022.1.3
-pytorch-lightning==2.0.1
-pyyaml>=6.0.1
-scipy>=1.10.1
-streamlit>=0.73.1
-tensorboardx==2.6
-timm>=0.9.2
-tokenizers==0.12.1
-torch>=2.0.1
-torchaudio>=2.0.2
-torchdata==0.6.1
-torchmetrics>=1.0.1
-torchvision>=0.15.2
-tqdm>=4.65.0
-transformers==4.19.1
-triton==2.0.0
-urllib3<1.27,>=1.25.4
-wandb>=0.15.6
-webdataset>=0.2.33
-wheel>=0.41.0
-xformers>=0.0.20
-fire
 uuid

 https://gradio-builds.s3.amazonaws.com/756e3431d65172df986a7e335dce8136206a293a/gradio-4.7.1-py3-none-any.whl
+git+https://github.com/huggingface/diffusers.git@refs/pull/5895/head
+transformers
+accelerate
+safetensors
+opencv-python
 uuid

requirements/pt13.txt DELETED Viewed

@@ -1,40 +0,0 @@
-black==23.7.0
-chardet>=5.1.0
-clip @ git+https://github.com/openai/CLIP.git
-einops>=0.6.1
-fairscale>=0.4.13
-fire>=0.5.0
-fsspec>=2023.6.0
-invisible-watermark>=0.2.0
-kornia==0.6.9
-matplotlib>=3.7.2
-natsort>=8.4.0
-numpy>=1.24.4
-omegaconf>=2.3.0
-onnx<=1.12.0
-open-clip-torch>=2.20.0
-opencv-python==4.6.0.66
-pandas>=2.0.3
-pillow>=9.5.0
-pudb>=2022.1.3
-pytorch-lightning==1.8.5
-pyyaml>=6.0.1
-scipy>=1.10.1
-streamlit>=1.25.0
-tensorboardx==2.5.1
-timm>=0.9.2
-tokenizers==0.12.1
---extra-index-url https://download.pytorch.org/whl/cu117
-torch==1.13.1+cu117
-torchaudio==0.13.1
-torchdata==0.5.1
-torchmetrics>=1.0.1
-torchvision==0.14.1+cu117
-tqdm>=4.65.0
-transformers==4.19.1
-triton==2.0.0.post1
-urllib3<1.27,>=1.25.4
-wandb>=0.15.6
-webdataset>=0.2.33
-wheel>=0.41.0
-xformers==0.0.16

requirements/pt2.txt DELETED Viewed

@@ -1,39 +0,0 @@
-black==23.7.0
-chardet==5.1.0
-clip @ git+https://github.com/openai/CLIP.git
-einops>=0.6.1
-fairscale>=0.4.13
-fire>=0.5.0
-fsspec>=2023.6.0
-invisible-watermark>=0.2.0
-kornia==0.6.9
-matplotlib>=3.7.2
-natsort>=8.4.0
-ninja>=1.11.1
-numpy>=1.24.4
-omegaconf>=2.3.0
-open-clip-torch>=2.20.0
-opencv-python==4.6.0.66
-pandas>=2.0.3
-pillow>=9.5.0
-pudb>=2022.1.3
-pytorch-lightning==2.0.1
-pyyaml>=6.0.1
-scipy>=1.10.1
-streamlit>=0.73.1
-tensorboardx==2.6
-timm>=0.9.2
-tokenizers==0.12.1
-torch>=2.0.1
-torchaudio>=2.0.2
-torchdata==0.6.1
-torchmetrics>=1.0.1
-torchvision>=0.15.2
-tqdm>=4.65.0
-transformers==4.19.1
-triton==2.0.0
-urllib3<1.27,>=1.25.4
-wandb>=0.15.6
-webdataset>=0.2.33
-wheel>=0.41.0
-xformers>=0.0.20

scripts/.DS_Store DELETED Viewed

Binary file (6.15 kB)

scripts/__init__.py DELETED Viewed

File without changes

scripts/demo/__init__.py DELETED Viewed

File without changes

scripts/demo/detect.py DELETED Viewed

@@ -1,156 +0,0 @@
-import argparse
-import cv2
-import numpy as np
-try:
-    from imwatermark import WatermarkDecoder
-except ImportError as e:
-    try:
-        # Assume some of the other dependencies such as torch are not fulfilled
-        # import file without loading unnecessary libraries.
-        import importlib.util
-        import sys
-        spec = importlib.util.find_spec("imwatermark.maxDct")
-        assert spec is not None
-        maxDct = importlib.util.module_from_spec(spec)
-        sys.modules["maxDct"] = maxDct
-        spec.loader.exec_module(maxDct)
-        class WatermarkDecoder(object):
-            """A minimal version of
-            https://github.com/ShieldMnt/invisible-watermark/blob/main/imwatermark/watermark.py
-            to only reconstruct bits using dwtDct"""
-            def __init__(self, wm_type="bytes", length=0):
-                assert wm_type == "bits", "Only bits defined in minimal import"
-                self._wmType = wm_type
-                self._wmLen = length
-            def reconstruct(self, bits):
-                if len(bits) != self._wmLen:
-                    raise RuntimeError("bits are not matched with watermark length")
-                return bits
-            def decode(self, cv2Image, method="dwtDct", **configs):
-                (r, c, channels) = cv2Image.shape
-                if r * c < 256 * 256:
-                    raise RuntimeError("image too small, should be larger than 256x256")
-                bits = []
-                assert method == "dwtDct"
-                embed = maxDct.EmbedMaxDct(watermarks=[], wmLen=self._wmLen, **configs)
-                bits = embed.decode(cv2Image)
-                return self.reconstruct(bits)
-    except:
-        raise e
-# A fixed 48-bit message that was chosen at random
-# WATERMARK_MESSAGE = 0xB3EC907BB19E
-WATERMARK_MESSAGE = 0b101100111110110010010000011110111011000110011110
-# bin(x)[2:] gives bits of x as str, use int to convert them to 0/1
-WATERMARK_BITS = [int(bit) for bit in bin(WATERMARK_MESSAGE)[2:]]
-MATCH_VALUES = [
-    [27, "No watermark detected"],
-    [33, "Partial watermark match. Cannot determine with certainty."],
-    [
-        35,
-        (
-            "Likely watermarked. In our test 0.02% of real images were "
-            'falsely detected as "Likely watermarked"'
-        ),
-    ],
-    [
-        49,
-        (
-            "Very likely watermarked. In our test no real images were "
-            'falsely detected as "Very likely watermarked"'
-        ),
-    ],
-]
-class GetWatermarkMatch:
-    def __init__(self, watermark):
-        self.watermark = watermark
-        self.num_bits = len(self.watermark)
-        self.decoder = WatermarkDecoder("bits", self.num_bits)
-    def __call__(self, x: np.ndarray) -> np.ndarray:
-        """
-        Counts the number of bits that match the predefined watermark in one
-        or multiple images. Images should be in cv2 format, e.g. h x w x c BGR.
-        Args:
-            x: ([B], h, w, c) in range [0, 255]
-        Returns:
-           number of matched bits ([B],)
-        """
-        squeeze = len(x.shape) == 3
-        if squeeze:
-            x = x[None, ...]
-        bs = x.shape[0]
-        detected = np.empty((bs, self.num_bits), dtype=bool)
-        for k in range(bs):
-            detected[k] = self.decoder.decode(x[k], "dwtDct")
-        result = np.sum(detected == self.watermark, axis=-1)
-        if squeeze:
-            return result[0]
-        else:
-            return result
-get_watermark_match = GetWatermarkMatch(WATERMARK_BITS)
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "filename",
-        nargs="+",
-        type=str,
-        help="Image files to check for watermarks",
-    )
-    opts = parser.parse_args()
-    print(
-        """
-        This script tries to detect watermarked images. Please be aware of
-        the following:
-        - As the watermark is supposed to be invisible, there is the risk that
-          watermarked images may not be detected.
-        - To maximize the chance of detection make sure that the image has the same
-          dimensions as when the watermark was applied (most likely 1024x1024
-          or 512x512).
-        - Specific image manipulation may drastically decrease the chance that
-          watermarks can be detected.
-        - There is also the chance that an image has the characteristics of the
-          watermark by chance.
-        - The watermark script is public, anybody may watermark any images, and
-          could therefore claim it to be generated.
-        - All numbers below are based on a test using 10,000 images without any
-          modifications after applying the watermark.
-        """
-    )
-    for fn in opts.filename:
-        image = cv2.imread(fn)
-        if image is None:
-            print(f"Couldn't read {fn}. Skipping")
-            continue
-        num_bits = get_watermark_match(image)
-        k = 0
-        while num_bits > MATCH_VALUES[k][0]:
-            k += 1
-        print(
-            f"{fn}: {MATCH_VALUES[k][1]}",
-            f"Bits that matched the watermark {num_bits} from {len(WATERMARK_BITS)}\n",
-            sep="\n\t",
-        )