metercai committed
Commit f45f506 · verified · Parent: c561095

Delete files checkpoints/ clip/ clip_vision/ configs/ with huggingface_hub
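For reference, a bulk deletion like this can be issued as a single commit through the huggingface_hub commit API. A minimal sketch, assuming an authenticated session (e.g. via huggingface-cli login); "user/repo" is a placeholder, since the target repository is not named on this page:

# Minimal sketch: delete whole folders in one commit with huggingface_hub.
from huggingface_hub import HfApi, CommitOperationDelete

api = HfApi()
api.create_commit(
    repo_id="user/repo",  # placeholder for the actual repository id
    operations=[
        CommitOperationDelete(path_in_repo="checkpoints/", is_folder=True),
        CommitOperationDelete(path_in_repo="clip/", is_folder=True),
        CommitOperationDelete(path_in_repo="clip_vision/", is_folder=True),
        CommitOperationDelete(path_in_repo="configs/", is_folder=True),
    ],
    commit_message="Delete files checkpoints/ clip/ clip_vision/ configs/ with huggingface_hub",
)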
Files changed (39)
  1. checkpoints/animaPencilXL_v500.safetensors +0 -3
  2. checkpoints/flux-hyp8-Q5_K_M.gguf +0 -3
  3. checkpoints/flux1-dev-bnb-nf4-v2.safetensors +0 -3
  4. checkpoints/flux1-dev.safetensors +0 -3
  5. checkpoints/hunyuan_dit_1.2.safetensors +0 -3
  6. checkpoints/juggernautXL_juggXIByRundiffusion.safetensors +0 -3
  7. checkpoints/playground-v2.5-1024px.safetensors +0 -3
  8. checkpoints/ponyDiffusionV6XL.safetensors +0 -3
  9. checkpoints/put_checkpoints_here +0 -0
  10. checkpoints/realisticStockPhoto_v20.safetensors +0 -3
  11. checkpoints/realisticVisionV60B1_v51VAE.safetensors +0 -3
  12. checkpoints/sd3_medium_incl_clips.safetensors +0 -3
  13. checkpoints/sd3_medium_incl_clips_t5xxlfp16.safetensors +0 -3
  14. checkpoints/sd3_medium_incl_clips_t5xxlfp8.safetensors +0 -3
  15. clip/EVA02_CLIP_L_336_psz14_s6B.pt +0 -3
  16. clip/clip_l.safetensors +0 -3
  17. clip/put_clip_or_text_encoder_models_here +0 -0
  18. clip/t5xxl_fp16.safetensors +0 -3
  19. clip/t5xxl_fp8_e4m3fn.safetensors +0 -3
  20. clip_vision/clip-vit-large-patch14.bin +0 -3
  21. clip_vision/clip-vit-large-patch14/merges.txt +0 -0
  22. clip_vision/clip-vit-large-patch14/special_tokens_map.json +0 -1
  23. clip_vision/clip-vit-large-patch14/tokenizer_config.json +0 -34
  24. clip_vision/clip-vit-large-patch14/vocab.json +0 -0
  25. clip_vision/clip_vision_vit_h.safetensors +0 -3
  26. clip_vision/model_base_caption_capfilt_large.pth +0 -3
  27. clip_vision/put_clip_vision_models_here +0 -0
  28. clip_vision/wd-v1-4-moat-tagger-v2.onnx +0 -3
  29. configs/anything_v3.yaml +0 -73
  30. configs/v1-inference.yaml +0 -70
  31. configs/v1-inference_clip_skip_2.yaml +0 -73
  32. configs/v1-inference_clip_skip_2_fp16.yaml +0 -74
  33. configs/v1-inference_fp16.yaml +0 -71
  34. configs/v1-inpainting-inference.yaml +0 -71
  35. configs/v2-inference-v.yaml +0 -68
  36. configs/v2-inference-v_fp32.yaml +0 -68
  37. configs/v2-inference.yaml +0 -67
  38. configs/v2-inference_fp32.yaml +0 -67
  39. configs/v2-inpainting-inference.yaml +0 -158
checkpoints/animaPencilXL_v500.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:896faa18cd6852ccf977e2dec76191c38f256d031204e233cb3ed76f6088d55b
- size 6938041144
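Note: each large-file deletion in this commit removes only a Git LFS pointer like the three lines above, i.e. "key value" lines giving the spec version, the SHA-256 of the stored blob, and its size in bytes (here about 6.9 GB). A minimal Python sketch of reading those fields, using this pointer as input:

# Minimal sketch: split a Git LFS pointer into its "key value" fields.
def parse_lfs_pointer(text):
    fields = {}
    for line in text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

pointer = (
    "version https://git-lfs.github.com/spec/v1\n"
    "oid sha256:896faa18cd6852ccf977e2dec76191c38f256d031204e233cb3ed76f6088d55b\n"
    "size 6938041144\n"
)
info = parse_lfs_pointer(pointer)
print(info["oid"], int(info["size"]))  # blob hash and size in bytes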
checkpoints/flux-hyp8-Q5_K_M.gguf DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:a0dac309ffb497fde0d1bbfa0291f5371d0d05c66173df830318bc475777c68a
- size 8421981408

checkpoints/flux1-dev-bnb-nf4-v2.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:fef37763b8f2c8cc3463139bbb6d91aa517c605b654d4e5c540de52813f30306
- size 12044280207

checkpoints/flux1-dev.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:4610115bb0c89560703c892c59ac2742fa821e60ef5871b33493ba544683abd7
- size 23802932552

checkpoints/hunyuan_dit_1.2.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:4fb84f84079cda457d171b3c6b15d1be95b5a3e5d9825703951a99ddf92d1787
- size 8240228270

checkpoints/juggernautXL_juggXIByRundiffusion.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:33e58e86686f6b386c526682b5da9228ead4f91d994abd4b053442dc5b42719e
- size 7105350536

checkpoints/playground-v2.5-1024px.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:bcaa7dd6780974f000b17b5a6c63e6f867a75c51ffa85c67d6b196882c69b992
- size 6938040576

checkpoints/ponyDiffusionV6XL.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:67ab2fd8ec439a89b3fedb15cc65f54336af163c7eb5e4f2acc98f090a29b0b3
- size 6938041050

checkpoints/put_checkpoints_here DELETED
File without changes
checkpoints/realisticStockPhoto_v20.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:f99f3dec38a09b4834a4a073bdc45aabd42b422b4d327f5e8001afcb5ffb5f45
- size 6938054242

checkpoints/realisticVisionV60B1_v51VAE.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:15012c538f503ce2ebfc2c8547b268c75ccdaff7a281db55399940ff1d70e21d
- size 2132625894

checkpoints/sd3_medium_incl_clips.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:3bb7f21bc5fb450220f4eb78a2f276b15422309d5166a4bdeb8c3b763a3a0581
- size 5973224240

checkpoints/sd3_medium_incl_clips_t5xxlfp16.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:69a950c5d143ce782a7423c532c8a12b75da6a37b0e6f26a322acf4e76208912
- size 15761074532

checkpoints/sd3_medium_incl_clips_t5xxlfp8.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:92db4295e9c9ab8401ef60566d975656a35b0bd0f6d9ce0d083725171f7b3174
- size 10867168284

clip/EVA02_CLIP_L_336_psz14_s6B.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:84c3a17a228c567a155259b2245b0b59072bf7da510260a0a02ec54de6d50b05
- size 856461210

clip/clip_l.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:660c6f5b1abae9dc498ac2d21e1347d2abdb0cf6c0c0c8576cd796491d9a6cdd
- size 246144152

clip/put_clip_or_text_encoder_models_here DELETED
File without changes
clip/t5xxl_fp16.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:6e480b09fae049a72d2a8c5fbccb8d3e92febeb233bbe9dfe7256958a9167635
- size 9787841024

clip/t5xxl_fp8_e4m3fn.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:7d330da4816157540d6bb7838bf63a0f02f573fc48ca4d8de34bb0cbfd514f09
- size 4893934904

clip_vision/clip-vit-large-patch14.bin DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:f1a17cdbe0f36fec524f5cafb1c261ea3bbbc13e346e0f74fc9eb0460dedd0d3
- size 1710671599

clip_vision/clip-vit-large-patch14/merges.txt DELETED
The diff for this file is too large to render.
 
clip_vision/clip-vit-large-patch14/special_tokens_map.json DELETED
@@ -1 +0,0 @@
- {"bos_token": {"content": "<|startoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "pad_token": "<|endoftext|>"}

clip_vision/clip-vit-large-patch14/tokenizer_config.json DELETED
@@ -1,34 +0,0 @@
- {
-   "unk_token": {
-     "content": "<|endoftext|>",
-     "single_word": false,
-     "lstrip": false,
-     "rstrip": false,
-     "normalized": true,
-     "__type": "AddedToken"
-   },
-   "bos_token": {
-     "content": "<|startoftext|>",
-     "single_word": false,
-     "lstrip": false,
-     "rstrip": false,
-     "normalized": true,
-     "__type": "AddedToken"
-   },
-   "eos_token": {
-     "content": "<|endoftext|>",
-     "single_word": false,
-     "lstrip": false,
-     "rstrip": false,
-     "normalized": true,
-     "__type": "AddedToken"
-   },
-   "pad_token": "<|endoftext|>",
-   "add_prefix_space": false,
-   "errors": "replace",
-   "do_lower_case": true,
-   "name_or_path": "openai/clip-vit-base-patch32",
-   "model_max_length": 77,
-   "special_tokens_map_file": "./special_tokens_map.json",
-   "tokenizer_class": "CLIPTokenizer"
- }

clip_vision/clip-vit-large-patch14/vocab.json DELETED
The diff for this file is too large to render.
 
clip_vision/clip_vision_vit_h.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:9a11c14945fb98c7ac9a54fab5e498885731a0780260dad7adf41f6f59655ee5
- size 1972298538

clip_vision/model_base_caption_capfilt_large.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:96ac8749bd0a568c274ebe302b3a3748ab9be614c737f3d8c529697139174086
- size 896081425

clip_vision/put_clip_vision_models_here DELETED
File without changes
clip_vision/wd-v1-4-moat-tagger-v2.onnx DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:b8cef913be4c9e8d93f9f903e74271416502ce0b4b04df0ff1e2f00df488aa03
- size 326197340

configs/anything_v3.yaml DELETED
@@ -1,73 +0,0 @@
- model:
-   base_learning_rate: 1.0e-04
-   target: ldm.models.diffusion.ddpm.LatentDiffusion
-   params:
-     linear_start: 0.00085
-     linear_end: 0.0120
-     num_timesteps_cond: 1
-     log_every_t: 200
-     timesteps: 1000
-     first_stage_key: "jpg"
-     cond_stage_key: "txt"
-     image_size: 64
-     channels: 4
-     cond_stage_trainable: false # Note: different from the one we trained before
-     conditioning_key: crossattn
-     monitor: val/loss_simple_ema
-     scale_factor: 0.18215
-     use_ema: False
-
-     scheduler_config: # 10000 warmup steps
-       target: ldm.lr_scheduler.LambdaLinearScheduler
-       params:
-         warm_up_steps: [ 10000 ]
-         cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
-         f_start: [ 1.e-6 ]
-         f_max: [ 1. ]
-         f_min: [ 1. ]
-
-     unet_config:
-       target: ldm.modules.diffusionmodules.openaimodel.UNetModel
-       params:
-         image_size: 32 # unused
-         in_channels: 4
-         out_channels: 4
-         model_channels: 320
-         attention_resolutions: [ 4, 2, 1 ]
-         num_res_blocks: 2
-         channel_mult: [ 1, 2, 4, 4 ]
-         num_heads: 8
-         use_spatial_transformer: True
-         transformer_depth: 1
-         context_dim: 768
-         use_checkpoint: True
-         legacy: False
-
-     first_stage_config:
-       target: ldm.models.autoencoder.AutoencoderKL
-       params:
-         embed_dim: 4
-         monitor: val/rec_loss
-         ddconfig:
-           double_z: true
-           z_channels: 4
-           resolution: 256
-           in_channels: 3
-           out_ch: 3
-           ch: 128
-           ch_mult:
-           - 1
-           - 2
-           - 4
-           - 4
-           num_res_blocks: 2
-           attn_resolutions: []
-           dropout: 0.0
-         lossconfig:
-           target: torch.nn.Identity
-
-     cond_stage_config:
-       target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
-       params:
-         layer: "hidden"
-         layer_idx: -2
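Note: the configs deleted in this commit are standard latent-diffusion inference configs. In the CompVis/Stability codebase they are loaded with OmegaConf and turned into a model via ldm.util.instantiate_from_config; a minimal sketch, where the .ckpt filename is a hypothetical example:

# Minimal sketch: how a config like anything_v3.yaml is typically consumed.
import torch
from omegaconf import OmegaConf
from ldm.util import instantiate_from_config

config = OmegaConf.load("configs/anything_v3.yaml")
model = instantiate_from_config(config.model)  # builds the LatentDiffusion model
state = torch.load("anything_v3.ckpt", map_location="cpu")  # hypothetical weights file
model.load_state_dict(state.get("state_dict", state), strict=False)
model.eval()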
configs/v1-inference.yaml DELETED
@@ -1,70 +0,0 @@
- model:
-   base_learning_rate: 1.0e-04
-   target: ldm.models.diffusion.ddpm.LatentDiffusion
-   params:
-     linear_start: 0.00085
-     linear_end: 0.0120
-     num_timesteps_cond: 1
-     log_every_t: 200
-     timesteps: 1000
-     first_stage_key: "jpg"
-     cond_stage_key: "txt"
-     image_size: 64
-     channels: 4
-     cond_stage_trainable: false # Note: different from the one we trained before
-     conditioning_key: crossattn
-     monitor: val/loss_simple_ema
-     scale_factor: 0.18215
-     use_ema: False
-
-     scheduler_config: # 10000 warmup steps
-       target: ldm.lr_scheduler.LambdaLinearScheduler
-       params:
-         warm_up_steps: [ 10000 ]
-         cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
-         f_start: [ 1.e-6 ]
-         f_max: [ 1. ]
-         f_min: [ 1. ]
-
-     unet_config:
-       target: ldm.modules.diffusionmodules.openaimodel.UNetModel
-       params:
-         image_size: 32 # unused
-         in_channels: 4
-         out_channels: 4
-         model_channels: 320
-         attention_resolutions: [ 4, 2, 1 ]
-         num_res_blocks: 2
-         channel_mult: [ 1, 2, 4, 4 ]
-         num_heads: 8
-         use_spatial_transformer: True
-         transformer_depth: 1
-         context_dim: 768
-         use_checkpoint: True
-         legacy: False
-
-     first_stage_config:
-       target: ldm.models.autoencoder.AutoencoderKL
-       params:
-         embed_dim: 4
-         monitor: val/rec_loss
-         ddconfig:
-           double_z: true
-           z_channels: 4
-           resolution: 256
-           in_channels: 3
-           out_ch: 3
-           ch: 128
-           ch_mult:
-           - 1
-           - 2
-           - 4
-           - 4
-           num_res_blocks: 2
-           attn_resolutions: []
-           dropout: 0.0
-         lossconfig:
-           target: torch.nn.Identity
-
-     cond_stage_config:
-       target: ldm.modules.encoders.modules.FrozenCLIPEmbedder

configs/v1-inference_clip_skip_2.yaml DELETED
@@ -1,73 +0,0 @@
- model:
-   base_learning_rate: 1.0e-04
-   target: ldm.models.diffusion.ddpm.LatentDiffusion
-   params:
-     linear_start: 0.00085
-     linear_end: 0.0120
-     num_timesteps_cond: 1
-     log_every_t: 200
-     timesteps: 1000
-     first_stage_key: "jpg"
-     cond_stage_key: "txt"
-     image_size: 64
-     channels: 4
-     cond_stage_trainable: false # Note: different from the one we trained before
-     conditioning_key: crossattn
-     monitor: val/loss_simple_ema
-     scale_factor: 0.18215
-     use_ema: False
-
-     scheduler_config: # 10000 warmup steps
-       target: ldm.lr_scheduler.LambdaLinearScheduler
-       params:
-         warm_up_steps: [ 10000 ]
-         cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
-         f_start: [ 1.e-6 ]
-         f_max: [ 1. ]
-         f_min: [ 1. ]
-
-     unet_config:
-       target: ldm.modules.diffusionmodules.openaimodel.UNetModel
-       params:
-         image_size: 32 # unused
-         in_channels: 4
-         out_channels: 4
-         model_channels: 320
-         attention_resolutions: [ 4, 2, 1 ]
-         num_res_blocks: 2
-         channel_mult: [ 1, 2, 4, 4 ]
-         num_heads: 8
-         use_spatial_transformer: True
-         transformer_depth: 1
-         context_dim: 768
-         use_checkpoint: True
-         legacy: False
-
-     first_stage_config:
-       target: ldm.models.autoencoder.AutoencoderKL
-       params:
-         embed_dim: 4
-         monitor: val/rec_loss
-         ddconfig:
-           double_z: true
-           z_channels: 4
-           resolution: 256
-           in_channels: 3
-           out_ch: 3
-           ch: 128
-           ch_mult:
-           - 1
-           - 2
-           - 4
-           - 4
-           num_res_blocks: 2
-           attn_resolutions: []
-           dropout: 0.0
-         lossconfig:
-           target: torch.nn.Identity
-
-     cond_stage_config:
-       target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
-       params:
-         layer: "hidden"
-         layer_idx: -2
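Note: the layer: "hidden" / layer_idx: -2 pair in this config is what "clip skip 2" means: conditioning is taken from the CLIP text encoder's penultimate hidden layer instead of its final output. A minimal sketch of the equivalent operation with the transformers CLIP text model, for illustration only:

# Minimal sketch: "clip skip 2" = use the penultimate hidden layer (layer_idx: -2).
import torch
from transformers import CLIPTokenizer, CLIPTextModel

tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
encoder = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14")

tokens = tokenizer(["a photo of a cat"], padding="max_length",
                   max_length=77, return_tensors="pt")
with torch.no_grad():
    out = encoder(**tokens, output_hidden_states=True)
cond = out.hidden_states[-2]  # shape (1, 77, 768): the clip-skip-2 conditioning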
configs/v1-inference_clip_skip_2_fp16.yaml DELETED
@@ -1,74 +0,0 @@
- model:
-   base_learning_rate: 1.0e-04
-   target: ldm.models.diffusion.ddpm.LatentDiffusion
-   params:
-     linear_start: 0.00085
-     linear_end: 0.0120
-     num_timesteps_cond: 1
-     log_every_t: 200
-     timesteps: 1000
-     first_stage_key: "jpg"
-     cond_stage_key: "txt"
-     image_size: 64
-     channels: 4
-     cond_stage_trainable: false # Note: different from the one we trained before
-     conditioning_key: crossattn
-     monitor: val/loss_simple_ema
-     scale_factor: 0.18215
-     use_ema: False
-
-     scheduler_config: # 10000 warmup steps
-       target: ldm.lr_scheduler.LambdaLinearScheduler
-       params:
-         warm_up_steps: [ 10000 ]
-         cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
-         f_start: [ 1.e-6 ]
-         f_max: [ 1. ]
-         f_min: [ 1. ]
-
-     unet_config:
-       target: ldm.modules.diffusionmodules.openaimodel.UNetModel
-       params:
-         use_fp16: True
-         image_size: 32 # unused
-         in_channels: 4
-         out_channels: 4
-         model_channels: 320
-         attention_resolutions: [ 4, 2, 1 ]
-         num_res_blocks: 2
-         channel_mult: [ 1, 2, 4, 4 ]
-         num_heads: 8
-         use_spatial_transformer: True
-         transformer_depth: 1
-         context_dim: 768
-         use_checkpoint: True
-         legacy: False
-
-     first_stage_config:
-       target: ldm.models.autoencoder.AutoencoderKL
-       params:
-         embed_dim: 4
-         monitor: val/rec_loss
-         ddconfig:
-           double_z: true
-           z_channels: 4
-           resolution: 256
-           in_channels: 3
-           out_ch: 3
-           ch: 128
-           ch_mult:
-           - 1
-           - 2
-           - 4
-           - 4
-           num_res_blocks: 2
-           attn_resolutions: []
-           dropout: 0.0
-         lossconfig:
-           target: torch.nn.Identity
-
-     cond_stage_config:
-       target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
-       params:
-         layer: "hidden"
-         layer_idx: -2

configs/v1-inference_fp16.yaml DELETED
@@ -1,71 +0,0 @@
- model:
-   base_learning_rate: 1.0e-04
-   target: ldm.models.diffusion.ddpm.LatentDiffusion
-   params:
-     linear_start: 0.00085
-     linear_end: 0.0120
-     num_timesteps_cond: 1
-     log_every_t: 200
-     timesteps: 1000
-     first_stage_key: "jpg"
-     cond_stage_key: "txt"
-     image_size: 64
-     channels: 4
-     cond_stage_trainable: false # Note: different from the one we trained before
-     conditioning_key: crossattn
-     monitor: val/loss_simple_ema
-     scale_factor: 0.18215
-     use_ema: False
-
-     scheduler_config: # 10000 warmup steps
-       target: ldm.lr_scheduler.LambdaLinearScheduler
-       params:
-         warm_up_steps: [ 10000 ]
-         cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
-         f_start: [ 1.e-6 ]
-         f_max: [ 1. ]
-         f_min: [ 1. ]
-
-     unet_config:
-       target: ldm.modules.diffusionmodules.openaimodel.UNetModel
-       params:
-         use_fp16: True
-         image_size: 32 # unused
-         in_channels: 4
-         out_channels: 4
-         model_channels: 320
-         attention_resolutions: [ 4, 2, 1 ]
-         num_res_blocks: 2
-         channel_mult: [ 1, 2, 4, 4 ]
-         num_heads: 8
-         use_spatial_transformer: True
-         transformer_depth: 1
-         context_dim: 768
-         use_checkpoint: True
-         legacy: False
-
-     first_stage_config:
-       target: ldm.models.autoencoder.AutoencoderKL
-       params:
-         embed_dim: 4
-         monitor: val/rec_loss
-         ddconfig:
-           double_z: true
-           z_channels: 4
-           resolution: 256
-           in_channels: 3
-           out_ch: 3
-           ch: 128
-           ch_mult:
-           - 1
-           - 2
-           - 4
-           - 4
-           num_res_blocks: 2
-           attn_resolutions: []
-           dropout: 0.0
-         lossconfig:
-           target: torch.nn.Identity
-
-     cond_stage_config:
-       target: ldm.modules.encoders.modules.FrozenCLIPEmbedder

configs/v1-inpainting-inference.yaml DELETED
@@ -1,71 +0,0 @@
- model:
-   base_learning_rate: 7.5e-05
-   target: ldm.models.diffusion.ddpm.LatentInpaintDiffusion
-   params:
-     linear_start: 0.00085
-     linear_end: 0.0120
-     num_timesteps_cond: 1
-     log_every_t: 200
-     timesteps: 1000
-     first_stage_key: "jpg"
-     cond_stage_key: "txt"
-     image_size: 64
-     channels: 4
-     cond_stage_trainable: false # Note: different from the one we trained before
-     conditioning_key: hybrid # important
-     monitor: val/loss_simple_ema
-     scale_factor: 0.18215
-     finetune_keys: null
-
-     scheduler_config: # 10000 warmup steps
-       target: ldm.lr_scheduler.LambdaLinearScheduler
-       params:
-         warm_up_steps: [ 2500 ] # NOTE for resuming. use 10000 if starting from scratch
-         cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
-         f_start: [ 1.e-6 ]
-         f_max: [ 1. ]
-         f_min: [ 1. ]
-
-     unet_config:
-       target: ldm.modules.diffusionmodules.openaimodel.UNetModel
-       params:
-         image_size: 32 # unused
-         in_channels: 9 # 4 data + 4 downscaled image + 1 mask
-         out_channels: 4
-         model_channels: 320
-         attention_resolutions: [ 4, 2, 1 ]
-         num_res_blocks: 2
-         channel_mult: [ 1, 2, 4, 4 ]
-         num_heads: 8
-         use_spatial_transformer: True
-         transformer_depth: 1
-         context_dim: 768
-         use_checkpoint: True
-         legacy: False
-
-     first_stage_config:
-       target: ldm.models.autoencoder.AutoencoderKL
-       params:
-         embed_dim: 4
-         monitor: val/rec_loss
-         ddconfig:
-           double_z: true
-           z_channels: 4
-           resolution: 256
-           in_channels: 3
-           out_ch: 3
-           ch: 128
-           ch_mult:
-           - 1
-           - 2
-           - 4
-           - 4
-           num_res_blocks: 2
-           attn_resolutions: []
-           dropout: 0.0
-         lossconfig:
-           target: torch.nn.Identity
-
-     cond_stage_config:
-       target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
-
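Note: the in_channels: 9 comment above spells out the inpainting UNet's input layout: 4 noisy-latent channels, 4 channels for the VAE latent of the masked image, and 1 mask channel. A minimal sketch of assembling that input; shapes are illustrative:

# Minimal sketch: the 9-channel UNet input implied by in_channels: 9.
import torch

z_noisy = torch.randn(1, 4, 64, 64)   # noisy latent being denoised
z_masked = torch.randn(1, 4, 64, 64)  # VAE latent of the masked input image
mask = torch.zeros(1, 1, 64, 64)      # 1 where the region should be inpainted
unet_input = torch.cat([z_noisy, z_masked, mask], dim=1)  # (1, 9, 64, 64)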
configs/v2-inference-v.yaml DELETED
@@ -1,68 +0,0 @@
- model:
-   base_learning_rate: 1.0e-4
-   target: ldm.models.diffusion.ddpm.LatentDiffusion
-   params:
-     parameterization: "v"
-     linear_start: 0.00085
-     linear_end: 0.0120
-     num_timesteps_cond: 1
-     log_every_t: 200
-     timesteps: 1000
-     first_stage_key: "jpg"
-     cond_stage_key: "txt"
-     image_size: 64
-     channels: 4
-     cond_stage_trainable: false
-     conditioning_key: crossattn
-     monitor: val/loss_simple_ema
-     scale_factor: 0.18215
-     use_ema: False # we set this to false because this is an inference only config
-
-     unet_config:
-       target: ldm.modules.diffusionmodules.openaimodel.UNetModel
-       params:
-         use_checkpoint: True
-         use_fp16: True
-         image_size: 32 # unused
-         in_channels: 4
-         out_channels: 4
-         model_channels: 320
-         attention_resolutions: [ 4, 2, 1 ]
-         num_res_blocks: 2
-         channel_mult: [ 1, 2, 4, 4 ]
-         num_head_channels: 64 # need to fix for flash-attn
-         use_spatial_transformer: True
-         use_linear_in_transformer: True
-         transformer_depth: 1
-         context_dim: 1024
-         legacy: False
-
-     first_stage_config:
-       target: ldm.models.autoencoder.AutoencoderKL
-       params:
-         embed_dim: 4
-         monitor: val/rec_loss
-         ddconfig:
-           #attn_type: "vanilla-xformers"
-           double_z: true
-           z_channels: 4
-           resolution: 256
-           in_channels: 3
-           out_ch: 3
-           ch: 128
-           ch_mult:
-           - 1
-           - 2
-           - 4
-           - 4
-           num_res_blocks: 2
-           attn_resolutions: []
-           dropout: 0.0
-         lossconfig:
-           target: torch.nn.Identity
-
-     cond_stage_config:
-       target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
-       params:
-         freeze: True
-         layer: "penultimate"
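Note: parameterization: "v" switches the prediction target from the noise eps to v = alpha_t * eps - sigma_t * x0 (Salimans & Ho's progressive-distillation parameterization), from which x0 is recovered in closed form. A minimal sketch of the two identities, assuming a variance-preserving schedule with alpha_t^2 + sigma_t^2 = 1:

# Minimal sketch of the v-parameterization named by parameterization: "v".
import torch

def v_target(x0, eps, alpha_t, sigma_t):
    # what the UNet predicts instead of eps
    return alpha_t * eps - sigma_t * x0

def x0_from_v(x_t, v, alpha_t, sigma_t):
    # recover x0 from the prediction, using x_t = alpha_t * x0 + sigma_t * eps
    return alpha_t * x_t - sigma_t * v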
configs/v2-inference-v_fp32.yaml DELETED
@@ -1,68 +0,0 @@
- model:
-   base_learning_rate: 1.0e-4
-   target: ldm.models.diffusion.ddpm.LatentDiffusion
-   params:
-     parameterization: "v"
-     linear_start: 0.00085
-     linear_end: 0.0120
-     num_timesteps_cond: 1
-     log_every_t: 200
-     timesteps: 1000
-     first_stage_key: "jpg"
-     cond_stage_key: "txt"
-     image_size: 64
-     channels: 4
-     cond_stage_trainable: false
-     conditioning_key: crossattn
-     monitor: val/loss_simple_ema
-     scale_factor: 0.18215
-     use_ema: False # we set this to false because this is an inference only config
-
-     unet_config:
-       target: ldm.modules.diffusionmodules.openaimodel.UNetModel
-       params:
-         use_checkpoint: True
-         use_fp16: False
-         image_size: 32 # unused
-         in_channels: 4
-         out_channels: 4
-         model_channels: 320
-         attention_resolutions: [ 4, 2, 1 ]
-         num_res_blocks: 2
-         channel_mult: [ 1, 2, 4, 4 ]
-         num_head_channels: 64 # need to fix for flash-attn
-         use_spatial_transformer: True
-         use_linear_in_transformer: True
-         transformer_depth: 1
-         context_dim: 1024
-         legacy: False
-
-     first_stage_config:
-       target: ldm.models.autoencoder.AutoencoderKL
-       params:
-         embed_dim: 4
-         monitor: val/rec_loss
-         ddconfig:
-           #attn_type: "vanilla-xformers"
-           double_z: true
-           z_channels: 4
-           resolution: 256
-           in_channels: 3
-           out_ch: 3
-           ch: 128
-           ch_mult:
-           - 1
-           - 2
-           - 4
-           - 4
-           num_res_blocks: 2
-           attn_resolutions: []
-           dropout: 0.0
-         lossconfig:
-           target: torch.nn.Identity
-
-     cond_stage_config:
-       target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
-       params:
-         freeze: True
-         layer: "penultimate"

configs/v2-inference.yaml DELETED
@@ -1,67 +0,0 @@
- model:
-   base_learning_rate: 1.0e-4
-   target: ldm.models.diffusion.ddpm.LatentDiffusion
-   params:
-     linear_start: 0.00085
-     linear_end: 0.0120
-     num_timesteps_cond: 1
-     log_every_t: 200
-     timesteps: 1000
-     first_stage_key: "jpg"
-     cond_stage_key: "txt"
-     image_size: 64
-     channels: 4
-     cond_stage_trainable: false
-     conditioning_key: crossattn
-     monitor: val/loss_simple_ema
-     scale_factor: 0.18215
-     use_ema: False # we set this to false because this is an inference only config
-
-     unet_config:
-       target: ldm.modules.diffusionmodules.openaimodel.UNetModel
-       params:
-         use_checkpoint: True
-         use_fp16: True
-         image_size: 32 # unused
-         in_channels: 4
-         out_channels: 4
-         model_channels: 320
-         attention_resolutions: [ 4, 2, 1 ]
-         num_res_blocks: 2
-         channel_mult: [ 1, 2, 4, 4 ]
-         num_head_channels: 64 # need to fix for flash-attn
-         use_spatial_transformer: True
-         use_linear_in_transformer: True
-         transformer_depth: 1
-         context_dim: 1024
-         legacy: False
-
-     first_stage_config:
-       target: ldm.models.autoencoder.AutoencoderKL
-       params:
-         embed_dim: 4
-         monitor: val/rec_loss
-         ddconfig:
-           #attn_type: "vanilla-xformers"
-           double_z: true
-           z_channels: 4
-           resolution: 256
-           in_channels: 3
-           out_ch: 3
-           ch: 128
-           ch_mult:
-           - 1
-           - 2
-           - 4
-           - 4
-           num_res_blocks: 2
-           attn_resolutions: []
-           dropout: 0.0
-         lossconfig:
-           target: torch.nn.Identity
-
-     cond_stage_config:
-       target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
-       params:
-         freeze: True
-         layer: "penultimate"

configs/v2-inference_fp32.yaml DELETED
@@ -1,67 +0,0 @@
- model:
-   base_learning_rate: 1.0e-4
-   target: ldm.models.diffusion.ddpm.LatentDiffusion
-   params:
-     linear_start: 0.00085
-     linear_end: 0.0120
-     num_timesteps_cond: 1
-     log_every_t: 200
-     timesteps: 1000
-     first_stage_key: "jpg"
-     cond_stage_key: "txt"
-     image_size: 64
-     channels: 4
-     cond_stage_trainable: false
-     conditioning_key: crossattn
-     monitor: val/loss_simple_ema
-     scale_factor: 0.18215
-     use_ema: False # we set this to false because this is an inference only config
-
-     unet_config:
-       target: ldm.modules.diffusionmodules.openaimodel.UNetModel
-       params:
-         use_checkpoint: True
-         use_fp16: False
-         image_size: 32 # unused
-         in_channels: 4
-         out_channels: 4
-         model_channels: 320
-         attention_resolutions: [ 4, 2, 1 ]
-         num_res_blocks: 2
-         channel_mult: [ 1, 2, 4, 4 ]
-         num_head_channels: 64 # need to fix for flash-attn
-         use_spatial_transformer: True
-         use_linear_in_transformer: True
-         transformer_depth: 1
-         context_dim: 1024
-         legacy: False
-
-     first_stage_config:
-       target: ldm.models.autoencoder.AutoencoderKL
-       params:
-         embed_dim: 4
-         monitor: val/rec_loss
-         ddconfig:
-           #attn_type: "vanilla-xformers"
-           double_z: true
-           z_channels: 4
-           resolution: 256
-           in_channels: 3
-           out_ch: 3
-           ch: 128
-           ch_mult:
-           - 1
-           - 2
-           - 4
-           - 4
-           num_res_blocks: 2
-           attn_resolutions: []
-           dropout: 0.0
-         lossconfig:
-           target: torch.nn.Identity
-
-     cond_stage_config:
-       target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
-       params:
-         freeze: True
-         layer: "penultimate"

configs/v2-inpainting-inference.yaml DELETED
@@ -1,158 +0,0 @@
- model:
-   base_learning_rate: 5.0e-05
-   target: ldm.models.diffusion.ddpm.LatentInpaintDiffusion
-   params:
-     linear_start: 0.00085
-     linear_end: 0.0120
-     num_timesteps_cond: 1
-     log_every_t: 200
-     timesteps: 1000
-     first_stage_key: "jpg"
-     cond_stage_key: "txt"
-     image_size: 64
-     channels: 4
-     cond_stage_trainable: false
-     conditioning_key: hybrid
-     scale_factor: 0.18215
-     monitor: val/loss_simple_ema
-     finetune_keys: null
-     use_ema: False
-
-     unet_config:
-       target: ldm.modules.diffusionmodules.openaimodel.UNetModel
-       params:
-         use_checkpoint: True
-         image_size: 32 # unused
-         in_channels: 9
-         out_channels: 4
-         model_channels: 320
-         attention_resolutions: [ 4, 2, 1 ]
-         num_res_blocks: 2
-         channel_mult: [ 1, 2, 4, 4 ]
-         num_head_channels: 64 # need to fix for flash-attn
-         use_spatial_transformer: True
-         use_linear_in_transformer: True
-         transformer_depth: 1
-         context_dim: 1024
-         legacy: False
-
-     first_stage_config:
-       target: ldm.models.autoencoder.AutoencoderKL
-       params:
-         embed_dim: 4
-         monitor: val/rec_loss
-         ddconfig:
-           #attn_type: "vanilla-xformers"
-           double_z: true
-           z_channels: 4
-           resolution: 256
-           in_channels: 3
-           out_ch: 3
-           ch: 128
-           ch_mult:
-           - 1
-           - 2
-           - 4
-           - 4
-           num_res_blocks: 2
-           attn_resolutions: [ ]
-           dropout: 0.0
-         lossconfig:
-           target: torch.nn.Identity
-
-     cond_stage_config:
-       target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
-       params:
-         freeze: True
-         layer: "penultimate"
-
-
- data:
-   target: ldm.data.laion.WebDataModuleFromConfig
-   params:
-     tar_base: null # for concat as in LAION-A
-     p_unsafe_threshold: 0.1
-     filter_word_list: "data/filters.yaml"
-     max_pwatermark: 0.45
-     batch_size: 8
-     num_workers: 6
-     multinode: True
-     min_size: 512
-     train:
-       shards:
-         - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-0/{00000..18699}.tar -"
-         - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-1/{00000..18699}.tar -"
-         - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-2/{00000..18699}.tar -"
-         - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-3/{00000..18699}.tar -"
-         - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-4/{00000..18699}.tar -" #{00000-94333}.tar"
-       shuffle: 10000
-       image_key: jpg
-       image_transforms:
-       - target: torchvision.transforms.Resize
-         params:
-           size: 512
-           interpolation: 3
-       - target: torchvision.transforms.RandomCrop
-         params:
-           size: 512
-       postprocess:
-         target: ldm.data.laion.AddMask
-         params:
-           mode: "512train-large"
-           p_drop: 0.25
-     # NOTE use enough shards to avoid empty validation loops in workers
-     validation:
-       shards:
-         - "pipe:aws s3 cp s3://deep-floyd-s3/datasets/laion_cleaned-part5/{93001..94333}.tar - "
-       shuffle: 0
-       image_key: jpg
-       image_transforms:
-       - target: torchvision.transforms.Resize
-         params:
-           size: 512
-           interpolation: 3
-       - target: torchvision.transforms.CenterCrop
-         params:
-           size: 512
-       postprocess:
-         target: ldm.data.laion.AddMask
-         params:
-           mode: "512train-large"
-           p_drop: 0.25
-
- lightning:
-   find_unused_parameters: True
-   modelcheckpoint:
-     params:
-       every_n_train_steps: 5000
-
-   callbacks:
-     metrics_over_trainsteps_checkpoint:
-       params:
-         every_n_train_steps: 10000
-
-     image_logger:
-       target: main.ImageLogger
-       params:
-         enable_autocast: False
-         disabled: False
-         batch_frequency: 1000
-         max_images: 4
-         increase_log_steps: False
-         log_first_step: False
-         log_images_kwargs:
-           use_ema_scope: False
-           inpaint: False
-           plot_progressive_rows: False
-           plot_diffusion_rows: False
-           N: 4
-           unconditional_guidance_scale: 5.0
-           unconditional_guidance_label: [""]
-           ddim_steps: 50 # todo check these out for depth2img,
-           ddim_eta: 0.0 # todo check these out for depth2img,
-
-   trainer:
-     benchmark: True
-     val_check_interval: 5000000
-     num_sanity_val_steps: 0
-     accumulate_grad_batches: 1