Divyasreepat committed on
Commit
8f04540
1 Parent(s): 890853a

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: keras-hub
3
+ ---
4
+ This is a [`StableDiffusion3` model](https://keras.io/api/keras_hub/models/stable_diffusion3) that was uploaded using the KerasHub library and can be used with the JAX, TensorFlow, and PyTorch backends.
5
+ Model config:
6
+ * **name:** stable_diffusion_3.5_backbone
7
+ * **trainable:** True
8
+ * **mmdit_patch_size:** 2
9
+ * **mmdit_hidden_dim:** 2432
10
+ * **mmdit_num_layers:** 38
11
+ * **mmdit_num_heads:** 38
12
+ * **mmdit_position_size:** 192
13
+ * **mmdit_qk_norm:** rms_norm
14
+ * **vae:** {'module': 'keras_hub.src.models.vae.vae_backbone', 'class_name': 'VAEBackbone', 'config': {'name': 'vae', 'trainable': True, 'encoder_num_filters': [128, 256, 512, 512], 'encoder_num_blocks': [2, 2, 2, 2], 'decoder_num_filters': [512, 512, 256, 128], 'decoder_num_blocks': [3, 3, 3, 3], 'sampler_method': 'sample', 'input_channels': 3, 'sample_channels': 32, 'output_channels': 3, 'scale': 1.5305, 'shift': 0.0609}, 'registered_name': 'VAEBackbone'}
15
+ * **clip_l:** {'module': 'keras_hub.src.models.clip.clip_text_encoder', 'class_name': 'CLIPTextEncoder', 'config': {'name': 'clip_l', 'trainable': True, 'vocabulary_size': 49408, 'embedding_dim': 768, 'hidden_dim': 768, 'num_layers': 12, 'num_heads': 12, 'intermediate_dim': 3072, 'intermediate_activation': 'quick_gelu', 'intermediate_output_index': 10, 'max_sequence_length': 77}, 'registered_name': 'CLIPTextEncoder'}
16
+ * **clip_g:** {'module': 'keras_hub.src.models.clip.clip_text_encoder', 'class_name': 'CLIPTextEncoder', 'config': {'name': 'clip_g', 'trainable': True, 'vocabulary_size': 49408, 'embedding_dim': 1280, 'hidden_dim': 1280, 'num_layers': 32, 'num_heads': 20, 'intermediate_dim': 5120, 'intermediate_activation': 'gelu', 'intermediate_output_index': 30, 'max_sequence_length': 77}, 'registered_name': 'CLIPTextEncoder'}
17
+ * **t5:** None
18
+ * **latent_channels:** 16
19
+ * **output_channels:** 3
20
+ * **num_train_timesteps:** 1000
21
+ * **shift:** 3.0
22
+ * **image_shape:** [1024, 1024, 3]
23
+
24
+ This model card has been generated automatically and should be completed by the model author. See [Model Cards documentation](https://huggingface.co/docs/hub/model-cards) for more information.
assets/clip_g_tokenizer/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
assets/clip_g_tokenizer/vocabulary.json ADDED
The diff for this file is too large to render. See raw diff
 
assets/clip_l_tokenizer/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
assets/clip_l_tokenizer/vocabulary.json ADDED
The diff for this file is too large to render. See raw diff
 
clip_g_preprocessor.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "module": "keras_hub.src.models.clip.clip_preprocessor",
3
+ "class_name": "CLIPPreprocessor",
4
+ "config": {
5
+ "name": "clip_g_preprocessor",
6
+ "trainable": true,
7
+ "dtype": {
8
+ "module": "keras",
9
+ "class_name": "DTypePolicy",
10
+ "config": {
11
+ "name": "bfloat16"
12
+ },
13
+ "registered_name": null
14
+ },
15
+ "tokenizer": {
16
+ "module": "keras_hub.src.models.clip.clip_tokenizer",
17
+ "class_name": "CLIPTokenizer",
18
+ "config": {
19
+ "name": "clip_g_tokenizer",
20
+ "trainable": true,
21
+ "dtype": {
22
+ "module": "keras",
23
+ "class_name": "DTypePolicy",
24
+ "config": {
25
+ "name": "int32"
26
+ },
27
+ "registered_name": null
28
+ },
29
+ "config_file": "clip_g_tokenizer.json",
30
+ "sequence_length": null,
31
+ "add_prefix_space": false,
32
+ "pad_with_end_token": false
33
+ },
34
+ "registered_name": "keras_hub>CLIPTokenizer"
35
+ },
36
+ "config_file": "clip_g_preprocessor.json",
37
+ "sequence_length": 77,
38
+ "add_start_token": true,
39
+ "add_end_token": true,
40
+ "to_lower": true
41
+ },
42
+ "registered_name": "keras_hub>CLIPPreprocessor"
43
+ }
clip_g_tokenizer.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "module": "keras_hub.src.models.clip.clip_tokenizer",
3
+ "class_name": "CLIPTokenizer",
4
+ "config": {
5
+ "name": "clip_g_tokenizer",
6
+ "trainable": true,
7
+ "dtype": {
8
+ "module": "keras",
9
+ "class_name": "DTypePolicy",
10
+ "config": {
11
+ "name": "int32"
12
+ },
13
+ "registered_name": null
14
+ },
15
+ "config_file": "clip_g_tokenizer.json",
16
+ "sequence_length": null,
17
+ "add_prefix_space": false,
18
+ "pad_with_end_token": false
19
+ },
20
+ "registered_name": "keras_hub>CLIPTokenizer"
21
+ }
clip_l_preprocessor.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "module": "keras_hub.src.models.clip.clip_preprocessor",
3
+ "class_name": "CLIPPreprocessor",
4
+ "config": {
5
+ "name": "clip_l_preprocessor",
6
+ "trainable": true,
7
+ "dtype": {
8
+ "module": "keras",
9
+ "class_name": "DTypePolicy",
10
+ "config": {
11
+ "name": "bfloat16"
12
+ },
13
+ "registered_name": null
14
+ },
15
+ "tokenizer": {
16
+ "module": "keras_hub.src.models.clip.clip_tokenizer",
17
+ "class_name": "CLIPTokenizer",
18
+ "config": {
19
+ "name": "clip_l_tokenizer",
20
+ "trainable": true,
21
+ "dtype": {
22
+ "module": "keras",
23
+ "class_name": "DTypePolicy",
24
+ "config": {
25
+ "name": "int32"
26
+ },
27
+ "registered_name": null
28
+ },
29
+ "config_file": "clip_l_tokenizer.json",
30
+ "sequence_length": null,
31
+ "add_prefix_space": false,
32
+ "pad_with_end_token": true
33
+ },
34
+ "registered_name": "keras_hub>CLIPTokenizer"
35
+ },
36
+ "config_file": "clip_l_preprocessor.json",
37
+ "sequence_length": 77,
38
+ "add_start_token": true,
39
+ "add_end_token": true,
40
+ "to_lower": true
41
+ },
42
+ "registered_name": "keras_hub>CLIPPreprocessor"
43
+ }
clip_l_tokenizer.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "module": "keras_hub.src.models.clip.clip_tokenizer",
3
+ "class_name": "CLIPTokenizer",
4
+ "config": {
5
+ "name": "clip_l_tokenizer",
6
+ "trainable": true,
7
+ "dtype": {
8
+ "module": "keras",
9
+ "class_name": "DTypePolicy",
10
+ "config": {
11
+ "name": "int32"
12
+ },
13
+ "registered_name": null
14
+ },
15
+ "config_file": "clip_l_tokenizer.json",
16
+ "sequence_length": null,
17
+ "add_prefix_space": false,
18
+ "pad_with_end_token": true
19
+ },
20
+ "registered_name": "keras_hub>CLIPTokenizer"
21
+ }
config.json ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "module": "keras_hub.src.models.stable_diffusion_3.stable_diffusion_3_backbone",
3
+ "class_name": "StableDiffusion3Backbone",
4
+ "config": {
5
+ "name": "stable_diffusion_3.5_backbone",
6
+ "trainable": true,
7
+ "mmdit_patch_size": 2,
8
+ "mmdit_hidden_dim": 2432,
9
+ "mmdit_num_layers": 38,
10
+ "mmdit_num_heads": 38,
11
+ "mmdit_position_size": 192,
12
+ "mmdit_qk_norm": "rms_norm",
13
+ "vae": {
14
+ "module": "keras_hub.src.models.vae.vae_backbone",
15
+ "class_name": "VAEBackbone",
16
+ "config": {
17
+ "name": "vae",
18
+ "trainable": true,
19
+ "encoder_num_filters": [
20
+ 128,
21
+ 256,
22
+ 512,
23
+ 512
24
+ ],
25
+ "encoder_num_blocks": [
26
+ 2,
27
+ 2,
28
+ 2,
29
+ 2
30
+ ],
31
+ "decoder_num_filters": [
32
+ 512,
33
+ 512,
34
+ 256,
35
+ 128
36
+ ],
37
+ "decoder_num_blocks": [
38
+ 3,
39
+ 3,
40
+ 3,
41
+ 3
42
+ ],
43
+ "sampler_method": "sample",
44
+ "input_channels": 3,
45
+ "sample_channels": 32,
46
+ "output_channels": 3,
47
+ "scale": 1.5305,
48
+ "shift": 0.0609
49
+ },
50
+ "registered_name": "VAEBackbone"
51
+ },
52
+ "clip_l": {
53
+ "module": "keras_hub.src.models.clip.clip_text_encoder",
54
+ "class_name": "CLIPTextEncoder",
55
+ "config": {
56
+ "name": "clip_l",
57
+ "trainable": true,
58
+ "vocabulary_size": 49408,
59
+ "embedding_dim": 768,
60
+ "hidden_dim": 768,
61
+ "num_layers": 12,
62
+ "num_heads": 12,
63
+ "intermediate_dim": 3072,
64
+ "intermediate_activation": "quick_gelu",
65
+ "intermediate_output_index": 10,
66
+ "max_sequence_length": 77
67
+ },
68
+ "registered_name": "CLIPTextEncoder"
69
+ },
70
+ "clip_g": {
71
+ "module": "keras_hub.src.models.clip.clip_text_encoder",
72
+ "class_name": "CLIPTextEncoder",
73
+ "config": {
74
+ "name": "clip_g",
75
+ "trainable": true,
76
+ "vocabulary_size": 49408,
77
+ "embedding_dim": 1280,
78
+ "hidden_dim": 1280,
79
+ "num_layers": 32,
80
+ "num_heads": 20,
81
+ "intermediate_dim": 5120,
82
+ "intermediate_activation": "gelu",
83
+ "intermediate_output_index": 30,
84
+ "max_sequence_length": 77
85
+ },
86
+ "registered_name": "CLIPTextEncoder"
87
+ },
88
+ "t5": null,
89
+ "latent_channels": 16,
90
+ "output_channels": 3,
91
+ "num_train_timesteps": 1000,
92
+ "shift": 3.0,
93
+ "image_shape": [
94
+ 1024,
95
+ 1024,
96
+ 3
97
+ ]
98
+ },
99
+ "registered_name": "keras_hub>StableDiffusion3Backbone"
100
+ }
metadata.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "keras_version": "3.6.0",
3
+ "keras_hub_version": "0.17.0.dev0",
4
+ "parameter_count": 9048410595,
5
+ "date_saved": "2024-10-28@23:51:41"
6
+ }
model.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12a09813235f449372c6a3ab0aa9e2a1ae30541e28b4af9a74a924ad3fd3e749
3
+ size 18101757720
preprocessor.json ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "module": "keras_hub.src.models.stable_diffusion_3.stable_diffusion_3_text_to_image_preprocessor",
3
+ "class_name": "StableDiffusion3TextToImagePreprocessor",
4
+ "config": {
5
+ "name": "stable_diffusion_3_text_to_image_preprocessor",
6
+ "trainable": true,
7
+ "dtype": {
8
+ "module": "keras",
9
+ "class_name": "DTypePolicy",
10
+ "config": {
11
+ "name": "bfloat16"
12
+ },
13
+ "registered_name": null
14
+ },
15
+ "config_file": "preprocessor.json",
16
+ "clip_l_preprocessor": {
17
+ "module": "keras_hub.src.models.clip.clip_preprocessor",
18
+ "class_name": "CLIPPreprocessor",
19
+ "config": {
20
+ "name": "clip_l_preprocessor",
21
+ "trainable": true,
22
+ "dtype": {
23
+ "module": "keras",
24
+ "class_name": "DTypePolicy",
25
+ "config": {
26
+ "name": "bfloat16"
27
+ },
28
+ "registered_name": null
29
+ },
30
+ "tokenizer": {
31
+ "module": "keras_hub.src.models.clip.clip_tokenizer",
32
+ "class_name": "CLIPTokenizer",
33
+ "config": {
34
+ "name": "clip_l_tokenizer",
35
+ "trainable": true,
36
+ "dtype": {
37
+ "module": "keras",
38
+ "class_name": "DTypePolicy",
39
+ "config": {
40
+ "name": "int32"
41
+ },
42
+ "registered_name": null
43
+ },
44
+ "config_file": "clip_l_tokenizer.json",
45
+ "sequence_length": null,
46
+ "add_prefix_space": false,
47
+ "pad_with_end_token": true
48
+ },
49
+ "registered_name": "keras_hub>CLIPTokenizer"
50
+ },
51
+ "config_file": "clip_l_preprocessor.json",
52
+ "sequence_length": 77,
53
+ "add_start_token": true,
54
+ "add_end_token": true,
55
+ "to_lower": true
56
+ },
57
+ "registered_name": "keras_hub>CLIPPreprocessor"
58
+ },
59
+ "clip_g_preprocessor": {
60
+ "module": "keras_hub.src.models.clip.clip_preprocessor",
61
+ "class_name": "CLIPPreprocessor",
62
+ "config": {
63
+ "name": "clip_g_preprocessor",
64
+ "trainable": true,
65
+ "dtype": {
66
+ "module": "keras",
67
+ "class_name": "DTypePolicy",
68
+ "config": {
69
+ "name": "bfloat16"
70
+ },
71
+ "registered_name": null
72
+ },
73
+ "tokenizer": {
74
+ "module": "keras_hub.src.models.clip.clip_tokenizer",
75
+ "class_name": "CLIPTokenizer",
76
+ "config": {
77
+ "name": "clip_g_tokenizer",
78
+ "trainable": true,
79
+ "dtype": {
80
+ "module": "keras",
81
+ "class_name": "DTypePolicy",
82
+ "config": {
83
+ "name": "int32"
84
+ },
85
+ "registered_name": null
86
+ },
87
+ "config_file": "clip_g_tokenizer.json",
88
+ "sequence_length": null,
89
+ "add_prefix_space": false,
90
+ "pad_with_end_token": false
91
+ },
92
+ "registered_name": "keras_hub>CLIPTokenizer"
93
+ },
94
+ "config_file": "clip_g_preprocessor.json",
95
+ "sequence_length": 77,
96
+ "add_start_token": true,
97
+ "add_end_token": true,
98
+ "to_lower": true
99
+ },
100
+ "registered_name": "keras_hub>CLIPPreprocessor"
101
+ },
102
+ "t5_preprocessor": null
103
+ },
104
+ "registered_name": "keras_hub>StableDiffusion3TextToImagePreprocessor"
105
+ }