Upload MusicgenMelodyForConditionalGeneration
Files changed:
- config.json +2 -2
- model-00001-of-00003.safetensors +2 -2
- model-00002-of-00003.safetensors +2 -2
- model-00003-of-00003.safetensors +2 -2
- model.safetensors.index.json +13 -5
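One way to fetch just the files touched by this commit is sketched below with huggingface_hub; the repo id "your-org/musicgen-melody-stereo" is a placeholder, since the actual repository path is not shown in this view.

```python
# Minimal sketch (placeholder repo id): download only the files in this commit.
from huggingface_hub import snapshot_download

local_dir = snapshot_download(
    repo_id="your-org/musicgen-melody-stereo",  # placeholder, not the real repo
    allow_patterns=[
        "config.json",
        "model-*.safetensors",
        "model.safetensors.index.json",
    ],
)
print(local_dir)
```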
config.json
CHANGED
@@ -105,7 +105,7 @@
     "add_cross_attention": false,
     "architectures": null,
     "attention_dropout": 0.0,
-    "audio_channels":
+    "audio_channels": 2,
     "bad_words_ids": null,
     "begin_suppress_tokens": null,
     "bos_token_id": 2048,
@@ -145,7 +145,7 @@
     "num_attention_heads": 32,
     "num_beam_groups": 1,
     "num_beams": 1,
-    "num_codebooks":
+    "num_codebooks": 8,
     "num_hidden_layers": 48,
     "num_return_sequences": 1,
     "output_attentions": false,
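The two edits above set the decoder to stereo output and eight codebooks. A minimal check that a downloaded copy of the config carries these values, assuming the same placeholder repo id as above:

```python
# Minimal sketch: read the uploaded config via transformers' AutoConfig.
# The repo id is a placeholder; point it at the repository this commit lives in.
from transformers import AutoConfig

config = AutoConfig.from_pretrained("your-org/musicgen-melody-stereo")

# audio_channels and num_codebooks sit on the decoder sub-config.
print(config.decoder.audio_channels)  # 2 after this commit
print(config.decoder.num_codebooks)   # 8 after this commit
```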
model-00001-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:9eda9af3032ccd59e8d198260e1bb75b3b5155ef23940f710aa3ca2648112132
+size 4987010880
model-00002-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:1df2e518045a95c6ced5aaad33d34eea40346d7e158539595134dbff3b47e469
+size 4966907208
model-00003-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:a0621f095b1fb03d926437ce35af9510f50438b7d15b237aae25c7d632f6c362
+size 677597536
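Each of the three shard files above is stored as a Git LFS pointer (version, sha256 oid, size). A small sketch of how one might verify a downloaded shard against its pointer, assuming the file sits in the current directory:

```python
# Minimal sketch: check a downloaded shard against the LFS pointer's oid and size.
import hashlib
from pathlib import Path

EXPECTED_SHA256 = "9eda9af3032ccd59e8d198260e1bb75b3b5155ef23940f710aa3ca2648112132"
EXPECTED_SIZE = 4_987_010_880  # bytes, from the pointer for shard 1

path = Path("model-00001-of-00003.safetensors")
digest = hashlib.sha256()
with path.open("rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        digest.update(chunk)

assert path.stat().st_size == EXPECTED_SIZE, "size mismatch"
assert digest.hexdigest() == EXPECTED_SHA256, "sha256 mismatch"
print("shard 1 matches its LFS pointer")
```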
model.safetensors.index.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size":
+    "total_size": 10631424280
   },
   "weight_map": {
     "audio_enc_to_dec_proj.bias": "model-00003-of-00003.safetensors",
@@ -125,11 +125,19 @@
     "decoder.lm_heads.1.weight": "model-00003-of-00003.safetensors",
     "decoder.lm_heads.2.weight": "model-00003-of-00003.safetensors",
     "decoder.lm_heads.3.weight": "model-00003-of-00003.safetensors",
+    "decoder.lm_heads.4.weight": "model-00003-of-00003.safetensors",
+    "decoder.lm_heads.5.weight": "model-00003-of-00003.safetensors",
+    "decoder.lm_heads.6.weight": "model-00003-of-00003.safetensors",
+    "decoder.lm_heads.7.weight": "model-00003-of-00003.safetensors",
     "decoder.model.decoder.embed_positions.weights": "model-00001-of-00003.safetensors",
     "decoder.model.decoder.embed_tokens.0.weight": "model-00001-of-00003.safetensors",
     "decoder.model.decoder.embed_tokens.1.weight": "model-00001-of-00003.safetensors",
     "decoder.model.decoder.embed_tokens.2.weight": "model-00001-of-00003.safetensors",
     "decoder.model.decoder.embed_tokens.3.weight": "model-00001-of-00003.safetensors",
+    "decoder.model.decoder.embed_tokens.4.weight": "model-00001-of-00003.safetensors",
+    "decoder.model.decoder.embed_tokens.5.weight": "model-00001-of-00003.safetensors",
+    "decoder.model.decoder.embed_tokens.6.weight": "model-00001-of-00003.safetensors",
+    "decoder.model.decoder.embed_tokens.7.weight": "model-00001-of-00003.safetensors",
     "decoder.model.decoder.layer_norm.bias": "model-00003-of-00003.safetensors",
     "decoder.model.decoder.layer_norm.weight": "model-00003-of-00003.safetensors",
     "decoder.model.decoder.layers.0.fc1.weight": "model-00001-of-00003.safetensors",
@@ -263,9 +271,9 @@
     "decoder.model.decoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00003.safetensors",
     "decoder.model.decoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00003.safetensors",
     "decoder.model.decoder.layers.20.fc1.weight": "model-00001-of-00003.safetensors",
-    "decoder.model.decoder.layers.20.fc2.weight": "model-
-    "decoder.model.decoder.layers.20.final_layer_norm.bias": "model-
-    "decoder.model.decoder.layers.20.final_layer_norm.weight": "model-
+    "decoder.model.decoder.layers.20.fc2.weight": "model-00002-of-00003.safetensors",
+    "decoder.model.decoder.layers.20.final_layer_norm.bias": "model-00002-of-00003.safetensors",
+    "decoder.model.decoder.layers.20.final_layer_norm.weight": "model-00002-of-00003.safetensors",
     "decoder.model.decoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
     "decoder.model.decoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00003.safetensors",
     "decoder.model.decoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
@@ -532,7 +540,7 @@
     "decoder.model.decoder.layers.44.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
     "decoder.model.decoder.layers.44.self_attn_layer_norm.bias": "model-00002-of-00003.safetensors",
     "decoder.model.decoder.layers.44.self_attn_layer_norm.weight": "model-00002-of-00003.safetensors",
-    "decoder.model.decoder.layers.45.fc1.weight": "model-
+    "decoder.model.decoder.layers.45.fc1.weight": "model-00003-of-00003.safetensors",
     "decoder.model.decoder.layers.45.fc2.weight": "model-00003-of-00003.safetensors",
     "decoder.model.decoder.layers.45.final_layer_norm.bias": "model-00003-of-00003.safetensors",
     "decoder.model.decoder.layers.45.final_layer_norm.weight": "model-00003-of-00003.safetensors",
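The index update registers the four extra codebook heads and embedding tables (indices 4-7) and the corrected shard assignments. A short sketch of how the weight_map can be consumed to locate and load one of the newly listed tensors, assuming the shards and index sit in the current directory:

```python
# Minimal sketch: resolve a tensor to its shard via model.safetensors.index.json
# and load it with safetensors. Paths are assumed to be local files.
import json
from collections import Counter
from safetensors import safe_open

with open("model.safetensors.index.json") as f:
    index = json.load(f)

weight_map = index["weight_map"]
print(index["metadata"]["total_size"])  # 10631424280 after this commit
print(Counter(weight_map.values()))     # how many tensors each shard holds

# The newly listed eighth codebook head is mapped to shard 3 above.
name = "decoder.lm_heads.7.weight"
with safe_open(weight_map[name], framework="pt") as f:
    print(f.get_tensor(name).shape)
```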