Upload MusicgenMelodyForConditionalGeneration (#4)
Browse files
- Upload MusicgenMelodyForConditionalGeneration (26b5a7209f2d93ef7ce851d4bfc7cc28bcb7dba3)
Co-authored-by: Yoach Lacombe <ylacombe@users.noreply.huggingface.co>
- config.json +1 -1
- generation_config.json +1 -1
- model-00001-of-00003.safetensors +2 -2
- model-00002-of-00003.safetensors +2 -2
- model-00003-of-00003.safetensors +2 -2
- model.safetensors.index.json +21 -7
config.json
CHANGED
@@ -292,5 +292,5 @@
|
|
292 |
"vocab_size": 32128
|
293 |
},
|
294 |
"torch_dtype": "float32",
|
295 |
-
"transformers_version": "4.
|
296 |
}
|
|
|
292 |
"vocab_size": 32128
|
293 |
},
|
294 |
"torch_dtype": "float32",
|
295 |
+
"transformers_version": "4.40.0.dev0"
|
296 |
}
|
generation_config.json
CHANGED
@@ -6,5 +6,5 @@
|
|
6 |
"guidance_scale": 3.0,
|
7 |
"max_length": 1500,
|
8 |
"pad_token_id": 2048,
|
9 |
-
"transformers_version": "4.
|
10 |
}
|
|
|
6 |
"guidance_scale": 3.0,
|
7 |
"max_length": 1500,
|
8 |
"pad_token_id": 2048,
|
9 |
+
"transformers_version": "4.40.0.dev0"
|
10 |
}
|
model-00001-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:de1f894560846ddcaa2c8a2259229e135bcab586ec5002145869777ce0a15a5a
|
3 |
+
size 4987010864
|
model-00002-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1df2e518045a95c6ced5aaad33d34eea40346d7e158539595134dbff3b47e469
|
3 |
+
size 4966907208
|
model-00003-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a0621f095b1fb03d926437ce35af9510f50438b7d15b237aae25c7d632f6c362
|
3 |
+
size 677597536
|
model.safetensors.index.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
"metadata": {
|
3 |
-
"total_size":
|
4 |
},
|
5 |
"weight_map": {
|
6 |
"audio_enc_to_dec_proj.bias": "model-00003-of-00003.safetensors",
|
@@ -8,7 +8,14 @@
|
|
8 |
"audio_encoder.decoder.layers.0.conv.bias": "model-00001-of-00003.safetensors",
|
9 |
"audio_encoder.decoder.layers.0.conv.weight_g": "model-00001-of-00003.safetensors",
|
10 |
"audio_encoder.decoder.layers.0.conv.weight_v": "model-00001-of-00003.safetensors",
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
"audio_encoder.decoder.layers.1.lstm.weight_ih_l0": "model-00001-of-00003.safetensors",
|
|
|
12 |
"audio_encoder.decoder.layers.10.block.1.conv.bias": "model-00001-of-00003.safetensors",
|
13 |
"audio_encoder.decoder.layers.10.block.1.conv.weight_g": "model-00001-of-00003.safetensors",
|
14 |
"audio_encoder.decoder.layers.10.block.1.conv.weight_v": "model-00001-of-00003.safetensors",
|
@@ -66,7 +73,14 @@
|
|
66 |
"audio_encoder.encoder.layers.12.conv.bias": "model-00001-of-00003.safetensors",
|
67 |
"audio_encoder.encoder.layers.12.conv.weight_g": "model-00001-of-00003.safetensors",
|
68 |
"audio_encoder.encoder.layers.12.conv.weight_v": "model-00001-of-00003.safetensors",
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
"audio_encoder.encoder.layers.13.lstm.weight_ih_l0": "model-00001-of-00003.safetensors",
|
|
|
70 |
"audio_encoder.encoder.layers.15.conv.bias": "model-00001-of-00003.safetensors",
|
71 |
"audio_encoder.encoder.layers.15.conv.weight_g": "model-00001-of-00003.safetensors",
|
72 |
"audio_encoder.encoder.layers.15.conv.weight_v": "model-00001-of-00003.safetensors",
|
@@ -257,9 +271,9 @@
|
|
257 |
"decoder.model.decoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00003.safetensors",
|
258 |
"decoder.model.decoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00003.safetensors",
|
259 |
"decoder.model.decoder.layers.20.fc1.weight": "model-00001-of-00003.safetensors",
|
260 |
-
"decoder.model.decoder.layers.20.fc2.weight": "model-
|
261 |
-
"decoder.model.decoder.layers.20.final_layer_norm.bias": "model-
|
262 |
-
"decoder.model.decoder.layers.20.final_layer_norm.weight": "model-
|
263 |
"decoder.model.decoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
|
264 |
"decoder.model.decoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00003.safetensors",
|
265 |
"decoder.model.decoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
|
@@ -270,10 +284,10 @@
|
|
270 |
"decoder.model.decoder.layers.21.fc2.weight": "model-00002-of-00003.safetensors",
|
271 |
"decoder.model.decoder.layers.21.final_layer_norm.bias": "model-00002-of-00003.safetensors",
|
272 |
"decoder.model.decoder.layers.21.final_layer_norm.weight": "model-00002-of-00003.safetensors",
|
273 |
-
"decoder.model.decoder.layers.21.self_attn.k_proj.weight": "model-
|
274 |
"decoder.model.decoder.layers.21.self_attn.out_proj.weight": "model-00002-of-00003.safetensors",
|
275 |
"decoder.model.decoder.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
276 |
-
"decoder.model.decoder.layers.21.self_attn.v_proj.weight": "model-
|
277 |
"decoder.model.decoder.layers.21.self_attn_layer_norm.bias": "model-00002-of-00003.safetensors",
|
278 |
"decoder.model.decoder.layers.21.self_attn_layer_norm.weight": "model-00002-of-00003.safetensors",
|
279 |
"decoder.model.decoder.layers.22.fc1.weight": "model-00002-of-00003.safetensors",
|
@@ -526,7 +540,7 @@
|
|
526 |
"decoder.model.decoder.layers.44.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
527 |
"decoder.model.decoder.layers.44.self_attn_layer_norm.bias": "model-00002-of-00003.safetensors",
|
528 |
"decoder.model.decoder.layers.44.self_attn_layer_norm.weight": "model-00002-of-00003.safetensors",
|
529 |
-
"decoder.model.decoder.layers.45.fc1.weight": "model-
|
530 |
"decoder.model.decoder.layers.45.fc2.weight": "model-00003-of-00003.safetensors",
|
531 |
"decoder.model.decoder.layers.45.final_layer_norm.bias": "model-00003-of-00003.safetensors",
|
532 |
"decoder.model.decoder.layers.45.final_layer_norm.weight": "model-00003-of-00003.safetensors",
|
|
|
1 |
{
|
2 |
"metadata": {
|
3 |
+
"total_size": 10631424280
|
4 |
},
|
5 |
"weight_map": {
|
6 |
"audio_enc_to_dec_proj.bias": "model-00003-of-00003.safetensors",
|
|
|
8 |
"audio_encoder.decoder.layers.0.conv.bias": "model-00001-of-00003.safetensors",
|
9 |
"audio_encoder.decoder.layers.0.conv.weight_g": "model-00001-of-00003.safetensors",
|
10 |
"audio_encoder.decoder.layers.0.conv.weight_v": "model-00001-of-00003.safetensors",
|
11 |
+
"audio_encoder.decoder.layers.1.lstm.bias_hh_l0": "model-00001-of-00003.safetensors",
|
12 |
+
"audio_encoder.decoder.layers.1.lstm.bias_hh_l1": "model-00001-of-00003.safetensors",
|
13 |
+
"audio_encoder.decoder.layers.1.lstm.bias_ih_l0": "model-00001-of-00003.safetensors",
|
14 |
+
"audio_encoder.decoder.layers.1.lstm.bias_ih_l1": "model-00001-of-00003.safetensors",
|
15 |
+
"audio_encoder.decoder.layers.1.lstm.weight_hh_l0": "model-00001-of-00003.safetensors",
|
16 |
+
"audio_encoder.decoder.layers.1.lstm.weight_hh_l1": "model-00001-of-00003.safetensors",
|
17 |
"audio_encoder.decoder.layers.1.lstm.weight_ih_l0": "model-00001-of-00003.safetensors",
|
18 |
+
"audio_encoder.decoder.layers.1.lstm.weight_ih_l1": "model-00001-of-00003.safetensors",
|
19 |
"audio_encoder.decoder.layers.10.block.1.conv.bias": "model-00001-of-00003.safetensors",
|
20 |
"audio_encoder.decoder.layers.10.block.1.conv.weight_g": "model-00001-of-00003.safetensors",
|
21 |
"audio_encoder.decoder.layers.10.block.1.conv.weight_v": "model-00001-of-00003.safetensors",
|
|
|
73 |
"audio_encoder.encoder.layers.12.conv.bias": "model-00001-of-00003.safetensors",
|
74 |
"audio_encoder.encoder.layers.12.conv.weight_g": "model-00001-of-00003.safetensors",
|
75 |
"audio_encoder.encoder.layers.12.conv.weight_v": "model-00001-of-00003.safetensors",
|
76 |
+
"audio_encoder.encoder.layers.13.lstm.bias_hh_l0": "model-00001-of-00003.safetensors",
|
77 |
+
"audio_encoder.encoder.layers.13.lstm.bias_hh_l1": "model-00001-of-00003.safetensors",
|
78 |
+
"audio_encoder.encoder.layers.13.lstm.bias_ih_l0": "model-00001-of-00003.safetensors",
|
79 |
+
"audio_encoder.encoder.layers.13.lstm.bias_ih_l1": "model-00001-of-00003.safetensors",
|
80 |
+
"audio_encoder.encoder.layers.13.lstm.weight_hh_l0": "model-00001-of-00003.safetensors",
|
81 |
+
"audio_encoder.encoder.layers.13.lstm.weight_hh_l1": "model-00001-of-00003.safetensors",
|
82 |
"audio_encoder.encoder.layers.13.lstm.weight_ih_l0": "model-00001-of-00003.safetensors",
|
83 |
+
"audio_encoder.encoder.layers.13.lstm.weight_ih_l1": "model-00001-of-00003.safetensors",
|
84 |
"audio_encoder.encoder.layers.15.conv.bias": "model-00001-of-00003.safetensors",
|
85 |
"audio_encoder.encoder.layers.15.conv.weight_g": "model-00001-of-00003.safetensors",
|
86 |
"audio_encoder.encoder.layers.15.conv.weight_v": "model-00001-of-00003.safetensors",
|
|
|
271 |
"decoder.model.decoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00003.safetensors",
|
272 |
"decoder.model.decoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00003.safetensors",
|
273 |
"decoder.model.decoder.layers.20.fc1.weight": "model-00001-of-00003.safetensors",
|
274 |
+
"decoder.model.decoder.layers.20.fc2.weight": "model-00002-of-00003.safetensors",
|
275 |
+
"decoder.model.decoder.layers.20.final_layer_norm.bias": "model-00002-of-00003.safetensors",
|
276 |
+
"decoder.model.decoder.layers.20.final_layer_norm.weight": "model-00002-of-00003.safetensors",
|
277 |
"decoder.model.decoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
|
278 |
"decoder.model.decoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00003.safetensors",
|
279 |
"decoder.model.decoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
|
|
|
284 |
"decoder.model.decoder.layers.21.fc2.weight": "model-00002-of-00003.safetensors",
|
285 |
"decoder.model.decoder.layers.21.final_layer_norm.bias": "model-00002-of-00003.safetensors",
|
286 |
"decoder.model.decoder.layers.21.final_layer_norm.weight": "model-00002-of-00003.safetensors",
|
287 |
+
"decoder.model.decoder.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
288 |
"decoder.model.decoder.layers.21.self_attn.out_proj.weight": "model-00002-of-00003.safetensors",
|
289 |
"decoder.model.decoder.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
290 |
+
"decoder.model.decoder.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
291 |
"decoder.model.decoder.layers.21.self_attn_layer_norm.bias": "model-00002-of-00003.safetensors",
|
292 |
"decoder.model.decoder.layers.21.self_attn_layer_norm.weight": "model-00002-of-00003.safetensors",
|
293 |
"decoder.model.decoder.layers.22.fc1.weight": "model-00002-of-00003.safetensors",
|
|
|
540 |
"decoder.model.decoder.layers.44.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
541 |
"decoder.model.decoder.layers.44.self_attn_layer_norm.bias": "model-00002-of-00003.safetensors",
|
542 |
"decoder.model.decoder.layers.44.self_attn_layer_norm.weight": "model-00002-of-00003.safetensors",
|
543 |
+
"decoder.model.decoder.layers.45.fc1.weight": "model-00003-of-00003.safetensors",
|
544 |
"decoder.model.decoder.layers.45.fc2.weight": "model-00003-of-00003.safetensors",
|
545 |
"decoder.model.decoder.layers.45.final_layer_norm.bias": "model-00003-of-00003.safetensors",
|
546 |
"decoder.model.decoder.layers.45.final_layer_norm.weight": "model-00003-of-00003.safetensors",
|