jkeisling committed on
Commit
539ef62
1 Parent(s): 8b71872

Upload 7 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "attention_qkv_bias": false,
3
+ "codebook_size": 1024,
4
+ "dim": 1024,
5
+ "dropout": 0.0,
6
+ "fast_attention_qkv_bias": false,
7
+ "fast_dim": 1024,
8
+ "fast_head_dim": 64,
9
+ "fast_intermediate_size": 4096,
10
+ "fast_n_head": 16,
11
+ "fast_n_local_heads": 2,
12
+ "head_dim": 64,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 4096,
15
+ "is_reward_model": false,
16
+ "max_seq_len": 8192,
17
+ "model_type": "gpt2",
18
+ "n_fast_layer": 4,
19
+ "n_head": 16,
20
+ "n_layer": 24,
21
+ "n_local_heads": 2,
22
+ "norm_eps": 1e-6,
23
+ "num_codebooks": 8,
24
+ "rope_base": 1000000.0,
25
+ "tie_word_embeddings": false,
26
+ "use_gradient_checkpointing": true,
27
+ "vocab_size": 102048
28
+ }
firefly-gan-vq-fsq-8x1024-21hz-generator.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e777f440cb041cde17e4daea4a84c58c672e94855ea7023ceed8a7edcab46524
3
+ size 188268196
firefly_gan_vq.yaml ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _target_: fish_speech.models.vqgan.modules.firefly.FireflyArchitecture
2
+ spec_transform:
3
+ _target_: fish_speech.utils.spectrogram.LogMelSpectrogram
4
+ sample_rate: 44100
5
+ n_mels: 160
6
+ n_fft: 2048
7
+ hop_length: 512
8
+ win_length: 2048
9
+ backbone:
10
+ _target_: fish_speech.models.vqgan.modules.firefly.ConvNeXtEncoder
11
+ input_channels: 160
12
+ depths: [3, 3, 9, 3]
13
+ dims: [128, 256, 384, 512]
14
+ drop_path_rate: 0.2
15
+ kernel_size: 7
16
+ head:
17
+ _target_: fish_speech.models.vqgan.modules.firefly.HiFiGANGenerator
18
+ hop_length: 512
19
+   upsample_rates: [8, 8, 2, 2, 2] # a.k.a. strides
20
+ upsample_kernel_sizes: [16, 16, 4, 4, 4]
21
+ resblock_kernel_sizes: [3, 7, 11]
22
+ resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
23
+ num_mels: 512
24
+ upsample_initial_channel: 512
25
+ pre_conv_kernel_size: 13
26
+ post_conv_kernel_size: 13
27
+ quantizer:
28
+ _target_: fish_speech.models.vqgan.modules.fsq.DownsampleFiniteScalarQuantize
29
+ input_dim: 512
30
+ n_groups: 8
31
+ n_codebooks: 1
32
+ levels: [8, 5, 5, 5]
33
+ downsample_factor: [2, 2]
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54419beca4466c03ce90c11b8948a74ba2cfc508ae2ebdf9f13c6554b27f162a
3
+ size 1275864040
special_tokens_map.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d985a4b068e76fe888615efb298038ba3e954b91fd4e95271139489650ac875
3
+ size 11481370
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff