Bibek committed on
Commit
0ee4806
1 Parent(s): 998c668

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. autoencoder.ckpt +3 -0
  2. config.json +123 -0
autoencoder.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f02bbd964704202a7cee8ef60f1a2aee8c74634bdfcd3922c52aaba9924d75d
3
+ size 351945786
config.json ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "autoencoder",
3
+ "sample_size": 65536,
4
+ "sample_rate": 44100,
5
+ "audio_channels": 1,
6
+ "model": {
7
+ "encoder": {
8
+ "type": "oobleck",
9
+ "config": {
10
+ "in_channels": 1,
11
+ "channels": 96,
12
+ "c_mults": [1, 2, 4, 8, 16],
13
+ "strides": [2, 4, 4, 8, 8],
14
+ "latent_dim": 64,
15
+ "use_snake": true
16
+ }
17
+ },
18
+ "decoder": {
19
+ "type": "oobleck",
20
+ "config": {
21
+ "out_channels": 1,
22
+ "channels": 96,
23
+ "c_mults": [1, 2, 4, 8, 16],
24
+ "strides": [2, 4, 4, 8, 8],
25
+ "latent_dim": 64,
26
+ "use_snake": true,
27
+ "final_tanh": false
28
+ }
29
+ },
30
+ "bottleneck": {
31
+ "type": "dac_rvq",
32
+ "config": {
33
+ "input_dim": 64,
34
+ "n_codebooks": 9,
35
+ "codebook_size": 1024,
36
+ "codebook_dim": 8,
37
+ "quantizer_dropout": 1.0
38
+ }
39
+ },
40
+ "latent_dim": 64,
41
+ "downsampling_ratio": 2048,
42
+ "io_channels": 1
43
+ },
44
+ "training": {
45
+ "learning_rate": 1.5e-4,
46
+ "warmup_steps": 0,
47
+ "use_ema": true,
48
+ "optimizer_configs": {
49
+ "autoencoder": {
50
+ "optimizer": {
51
+ "type": "AdamW",
52
+ "config": {
53
+ "betas": [0.8, 0.99],
54
+ "lr": 1.5e-4,
55
+ "weight_decay": 1e-3
56
+ }
57
+ },
58
+ "scheduler": {
59
+ "type": "InverseLR",
60
+ "config": {
61
+ "inv_gamma": 200000,
62
+ "power": 0.5,
63
+ "warmup": 0.999
64
+ }
65
+ }
66
+ },
67
+ "discriminator": {
68
+ "optimizer": {
69
+ "type": "AdamW",
70
+ "config": {
71
+ "betas": [0.8, 0.99],
72
+ "lr": 3e-4,
73
+ "weight_decay": 1e-3
74
+ }
75
+ },
76
+ "scheduler": {
77
+ "type": "InverseLR",
78
+ "config": {
79
+ "inv_gamma": 200000,
80
+ "power": 0.5,
81
+ "warmup": 0.999
82
+ }
83
+ }
84
+ }
85
+ },
86
+ "loss_configs": {
87
+ "discriminator": {
88
+ "type": "encodec",
89
+ "config": {
90
+ "filters": 64,
91
+ "n_ffts": [2048, 1024, 512, 256, 128],
92
+ "hop_lengths": [512, 256, 128, 64, 32],
93
+ "win_lengths": [2048, 1024, 512, 256, 128]
94
+ },
95
+ "weights": {
96
+ "adversarial": 0.1,
97
+ "feature_matching": 5.0
98
+ }
99
+ },
100
+ "spectral": {
101
+ "type": "mrstft",
102
+ "config": {
103
+ "fft_sizes": [2048, 1024, 512, 256, 128, 64, 32],
104
+ "hop_sizes": [512, 256, 128, 64, 32, 16, 8],
105
+ "win_lengths": [2048, 1024, 512, 256, 128, 64, 32],
106
+ "perceptual_weighting": true
107
+ },
108
+ "weights": {
109
+ "mrstft": 1.0
110
+ }
111
+ },
112
+ "time": {
113
+ "type": "l1",
114
+ "weights": {
115
+ "l1": 0.0
116
+ }
117
+ }
118
+ },
119
+ "demo": {
120
+ "demo_every": 2000
121
+ }
122
+ }
123
+ }