pplantinga committed
Commit 2a2af5c
1 Parent(s): 6025f3b

Revert "Update model to latest version"

This reverts commit 7e4f17f9504e5d8f26eda48d2545059025414016.

Files changed (2):
  1. enhance_model.ckpt +2 -2
  2. hyperparams.yaml +56 -10
enhance_model.ckpt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:348bdc866632457e60d9eea38aa9a511910b89cd0c1ad1b78c229535bd5b60e6
-size 89230845
+oid sha256:eea2ed64b9b136ccfa66741860d47b4a3ea6954bb8eb07d3212a14b601a0d3fb
+size 29005818
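
A quick way to confirm that a local copy of enhance_model.ckpt is the reverted checkpoint, and not the bare LFS pointer or a partial download, is to compare its size and SHA-256 digest against the pointer above. A minimal sketch (not part of this repo; the expected values are copied from the diff):

import hashlib
from pathlib import Path

# Values from the post-revert Git LFS pointer in this commit.
EXPECTED_OID = "eea2ed64b9b136ccfa66741860d47b4a3ea6954bb8eb07d3212a14b601a0d3fb"
EXPECTED_SIZE = 29005818

ckpt = Path("enhance_model.ckpt")
assert ckpt.stat().st_size == EXPECTED_SIZE, "size mismatch: LFS pointer or partial download?"
digest = hashlib.sha256(ckpt.read_bytes()).hexdigest()
assert digest == EXPECTED_OID, "sha256 mismatch: not the reverted checkpoint"
print("enhance_model.ckpt matches the post-revert LFS pointer")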
hyperparams.yaml CHANGED
@@ -4,21 +4,67 @@ n_fft: 512
 win_length: 32
 hop_length: 16
 
-mask_weight: 0.99
-
 # Enhancement model args
-enhance_model: !new:speechbrain.lobes.models.EnhanceResnet.EnhanceResnet
+emb_channels: 1024
+emb_kernel_size: 3
+emb_padding: same
+enhancer_size: 512
+enhancer_layers: 8
+enhancer_heads: 8
+enhancer_causal: False
+enhancer_drop_rate: 0.1
+
+compute_stft: !new:speechbrain.processing.features.STFT
+    sample_rate: !ref <sample_rate>
     n_fft: !ref <n_fft>
     win_length: !ref <win_length>
     hop_length: !ref <hop_length>
+
+compute_istft: !new:speechbrain.processing.features.ISTFT
     sample_rate: !ref <sample_rate>
-    channel_counts: [128, 128, 256, 256, 512, 512]
-    normalization: !name:speechbrain.nnet.normalization.BatchNorm2d
-    activation: !new:torch.nn.GELU
-    dense_count: 2
-    dense_nodes: 1024
-    dropout: 0.1
-    mask_weight: !ref <mask_weight>
+    n_fft: !ref <n_fft>
+    win_length: !ref <win_length>
+    hop_length: !ref <hop_length>
+
+spectral_magnitude: !name:speechbrain.processing.features.spectral_magnitude
+    power: 0.5
+
+resynth: !name:speechbrain.processing.signal_processing.resynthesize
+    stft: !ref <compute_stft>
+    istft: !ref <compute_istft>
+
+enhance_model: !new:speechbrain.lobes.models.transformer.TransformerSE.CNNTransformerSE
+    output_size: !ref <n_fft> // 2 + 1
+    d_model: !ref <n_fft> // 2
+    output_activation: !name:torch.nn.ReLU
+    activation: !name:torch.nn.LeakyReLU
+    dropout: !ref <enhancer_drop_rate>
+    num_layers: !ref <enhancer_layers>
+    d_ffn: !ref <enhancer_size>
+    nhead: !ref <enhancer_heads>
+    causal: !ref <enhancer_causal>
+    custom_emb_module: !new:speechbrain.nnet.containers.Sequential
+        input_shape: [null, null, !ref <n_fft> // 2 + 1]
+        conv1: !name:speechbrain.nnet.CNN.Conv1d
+            out_channels: !ref <emb_channels>
+            kernel_size: 3
+        norm1: !name:speechbrain.nnet.normalization.LayerNorm
+        act1: !new:torch.nn.LeakyReLU
+        conv2: !name:speechbrain.nnet.CNN.Conv1d
+            out_channels: !ref <emb_channels> // 2
+            kernel_size: 3
+        norm2: !name:speechbrain.nnet.normalization.LayerNorm
+        act2: !new:torch.nn.LeakyReLU
+        conv3: !name:speechbrain.nnet.CNN.Conv1d
+            out_channels: !ref <emb_channels> // 4
+            kernel_size: 3
+        norm3: !name:speechbrain.nnet.normalization.LayerNorm
+        act3: !new:torch.nn.LeakyReLU
+        conv4: !name:speechbrain.nnet.CNN.Conv1d
+            out_channels: !ref <emb_channels> // 4
+            kernel_size: 3
+        norm4: !name:speechbrain.nnet.normalization.LayerNorm
+        act4: !new:torch.nn.LeakyReLU
 
 modules:
     enhance_model: !ref <enhance_model>
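
For context, the reverted hyperparams.yaml wires a standard SpeechBrain spectral enhancement pipeline: STFT, compressed magnitude (power 0.5), the CNNTransformerSE enhancer, and resynthesis with the noisy phase through the bound compute_stft/compute_istft. A minimal usage sketch, under assumptions not visible in this diff: sample_rate is defined above the hunk shown, enhance_model.ckpt holds a plain state_dict for the enhancer, the ReLU output is treated as the enhanced magnitude (if the training recipe predicts a mask instead, multiply by the noisy magnitude before resynthesis), and noisy.wav is a hypothetical input file.

import torch
import torchaudio
from hyperpyyaml import load_hyperpyyaml

# Instantiate every object declared in the reverted hyperparams.yaml.
with open("hyperparams.yaml") as fin:
    hparams = load_hyperpyyaml(fin)

# Assumption: the checkpoint stores a state_dict for the CNNTransformerSE module.
enhance_model = hparams["enhance_model"]
enhance_model.load_state_dict(torch.load("enhance_model.ckpt", map_location="cpu"))
enhance_model.eval()

# Hypothetical input; its sampling rate should match the sample_rate defined
# at the top of the full yaml (not shown in this hunk).
noisy_wav, fs = torchaudio.load("noisy.wav")

with torch.no_grad():
    noisy_spec = hparams["compute_stft"](noisy_wav)        # complex STFT
    noisy_mag = hparams["spectral_magnitude"](noisy_spec)   # magnitude, power 0.5 per the yaml
    enhanced_mag = enhance_model(noisy_mag)                 # transformer estimate
    # resynth already binds compute_stft/compute_istft, so it only needs the
    # estimated magnitude plus the noisy waveform, whose phase is reused.
    enhanced_wav = hparams["resynth"](enhanced_mag, noisy_wav)

torchaudio.save("enhanced.wav", enhanced_wav.cpu(), fs)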