Update README.md
Browse files
README.md
CHANGED
@@ -33,6 +33,9 @@ from torchaudio.compliance import kaldi
|
|
33 |
model = timm.create_model("hf_hub:gaunernst/vit_base_patch16_1024_128.audiomae_as2m_ft_as20k", pretrained=True)
|
34 |
model = model.eval()
|
35 |
|
|
|
|
|
|
|
36 |
audio = torch.randn(1, 10 * 16_000) # make sure input is 16kHz
|
37 |
melspec = kaldi.fbank(audio, htk_compat=True, window_type="hanning", num_mel_bins=128) # shape (n_frames, 128)
|
38 |
|
@@ -41,6 +44,7 @@ if melspec.shape[0] < 1024:
|
|
41 |
melspec = F.pad(melspec, (0, 0, 0, 1024 - melspec.shape[0]))
|
42 |
else:
|
43 |
melspec = melspec[:1024]
|
|
|
44 |
|
45 |
melspec = melspec.view(1, 1, 1024, 128) # add batch dim and channel dim
|
46 |
output = model(melspec)
|
|
|
33 |
model = timm.create_model("hf_hub:gaunernst/vit_base_patch16_1024_128.audiomae_as2m_ft_as20k", pretrained=True)
|
34 |
model = model.eval()
|
35 |
|
36 |
+
MEAN = -4.2677393
|
37 |
+
STD = 4.5689974
|
38 |
+
|
39 |
audio = torch.randn(1, 10 * 16_000) # make sure input is 16kHz
|
40 |
melspec = kaldi.fbank(audio, htk_compat=True, window_type="hanning", num_mel_bins=128) # shape (n_frames, 128)
|
41 |
|
|
|
44 |
melspec = F.pad(melspec, (0, 0, 0, 1024 - melspec.shape[0]))
|
45 |
else:
|
46 |
melspec = melspec[:1024]
|
47 |
+
melspec = (melspec - MEAN) / (STD * 2)
|
48 |
|
49 |
melspec = melspec.view(1, 1, 1024, 128) # add batch dim and channel dim
|
50 |
output = model(melspec)
|