gaunernst committed on
Commit
81e2a6f
1 Parent(s): 8333b77

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +4 -0
README.md CHANGED
@@ -33,6 +33,9 @@ from torchaudio.compliance import kaldi
33
  model = timm.create_model("hf_hub:gaunernst/vit_base_patch16_1024_128.audiomae_as2m_ft_as20k", pretrained=True)
34
  model = model.eval()
35
 
 
 
 
36
  audio = torch.randn(1, 10 * 16_000) # make sure input is 16kHz
37
  melspec = kaldi.fbank(audio, htk_compat=True, window_type="hanning", num_mel_bins=128) # shape (n_frames, 128)
38
 
@@ -41,6 +44,7 @@ if melspec.shape[0] < 1024:
41
  melspec = F.pad(melspec, (0, 0, 0, 1024 - melspec.shape[0]))
42
  else:
43
  melspec = melspec[:1024]
 
44
 
45
  melspec = melspec.view(1, 1, 1024, 128) # add batch dim and channel dim
46
  output = model(melspec)
 
33
  model = timm.create_model("hf_hub:gaunernst/vit_base_patch16_1024_128.audiomae_as2m_ft_as20k", pretrained=True)
34
  model = model.eval()
35
 
36
+ MEAN = -4.2677393
37
+ STD = 4.5689974
38
+
39
  audio = torch.randn(1, 10 * 16_000) # make sure input is 16kHz
40
  melspec = kaldi.fbank(audio, htk_compat=True, window_type="hanning", num_mel_bins=128) # shape (n_frames, 128)
41
 
 
44
  melspec = F.pad(melspec, (0, 0, 0, 1024 - melspec.shape[0]))
45
  else:
46
  melspec = melspec[:1024]
47
+ melspec = (melspec - MEAN) / (STD * 2)
48
 
49
  melspec = melspec.view(1, 1, 1024, 128) # add batch dim and channel dim
50
  output = model(melspec)