import torch
import torch.nn as nn
import torch.nn.functional as F


class AudioExpert(nn.Module):
    """Encode a sequence of audio feature frames into a fixed-size vector.

    A stacked LSTM reads the sequence; the top layer's output at the final
    time step is passed through a linear layer followed by ReLU.

    Args:
        input_size: Number of features per frame (default 40).
        hidden_size: LSTM hidden width, also the output width (default 128).
        num_layers: Number of stacked LSTM layers (default 2).
    """

    def __init__(
        self,
        input_size: int = 40,
        hidden_size: int = 128,
        num_layers: int = 2,
    ) -> None:
        super().__init__()
        self.rnn = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc1 = nn.Linear(hidden_size, hidden_size)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the ReLU-activated projection of the last LSTM output.

        Args:
            x: Batch-first input of shape ``(batch, seq_len, input_size)``.

        Returns:
            Tensor of shape ``(batch, hidden_size)``.
        """
        # No explicit (h0, c0): nn.LSTM initialises both to zeros that match
        # the input's dtype and device. Hand-built float32 zeros broke the
        # module under .double()/.half() with a dtype-mismatch error.
        outputs, _ = self.rnn(x)
        # Only the final time step of the top layer feeds the projection.
        last_step = outputs[:, -1, :]
        return F.relu(self.fc1(last_step))