Arnab Das committed
Commit b30e39a
Parent: 3f67209

AASIST model added.

Files changed (4):
  1. app.py +5 -3
  2. models.py +231 -0
  3. orig_aasist_epoch_1.pth +3 -0
  4. process_data.py +4 -1
app.py CHANGED
@@ -11,7 +11,9 @@ model_master = {
                "model_checkpoint": "ssl_aasist_epoch_7.pth"},
     "AASIST": {"eer_threshold": 1.8018419742584229,
                "data_process_func": "process_assist_input",
-               "note": "This model is trained on ASVSpoof 2024 training data."}
+               "note": "This model is trained on ASVSpoof 2024 training data.",
+               "model_class": "AASIST_Model",
+               "model_checkpoint": "orig_aasist_epoch_1.pth"}
 }
 
 model = MOD.Model(None, "cpu")
@@ -21,8 +23,6 @@ loaded_model = "SSL-AASIST (Trained on ASV-Spoof5)"
 
 
 def process(file, type):
-    if type == "AASIST":
-        return "Model AASIST is not yet implemented."
     global model
     global loaded_model
     inp = getattr(PD, model_master[type]["data_process_func"])(file)
@@ -54,6 +54,8 @@ file_proc = gr.Interface(
     examples=[
         ["./bonafide.flac", "SSL-AASIST (Trained on ASV-Spoof5)"],
        ["./fake.flac", "SSL-AASIST (Trained on ASV-Spoof5)"],
+        ["./bonafide.flac", "AASIST"],
+        ["./fake.flac", "AASIST"],
     ],
     cache_examples=True,
     allow_flagging="never",
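
With `model_class` and `model_checkpoint` now in the registry, `process` no longer needs the "not yet implemented" early return: the AASIST entry can be resolved and loaded the same way as the SSL model. The reload logic itself is not shown in these hunks, so the following is only a sketch of how the new keys are presumably consumed; `load_model_for` is a hypothetical helper, and `MOD` is the `models.py` alias used by `app.py`.

import torch
import models as MOD

def load_model_for(type, model_master, device="cpu"):
    # Hypothetical helper: resolve the class named in the registry and
    # restore its checkpoint weights (both keys added in this commit).
    cls = getattr(MOD, model_master[type]["model_class"])
    model = cls(None, device)
    state = torch.load(model_master[type]["model_checkpoint"],
                       map_location=device)
    model.load_state_dict(state)
    return model.eval()

# e.g. inside process(), when `type` differs from `loaded_model`:
#     model = load_model_for(type, model_master)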
models.py CHANGED
@@ -1,6 +1,9 @@
 import torch
+import random
 import fairseq
+import numpy as np
 import torch.nn as nn
+from torch import Tensor
 from typing import Union
 import torch.nn.functional as F
 
@@ -633,3 +636,231 @@ class Model(nn.Module):
         output = self.out_layer(last_hidden)
 
         return output
+
+
+class CONV(nn.Module):
+    @staticmethod
+    def to_mel(hz):
+        return 2595 * np.log10(1 + hz / 700)
+
+    @staticmethod
+    def to_hz(mel):
+        return 700 * (10**(mel / 2595) - 1)
+
+    def __init__(self,
+                 out_channels,
+                 kernel_size,
+                 sample_rate=16000,
+                 in_channels=1,
+                 stride=1,
+                 padding=0,
+                 dilation=1,
+                 bias=False,
+                 groups=1,
+                 mask=False):
+        super().__init__()
+        if in_channels != 1:
+
+            msg = "SincConv only supports one input channel (here, in_channels = {%i})" % (
+                in_channels)
+            raise ValueError(msg)
+        self.out_channels = out_channels
+        self.kernel_size = kernel_size
+        self.sample_rate = sample_rate
+
+        # Forcing the filters to be odd (i.e., perfectly symmetric)
+        if kernel_size % 2 == 0:
+            self.kernel_size = self.kernel_size + 1
+        self.stride = stride
+        self.padding = padding
+        self.dilation = dilation
+        self.mask = mask
+        if bias:
+            raise ValueError('SincConv does not support bias.')
+        if groups > 1:
+            raise ValueError('SincConv does not support groups.')
+
+        NFFT = 512
+        f = int(self.sample_rate / 2) * np.linspace(0, 1, int(NFFT / 2) + 1)
+        fmel = self.to_mel(f)
+        fmelmax = np.max(fmel)
+        fmelmin = np.min(fmel)
+        filbandwidthsmel = np.linspace(fmelmin, fmelmax, self.out_channels + 1)
+        filbandwidthsf = self.to_hz(filbandwidthsmel)
+
+        self.mel = filbandwidthsf
+        self.hsupp = torch.arange(-(self.kernel_size - 1) / 2,
+                                  (self.kernel_size - 1) / 2 + 1)
+        self.band_pass = torch.zeros(self.out_channels, self.kernel_size)
+        for i in range(len(self.mel) - 1):
+            fmin = self.mel[i]
+            fmax = self.mel[i + 1]
+            hHigh = (2 * fmax / self.sample_rate) * \
+                np.sinc(2 * fmax * self.hsupp / self.sample_rate)
+            hLow = (2 * fmin / self.sample_rate) * \
+                np.sinc(2 * fmin * self.hsupp / self.sample_rate)
+            hideal = hHigh - hLow
+
+            self.band_pass[i, :] = Tensor(np.hamming(
+                self.kernel_size)) * Tensor(hideal)
+
+    def forward(self, x, mask=False):
+        band_pass_filter = self.band_pass.clone().to(x.device)
+        if mask:
+            A = np.random.uniform(0, 20)
+            A = int(A)
+            A0 = random.randint(0, band_pass_filter.shape[0] - A)
+            band_pass_filter[A0:A0 + A, :] = 0
+        else:
+            band_pass_filter = band_pass_filter
+
+        self.filters = (band_pass_filter).view(self.out_channels, 1,
+                                               self.kernel_size)
+
+        return F.conv1d(x,
+                        self.filters,
+                        stride=self.stride,
+                        padding=self.padding,
+                        dilation=self.dilation,
+                        bias=None,
+                        groups=1)
+
+
+class AASIST_Model(nn.Module):
+    def __init__(self, args, device):
+        super().__init__()
+
+        filts = [70, [1, 32], [32, 32], [32, 64], [64, 64]]
+        gat_dims = [64, 32]
+        pool_ratios = [0.5, 0.7, 0.5, 0.5]
+        temperatures = [2.0, 2.0, 100.0, 100.0]
+
+        self.conv_time = CONV(out_channels=filts[0],
+                              kernel_size=128,
+                              in_channels=1)
+        self.first_bn = nn.BatchNorm2d(num_features=1)
+
+        self.drop = nn.Dropout(0.5, inplace=True)
+        self.drop_way = nn.Dropout(0.2, inplace=True)
+        self.selu = nn.SELU(inplace=True)
+
+        self.encoder = nn.Sequential(
+            nn.Sequential(Residual_block_aasist(nb_filts=filts[1], first=True)),
+            nn.Sequential(Residual_block_aasist(nb_filts=filts[2])),
+            nn.Sequential(Residual_block_aasist(nb_filts=filts[3])),
+            nn.Sequential(Residual_block_aasist(nb_filts=filts[4])),
+            nn.Sequential(Residual_block_aasist(nb_filts=filts[4])),
+            nn.Sequential(Residual_block_aasist(nb_filts=filts[4])))
+
+        self.pos_S = nn.Parameter(torch.randn(1, 23, filts[-1][-1]))
+        self.master1 = nn.Parameter(torch.randn(1, 1, gat_dims[0]))
+        self.master2 = nn.Parameter(torch.randn(1, 1, gat_dims[0]))
+
+        self.GAT_layer_S = GraphAttentionLayer(filts[-1][-1],
+                                               gat_dims[0],
+                                               temperature=temperatures[0])
+        self.GAT_layer_T = GraphAttentionLayer(filts[-1][-1],
+                                               gat_dims[0],
+                                               temperature=temperatures[1])
+
+        self.HtrgGAT_layer_ST11 = HtrgGraphAttentionLayer(
+            gat_dims[0], gat_dims[1], temperature=temperatures[2])
+        self.HtrgGAT_layer_ST12 = HtrgGraphAttentionLayer(
+            gat_dims[1], gat_dims[1], temperature=temperatures[2])
+
+        self.HtrgGAT_layer_ST21 = HtrgGraphAttentionLayer(
+            gat_dims[0], gat_dims[1], temperature=temperatures[2])
+
+        self.HtrgGAT_layer_ST22 = HtrgGraphAttentionLayer(
+            gat_dims[1], gat_dims[1], temperature=temperatures[2])
+
+        self.pool_S = GraphPool(pool_ratios[0], gat_dims[0], 0.3)
+        self.pool_T = GraphPool(pool_ratios[1], gat_dims[0], 0.3)
+        self.pool_hS1 = GraphPool(pool_ratios[2], gat_dims[1], 0.3)
+        self.pool_hT1 = GraphPool(pool_ratios[2], gat_dims[1], 0.3)
+
+        self.pool_hS2 = GraphPool(pool_ratios[2], gat_dims[1], 0.3)
+        self.pool_hT2 = GraphPool(pool_ratios[2], gat_dims[1], 0.3)
+
+        self.out_layer = nn.Linear(5 * gat_dims[1], 2)
+
+    def forward(self, x, Freq_aug=False):
+
+        x = x.unsqueeze(1)
+        x = self.conv_time(x, mask=Freq_aug)
+        x = x.unsqueeze(dim=1)
+        x = F.max_pool2d(torch.abs(x), (3, 3))
+        x = self.first_bn(x)
+        x = self.selu(x)
+
+        # get embeddings using encoder
+        # (#bs, #filt, #spec, #seq)
+        e = self.encoder(x)
+
+        # spectral GAT (GAT-S)
+        e_S, _ = torch.max(torch.abs(e), dim=3)  # max along time
+        e_S = e_S.transpose(1, 2) + self.pos_S
+
+        gat_S = self.GAT_layer_S(e_S)
+        out_S = self.pool_S(gat_S)  # (#bs, #node, #dim)
+
+        # temporal GAT (GAT-T)
+        e_T, _ = torch.max(torch.abs(e), dim=2)  # max along freq
+        e_T = e_T.transpose(1, 2)
+
+        gat_T = self.GAT_layer_T(e_T)
+        out_T = self.pool_T(gat_T)
+
+        # learnable master node
+        master1 = self.master1.expand(x.size(0), -1, -1)
+        master2 = self.master2.expand(x.size(0), -1, -1)
+
+        # inference 1
+        out_T1, out_S1, master1 = self.HtrgGAT_layer_ST11(
+            out_T, out_S, master=self.master1)
+
+        out_S1 = self.pool_hS1(out_S1)
+        out_T1 = self.pool_hT1(out_T1)
+
+        out_T_aug, out_S_aug, master_aug = self.HtrgGAT_layer_ST12(
+            out_T1, out_S1, master=master1)
+        out_T1 = out_T1 + out_T_aug
+        out_S1 = out_S1 + out_S_aug
+        master1 = master1 + master_aug
+
+        # inference 2
+        out_T2, out_S2, master2 = self.HtrgGAT_layer_ST21(
+            out_T, out_S, master=self.master2)
+        out_S2 = self.pool_hS2(out_S2)
+        out_T2 = self.pool_hT2(out_T2)
+
+        out_T_aug, out_S_aug, master_aug = self.HtrgGAT_layer_ST22(
+            out_T2, out_S2, master=master2)
+        out_T2 = out_T2 + out_T_aug
+        out_S2 = out_S2 + out_S_aug
+        master2 = master2 + master_aug
+
+        out_T1 = self.drop_way(out_T1)
+        out_T2 = self.drop_way(out_T2)
+        out_S1 = self.drop_way(out_S1)
+        out_S2 = self.drop_way(out_S2)
+        master1 = self.drop_way(master1)
+        master2 = self.drop_way(master2)
+
+        out_T = torch.max(out_T1, out_T2)
+        out_S = torch.max(out_S1, out_S2)
+        master = torch.max(master1, master2)
+
+        T_max, _ = torch.max(torch.abs(out_T), dim=1)
+        T_avg = torch.mean(out_T, dim=1)
+
+        S_max, _ = torch.max(torch.abs(out_S), dim=1)
+        S_avg = torch.mean(out_S, dim=1)
+
+        last_hidden = torch.cat(
+            [T_max, T_avg, S_max, S_avg, master.squeeze(1)], dim=1)
+
+        last_hidden = self.drop(last_hidden)
+        output = self.out_layer(last_hidden)
+
+        return last_hidden, output
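
The new `CONV` layer is a fixed sinc-filter front end: the band edges are spaced uniformly on the mel scale (mel = 2595 * log10(1 + f/700), inverted by `to_hz`), and each of the 70 output channels is a Hamming-windowed ideal band-pass built as the difference of two sinc low-passes. With `mask=True` (driven by `Freq_aug` in `AASIST_Model.forward`), a random block of up to 20 adjacent filters is zeroed, acting as frequency masking at train time. Note also that `AASIST_Model.forward` returns `(last_hidden, output)`, unlike the existing `Model`, which returns only `output`. A minimal shape check, assuming the usual AASIST input length of 64,600 samples (~4 s at 16 kHz, an assumption, since the padding code is not shown in this commit):

import torch
from models import CONV

conv_time = CONV(out_channels=70, kernel_size=128)  # kernel is forced odd -> 129
wave = torch.randn(1, 1, 64600)     # (batch, channel, samples), assumed length
out = conv_time(wave)               # valid conv: 64600 - 129 + 1 samples
print(out.shape)                    # torch.Size([1, 70, 64472])

# Train-time frequency masking: zeroes a random block of adjacent filters.
aug = conv_time(wave, mask=True)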
orig_aasist_epoch_1.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f26ad87bca3d47e97ecfeac6fee6fcae93f62673a484d447c081d45911e3a027
+size 1276136
process_data.py CHANGED
@@ -17,4 +17,7 @@ def process_ssl_assist_input(filepath):
     X_pad = pad(X)
     x_inp = Tensor(X_pad)
     x_inp = x_inp.unsqueeze(0)
-    return x_inp
+    return x_inp
+
+def process_assist_input(filepath):
+    return process_ssl_assist_input(filepath)
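
`process_assist_input` simply delegates to the SSL preprocessing, so both models receive the same fixed-length waveform tensor. The top of `process_data.py` sits outside this hunk; the sketch below is a plausible reconstruction following the standard AASIST data prep, and the `librosa.load` call and the `max_len=64600` value are assumptions, not part of the diff.

import librosa
import numpy as np
from torch import Tensor

def pad(x, max_len=64600):
    # Assumed helper (defined above the hunk): repeat-pad short clips
    # to max_len samples (~4 s at 16 kHz) and crop longer ones.
    x_len = x.shape[0]
    if x_len >= max_len:
        return x[:max_len]
    num_repeats = int(max_len / x_len) + 1
    return np.tile(x, num_repeats)[:max_len]

def process_ssl_assist_input(filepath):
    X, _ = librosa.load(filepath, sr=16000)  # mono waveform at 16 kHz (assumed)
    X_pad = pad(X)
    x_inp = Tensor(X_pad)
    return x_inp.unsqueeze(0)                # add batch dimension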