import torch.nn as nn
import torch.nn.functional as F
import torch as th
from torch.nn.parameter import Parameter
import numpy as np
import os
class UniDeepFsmn(nn.Module):
"""
    UniDeepFsmn is a neural network module that implements a unidirectional deep feedforward sequential memory network (FSMN) layer.
Attributes:
input_dim (int): Dimension of the input features.
output_dim (int): Dimension of the output features.
        lorder (int): Filter order (temporal context length) of the memory convolution layers.
hidden_size (int): Number of hidden units in the linear layer.
linear (nn.Linear): Linear layer to project input features to hidden size.
project (nn.Linear): Linear layer to project hidden features to output dimensions.
conv1 (nn.Conv2d): Convolutional layer for processing the output in a grouped manner.
"""
def __init__(self, input_dim, output_dim, lorder=None, hidden_size=None):
super(UniDeepFsmn, self).__init__()
self.input_dim = input_dim
self.output_dim = output_dim
if lorder is None:
return
self.lorder = lorder
self.hidden_size = hidden_size
# Initialize the layers
self.linear = nn.Linear(input_dim, hidden_size) # Linear transformation to hidden size
self.project = nn.Linear(hidden_size, output_dim, bias=False) # Project hidden size to output dimension
        self.conv1 = nn.Conv2d(output_dim, output_dim, [lorder + lorder - 1, 1], [1, 1], groups=output_dim, bias=False) # Depthwise (grouped) memory convolution over the time axis
def forward(self, input):
"""
Forward pass for the UniDeepFsmn model.
Args:
            input (torch.Tensor): Input tensor of shape (batch_size, seq_len, input_dim).
Returns:
torch.Tensor: The output tensor of the same shape as input, enhanced by the network.
"""
f1 = F.relu(self.linear(input)) # Apply linear layer followed by ReLU activation
p1 = self.project(f1) # Project to output dimension
x = th.unsqueeze(p1, 1) # Add a dimension for compatibility with Conv2d
x_per = x.permute(0, 3, 2, 1) # Permute dimensions for convolution
        y = F.pad(x_per, [0, 0, self.lorder - 1, self.lorder - 1]) # Symmetric padding along the time axis so the memory convolution preserves sequence length
out = x_per + self.conv1(y) # Add original input to convolution output
out1 = out.permute(0, 3, 2, 1) # Permute back to original dimensions
        return input + out1.squeeze(1) # Residual connection; squeeze only the channel dim so batch size 1 is handled correctly
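
# A minimal usage sketch for UniDeepFsmn (illustrative values only; it assumes an
# input of shape (batch, time, input_dim) and input_dim == output_dim so the
# residual addition is valid):
#
#     fsmn = UniDeepFsmn(input_dim=257, output_dim=257, lorder=20, hidden_size=256)
#     x = th.randn(4, 100, 257)   # (batch, time, features)
#     y = fsmn(x)                 # same shape as x
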
class UniDeepFsmn_dual(nn.Module):
"""
    UniDeepFsmn_dual is a neural network module that implements a unidirectional deep feedforward sequential memory network (FSMN) with two stacked memory convolution layers.
This class extends the UniDeepFsmn by adding a second convolution layer for richer feature extraction.
Attributes:
input_dim (int): Dimension of the input features.
output_dim (int): Dimension of the output features.
        lorder (int): Filter order (temporal context length) of the memory convolution layers.
hidden_size (int): Number of hidden units in the linear layer.
linear (nn.Linear): Linear layer to project input features to hidden size.
project (nn.Linear): Linear layer to project hidden features to output dimensions.
conv1 (nn.Conv2d): First convolutional layer for processing the output.
conv2 (nn.Conv2d): Second convolutional layer for further processing the features.
"""
def __init__(self, input_dim, output_dim, lorder=None, hidden_size=None):
super(UniDeepFsmn_dual, self).__init__()
self.input_dim = input_dim
self.output_dim = output_dim
if lorder is None:
return
self.lorder = lorder
self.hidden_size = hidden_size
# Initialize the layers
self.linear = nn.Linear(input_dim, hidden_size) # Linear transformation to hidden size
self.project = nn.Linear(hidden_size, output_dim, bias=False) # Project hidden size to output dimension
        self.conv1 = nn.Conv2d(output_dim, output_dim, [lorder + lorder - 1, 1], [1, 1], groups=output_dim, bias=False) # First depthwise memory convolution over the time axis
        self.conv2 = nn.Conv2d(output_dim, output_dim, [lorder + lorder - 1, 1], [1, 1], groups=output_dim // 4, bias=False) # Second memory convolution; groups=output_dim // 4, so output_dim should be a multiple of 4
def forward(self, input):
"""
Forward pass for the UniDeepFsmn_dual model.
Args:
            input (torch.Tensor): Input tensor of shape (batch_size, seq_len, input_dim).
Returns:
torch.Tensor: The output tensor of the same shape as input, enhanced by the network.
"""
f1 = F.relu(self.linear(input)) # Apply linear layer followed by ReLU activation
p1 = self.project(f1) # Project to output dimension
x = th.unsqueeze(p1, 1) # Add a dimension for compatibility with Conv2d
x_per = x.permute(0, 3, 2, 1) # Permute dimensions for convolution
        y = F.pad(x_per, [0, 0, self.lorder - 1, self.lorder - 1]) # Symmetric padding along the time axis so the first memory convolution preserves sequence length
conv1_out = x_per + self.conv1(y) # Add original input to first convolution output
z = F.pad(conv1_out, [0, 0, self.lorder - 1, self.lorder - 1]) # Pad for second convolution
out = conv1_out + self.conv2(z) # Add output of second convolution
out1 = out.permute(0, 3, 2, 1) # Permute back to original dimensions
        return input + out1.squeeze(1) # Residual connection; squeeze only the channel dim so batch size 1 is handled correctly
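
# A minimal usage sketch for UniDeepFsmn_dual (illustrative values only; here
# output_dim is a multiple of 4 so that conv2's groups=output_dim // 4 divides
# the channel count):
#
#     fsmn_dual = UniDeepFsmn_dual(input_dim=256, output_dim=256, lorder=20, hidden_size=256)
#     x = th.randn(4, 100, 256)   # (batch, time, features)
#     y = fsmn_dual(x)            # same shape as x
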
class DilatedDenseNet(nn.Module):
"""
DilatedDenseNet implements a dense network structure with dilated convolutions.
    This architecture widens the receptive field while keeping the parameter count low.
It consists of multiple convolutional layers with dilation rates that increase at each layer.
Attributes:
depth (int): Number of convolutional layers in the network.
in_channels (int): Number of input channels for the first layer.
        pad (nn.ConstantPad2d): Padding layer (defined in __init__ but not used in forward).
twidth (int): Width of the kernel used in convolution.
kernel_size (tuple): Kernel size for convolution operations.
"""
def __init__(self, depth=4, lorder=20, in_channels=64):
super(DilatedDenseNet, self).__init__()
self.depth = depth
self.in_channels = in_channels
self.pad = nn.ConstantPad2d((1, 1, 1, 0), value=0.) # Padding for the input
self.twidth = lorder * 2 - 1 # Width of the kernel
self.kernel_size = (self.twidth, 1) # Kernel size for convolutions
# Initialize layers dynamically based on depth
for i in range(self.depth):
dil = 2 ** i # Calculate dilation rate
pad_length = lorder + (dil - 1) * (lorder - 1) - 1 # Calculate padding length
setattr(self, 'pad{}'.format(i + 1), nn.ConstantPad2d((0, 0, pad_length, pad_length), value=0.)) # Padding for dilation
setattr(self, 'conv{}'.format(i + 1),
nn.Conv2d(self.in_channels * (i + 1), self.in_channels, kernel_size=self.kernel_size,
dilation=(dil, 1), groups=self.in_channels, bias=False)) # Convolution layer with dilation
setattr(self, 'norm{}'.format(i + 1), nn.InstanceNorm2d(in_channels, affine=True)) # Normalization layer
setattr(self, 'prelu{}'.format(i + 1), nn.PReLU(self.in_channels)) # Activation layer
def forward(self, x):
"""
Forward pass for the DilatedDenseNet model.
Args:
x (torch.Tensor): Input tensor of shape (batch_size, in_channels, height, width).
Returns:
torch.Tensor: Output tensor after applying dense layers.
"""
skip = x # Initialize skip connection
for i in range(self.depth):
out = getattr(self, 'pad{}'.format(i + 1))(skip) # Apply padding
out = getattr(self, 'conv{}'.format(i + 1))(out) # Apply convolution
out = getattr(self, 'norm{}'.format(i + 1))(out) # Apply normalization
out = getattr(self, 'prelu{}'.format(i + 1))(out) # Apply PReLU activation
skip = th.cat([out, skip], dim=1) # Concatenate the output with the skip connection
        return out # Return the last layer's output (in_channels channels), not the concatenated skip tensor
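
# A minimal usage sketch for DilatedDenseNet (illustrative values only; the input
# is assumed to be laid out as (batch, in_channels, time, 1), which matches the
# layout UniDeepFsmn_dilated produces before calling it):
#
#     dense = DilatedDenseNet(depth=4, lorder=20, in_channels=64)
#     feat = th.randn(2, 64, 100, 1)
#     out = dense(feat)           # (2, 64, 100, 1); internally the skip tensor grows
#                                 # by in_channels at every layer via concatenation
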
class UniDeepFsmn_dilated(nn.Module):
"""
UniDeepFsmn_dilated combines the UniDeepFsmn architecture with a dilated dense network
to enhance feature extraction while maintaining efficient computation.
Attributes:
input_dim (int): Dimension of the input features.
output_dim (int): Dimension of the output features.
depth (int): Depth of the dilated dense network.
        lorder (int): Filter order (temporal context length) of the convolution layers.
hidden_size (int): Number of hidden units in the linear layer.
linear (nn.Linear): Linear layer to project input features to hidden size.
project (nn.Linear): Linear layer to project hidden features to output dimensions.
conv (DilatedDenseNet): Instance of the DilatedDenseNet for feature extraction.
"""
def __init__(self, input_dim, output_dim, lorder=None, hidden_size=None, depth=2):
super(UniDeepFsmn_dilated, self).__init__()
self.input_dim = input_dim
self.output_dim = output_dim
self.depth = depth
if lorder is None:
return
self.lorder = lorder
self.hidden_size = hidden_size
# Initialize layers
self.linear = nn.Linear(input_dim, hidden_size) # Linear transformation to hidden size
self.project = nn.Linear(hidden_size, output_dim, bias=False) # Project hidden size to output dimension
self.conv = DilatedDenseNet(depth=self.depth, lorder=lorder, in_channels=output_dim) # Dilated dense network for feature extraction
def forward(self, input):
"""
Forward pass for the UniDeepFsmn_dilated model.
Args:
            input (torch.Tensor): Input tensor of shape (batch_size, seq_len, input_dim).
Returns:
torch.Tensor: The output tensor of the same shape as input, enhanced by the network.
"""
f1 = F.relu(self.linear(input)) # Apply linear layer followed by ReLU activation
p1 = self.project(f1) # Project to output dimension
x = th.unsqueeze(p1, 1) # Add a dimension for compatibility with Conv2d
x_per = x.permute(0, 3, 2, 1) # Permute dimensions for convolution
out = self.conv(x_per) # Pass through the dilated dense network
out1 = out.permute(0, 3, 2, 1) # Permute back to original dimensions
        return input + out1.squeeze(1) # Residual connection; squeeze only the channel dim so batch size 1 is handled correctly
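
if __name__ == "__main__":
    # Quick smoke test -- a minimal sketch with illustrative sizes, not part of the
    # original model code. It assumes an input of shape (batch, time, feature) and
    # input_dim == output_dim so the residual connection lines up.
    dummy = th.randn(2, 100, 257)
    model = UniDeepFsmn_dilated(input_dim=257, output_dim=257, lorder=20,
                                hidden_size=256, depth=2)
    print(model(dummy).shape)  # expected: torch.Size([2, 100, 257])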