import numpy as np
import torch
import torch.nn as nn
class Attention(nn.Module):
    """Applies attention mechanism on the `context` using the `query`.

    Args:
        dimensions (int): Dimensionality of the query and context.

    Example:
        >>> attention = Attention(256)
        >>> query = torch.randn(32, 50, 256)
        >>> context = torch.randn(32, 1, 256)
        >>> attention_mask = torch.ones(32, 50)
        >>> output, weights = attention(query, context, attention_mask)
        >>> output.size()
        torch.Size([32, 50, 256])
        >>> weights.size()
        torch.Size([32, 50, 1])
    """

    def __init__(self, dimensions):
        super(Attention, self).__init__()
        self.dimensions = dimensions
        self.linear_out = nn.Linear(dimensions * 2, dimensions, bias=False)
        self.softmax = nn.Softmax(dim=1)
        self.tanh = nn.Tanh()

    def forward(self, query, context, attention_mask):
        """
        Args:
            query (:class:`torch.FloatTensor` [batch size, output length, dimensions]): Sequence of
                queries to query the context (output length = length of the utterance).
            context (:class:`torch.FloatTensor` [batch size, query length, dimensions]): Data
                over which to apply the attention mechanism (query length = 1, the intent context).
            attention_mask (:class:`torch.Tensor` [batch size, output length]): Mask with zeros at
                padded positions; the corresponding scores are set to -inf before the softmax.

        Returns:
            :class:`tuple` with `output` and `weights`:
            * **output** (:class:`torch.FloatTensor` [batch size, output length, dimensions]):
              Tensor containing the attended features.
            * **weights** (:class:`torch.FloatTensor` [batch size, output length, query length]):
              Tensor containing attention weights.
        """
        # query = self.linear_query(query)
        batch_size, output_len, hidden_size = query.size()
        # query_len = context.size(1)

        # (batch_size, output_len, dimensions) * (batch_size, query_len, dimensions) ->
        # (batch_size, output_len, query_len)
        attention_scores = torch.bmm(query, context.transpose(1, 2).contiguous())

        if attention_mask is not None:
            # Apply the attention mask before the softmax
            attention_mask = torch.unsqueeze(attention_mask, 2)
            # attention_mask = attention_mask.view(batch_size * output_len, query_len)
            attention_scores.masked_fill_(attention_mask == 0, -np.inf)

        # Normalize the scores with a softmax over the token (output length) dimension
        # attention_scores = torch.squeeze(attention_scores, 1)
        attention_weights = self.softmax(attention_scores)
        # attention_weights = attention_weights.view(batch_size, output_len, query_len)

        # (batch_size, output_len, query_len) * (batch_size, query_len, dimensions) ->
        # (batch_size, output_len, dimensions)
        mix = torch.bmm(attention_weights, context)

        # concat -> (batch_size, output_len, 2 * dimensions)
        combined = torch.cat((mix, query), dim=2)
        # combined = combined.view(batch_size * output_len, 2 * self.dimensions)

        # Apply linear_out on every 2nd dimension of concat
        # output -> (batch_size, output_len, dimensions)
        # output = self.linear_out(combined).view(batch_size, output_len, self.dimensions)
        output = self.linear_out(combined)
        output = self.tanh(output)
        # output = combined

        return output, attention_weights
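

# The helper below is a minimal illustrative sketch, not part of the original module:
# it shows how `Attention` combines per-token features with a single intent context
# vector (query length = 1) and how padded positions receive zero attention weight.
# The function name `_attention_mask_demo` and all shapes are assumptions chosen only
# for the example.
def _attention_mask_demo():
    batch_size, seq_len, dim = 2, 6, 16
    attention = Attention(dim)
    token_features = torch.randn(batch_size, seq_len, dim)  # query: one vector per token
    intent_context = torch.randn(batch_size, 1, dim)        # context: one intent vector
    mask = torch.ones(batch_size, seq_len)
    mask[:, 4:] = 0                                          # pretend the last two tokens are padding
    output, weights = attention(token_features, intent_context, mask)
    assert output.shape == (batch_size, seq_len, dim)
    assert weights.shape == (batch_size, seq_len, 1)
    # Masked scores are set to -inf before the softmax, so their weights come out as zero.
    assert torch.all(weights[:, 4:] == 0)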


class IntentClassifier(nn.Module):
    def __init__(self, input_dim, num_intent_labels, dropout_rate=0.0):
        super(IntentClassifier, self).__init__()
        self.dropout = nn.Dropout(dropout_rate)
        self.linear = nn.Linear(input_dim, num_intent_labels)

    def forward(self, x):
        x = self.dropout(x)
        return self.linear(x)


class SlotClassifier(nn.Module):
    def __init__(
        self,
        input_dim,
        num_intent_labels,
        num_slot_labels,
        use_intent_context_concat=False,
        use_intent_context_attn=False,
        max_seq_len=50,
        attention_embedding_size=200,
        dropout_rate=0.0,
    ):
        super(SlotClassifier, self).__init__()
        self.use_intent_context_attn = use_intent_context_attn
        self.use_intent_context_concat = use_intent_context_concat
        self.max_seq_len = max_seq_len
        self.num_intent_labels = num_intent_labels
        self.num_slot_labels = num_slot_labels
        self.attention_embedding_size = attention_embedding_size

        output_dim = self.attention_embedding_size  # base model
        if self.use_intent_context_concat:
            output_dim = self.attention_embedding_size
            self.linear_out = nn.Linear(2 * attention_embedding_size, attention_embedding_size)
        elif self.use_intent_context_attn:
            output_dim = self.attention_embedding_size
            self.attention = Attention(attention_embedding_size)

        self.linear_slot = nn.Linear(input_dim, self.attention_embedding_size, bias=False)

        if self.use_intent_context_attn or self.use_intent_context_concat:
            # project intent vector and slot vector to have the same dimensions
            self.linear_intent_context = nn.Linear(self.num_intent_labels, self.attention_embedding_size, bias=False)
            self.softmax = nn.Softmax(dim=-1)  # softmax layer for intent logits
            # self.linear_out = nn.Linear(2 * intent_embedding_size, intent_embedding_size)

        # output
        self.dropout = nn.Dropout(dropout_rate)
        self.linear = nn.Linear(output_dim, num_slot_labels)

    def forward(self, x, intent_context, attention_mask):
        x = self.linear_slot(x)
        if self.use_intent_context_concat:
            # Concatenate the projected intent context with every token representation
            intent_context = self.softmax(intent_context)
            intent_context = self.linear_intent_context(intent_context)
            intent_context = torch.unsqueeze(intent_context, 1)
            intent_context = intent_context.expand(-1, self.max_seq_len, -1)
            x = torch.cat((x, intent_context), dim=2)
            x = self.linear_out(x)
        elif self.use_intent_context_attn:
            # Attend from each token to the projected intent context vector
            intent_context = self.softmax(intent_context)
            intent_context = self.linear_intent_context(intent_context)
            intent_context = torch.unsqueeze(intent_context, 1)  # 1: query length (each token)
            output, weights = self.attention(x, intent_context, attention_mask)
            x = output
        x = self.dropout(x)
        return self.linear(x)
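

# Minimal smoke-test sketch, assuming the usual joint intent/slot setup in which a
# 768-dimensional encoder feeds both heads and the intent logits produced by
# `IntentClassifier` are passed to `SlotClassifier` as the intent context. The label
# counts, batch size, and hidden size below are placeholders for the example only.
if __name__ == "__main__":
    batch_size, seq_len, hidden_size = 4, 50, 768
    num_intents, num_slots = 7, 9

    sequence_output = torch.randn(batch_size, seq_len, hidden_size)  # token-level encoder states
    pooled_output = torch.randn(batch_size, hidden_size)             # sentence-level encoder state
    attention_mask = torch.ones(batch_size, seq_len)

    intent_classifier = IntentClassifier(hidden_size, num_intents, dropout_rate=0.1)
    slot_classifier = SlotClassifier(
        hidden_size,
        num_intents,
        num_slots,
        use_intent_context_attn=True,
        max_seq_len=seq_len,
        attention_embedding_size=200,
        dropout_rate=0.1,
    )

    intent_logits = intent_classifier(pooled_output)  # (batch_size, num_intents)
    slot_logits = slot_classifier(sequence_output, intent_logits, attention_mask)
    print(intent_logits.shape)  # torch.Size([4, 7])
    print(slot_logits.shape)    # torch.Size([4, 50, 9])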