lgcharpe committed on
Commit
47c2a5f
1 Parent(s): 0df427f

Update modeling_norbert.py

Browse files
Files changed (1) hide show
  1. modeling_norbert.py +5 -5
modeling_norbert.py CHANGED
@@ -140,9 +140,9 @@ class Attention(nn.Module):
140
 
141
  position_indices = torch.arange(config.max_position_embeddings, dtype=torch.long).unsqueeze(1) \
142
  - torch.arange(config.max_position_embeddings, dtype=torch.long).unsqueeze(0)
143
- position_indices = self.make_log_bucket_position(position_indices, config.config.position_bucket_size, config.max_position_embeddings)
144
- position_indices = config.config.position_bucket_size - 1 + position_indices
145
- self.register_buffer("position_indices", position_indices, persistent=True)
146
 
147
  self.dropout = nn.Dropout(config.attention_probs_dropout_prob)
148
  self.scale = 1.0 / math.sqrt(3 * self.head_size)
@@ -162,8 +162,8 @@ class Attention(nn.Module):
162
  if self.position_indices.size(0) < query_len:
163
  position_indices = torch.arange(query_len, dtype=torch.long).unsqueeze(1) \
164
  - torch.arange(query_len, dtype=torch.long).unsqueeze(0)
165
- position_indices = self.make_log_bucket_position(position_indices, self.position_bucket_size, 512)
166
- position_indices = self.position_bucket_size - 1 + position_indices
167
  self.position_indices = position_indices.to(hidden_states.device)
168
 
169
  hidden_states = self.pre_layer_norm(hidden_states)
 
140
 
141
  position_indices = torch.arange(config.max_position_embeddings, dtype=torch.long).unsqueeze(1) \
142
  - torch.arange(config.max_position_embeddings, dtype=torch.long).unsqueeze(0)
143
+ position_indices = self.make_log_bucket_position(position_indices, config.position_bucket_size, config.max_position_embeddings)
144
+ position_indices = config.position_bucket_size - 1 + position_indices
145
+ self.register_buffer("position_indices", position_indices, persistent=False)
146
 
147
  self.dropout = nn.Dropout(config.attention_probs_dropout_prob)
148
  self.scale = 1.0 / math.sqrt(3 * self.head_size)
 
162
  if self.position_indices.size(0) < query_len:
163
  position_indices = torch.arange(query_len, dtype=torch.long).unsqueeze(1) \
164
  - torch.arange(query_len, dtype=torch.long).unsqueeze(0)
165
+ position_indices = self.make_log_bucket_position(position_indices, self.config.position_bucket_size, 512)
166
+ position_indices = self.config.position_bucket_size - 1 + position_indices
167
  self.position_indices = position_indices.to(hidden_states.device)
168
 
169
  hidden_states = self.pre_layer_norm(hidden_states)