Update modeling_ltgbert.py
Browse files
- modeling_ltgbert.py +2 -2
modeling_ltgbert.py
CHANGED
@@ -203,7 +203,7 @@ class Attention(nn.Module):
|
|
203 |
- torch.arange(config.max_position_embeddings, dtype=torch.long).unsqueeze(0)
|
204 |
position_indices = self.make_log_bucket_position(position_indices, config.position_bucket_size, config.max_position_embeddings)
|
205 |
position_indices = config.position_bucket_size - 1 + position_indices
|
206 |
-
self.register_buffer("position_indices", position_indices, persistent=True)
|
207 |
|
208 |
self.dropout = nn.Dropout(config.attention_probs_dropout_prob)
|
209 |
self.scale = 1.0 / math.sqrt(3 * self.head_size)
|
@@ -234,7 +234,7 @@ class Attention(nn.Module):
|
|
234 |
position_indices = torch.arange(query_len, dtype=torch.long).unsqueeze(1) \
|
235 |
- torch.arange(query_len, dtype=torch.long).unsqueeze(0)
|
236 |
position_indices = self.make_log_bucket_position(position_indices, self.config.position_bucket_size, 512)
|
237 |
-
position_indices = self.position_bucket_size - 1 + position_indices
|
238 |
self.position_indices = position_indices.to(hidden_states.device)
|
239 |
|
240 |
hidden_states = self.pre_layer_norm(hidden_states)
|
|
|
203 |
- torch.arange(config.max_position_embeddings, dtype=torch.long).unsqueeze(0)
|
204 |
position_indices = self.make_log_bucket_position(position_indices, config.position_bucket_size, config.max_position_embeddings)
|
205 |
position_indices = config.position_bucket_size - 1 + position_indices
|
206 |
+
self.register_buffer("position_indices", position_indices, persistent=False)
|
207 |
|
208 |
self.dropout = nn.Dropout(config.attention_probs_dropout_prob)
|
209 |
self.scale = 1.0 / math.sqrt(3 * self.head_size)
|
|
|
234 |
position_indices = torch.arange(query_len, dtype=torch.long).unsqueeze(1) \
|
235 |
- torch.arange(query_len, dtype=torch.long).unsqueeze(0)
|
236 |
position_indices = self.make_log_bucket_position(position_indices, self.config.position_bucket_size, 512)
|
237 |
+
position_indices = self.config.position_bucket_size - 1 + position_indices
|
238 |
self.position_indices = position_indices.to(hidden_states.device)
|
239 |
|
240 |
hidden_states = self.pre_layer_norm(hidden_states)
|