yairschiff
committed on
Commit
•
d8238ba
1
Parent(s):
0fd3e52
Update modeling_caduceus.py
Browse files
Fix bug where final layer norm was returning residual
- modeling_caduceus.py +2 -1
modeling_caduceus.py
CHANGED
@@ -213,7 +213,8 @@ class CaduceusMixerModel(nn.Module):
|
|
213 |
|
214 |
if not self.fused_add_norm:
|
215 |
if self.rcps:
|
216 |
-
|
|
|
217 |
else:
|
218 |
residual = (hidden_states + residual) if residual is not None else hidden_states
|
219 |
hidden_states = self.norm_f(residual.to(dtype=self.norm_f.weight.dtype))
|
|
|
213 |
|
214 |
if not self.fused_add_norm:
|
215 |
if self.rcps:
|
216 |
+
# Set prenorm=False here since we don't need the residual
|
217 |
+
hidden_states = self.norm_f(hidden_states, residual=residual, prenorm=False)
|
218 |
else:
|
219 |
residual = (hidden_states + residual) if residual is not None else hidden_states
|
220 |
hidden_states = self.norm_f(residual.to(dtype=self.norm_f.weight.dtype))
|