Spaces:

taka-yamakoshi
/

causal-intervention-demo

Runtime error

taka-yamakoshi committed on Mar 22, 2023

Commit

b218eb4

1 Parent(s): e87e116

fix

Files changed (1) hide show

skeleton_modeling_albert.py CHANGED Viewed

@@ -10,11 +10,16 @@ def SkeletonAlbertLayer(layer_id,layer,hidden,interventions):
     attention_layer = layer.attention
     num_heads = attention_layer.num_attention_heads
     head_dim = attention_layer.attention_head_size
     qry = attention_layer.query(hidden)
     key = attention_layer.key(hidden)
     val = attention_layer.value(hidden)
     # swap representations
     interv_layer = interventions.pop(layer_id,None)
     if interv_layer is not None:
@@ -29,8 +34,8 @@ def SkeletonAlbertLayer(layer_id,layer,hidden,interventions):
             if interv_rep is not None:
                 new_state = reps[rep_type].clone()
                 for head_id, pos, swap_ids in interv_rep:
-                    new_state[swap_ids[0],pos,head_id] = reps[rep_name][swap_ids[1],pos,head_id]
-                    new_state[swap_ids[1],pos,head_id] = reps[rep_name][swap_ids[0],pos,head_id]
                 reps[rep_type] = new_state.clone()
         hidden = reps['lay'].clone()

     attention_layer = layer.attention
     num_heads = attention_layer.num_attention_heads
     head_dim = attention_layer.attention_head_size
+    assert num_heads*head_dim == hidden.shape[2]
     qry = attention_layer.query(hidden)
     key = attention_layer.key(hidden)
     val = attention_layer.value(hidden)
+    assert qry.shape == hidden.shape
+    assert key.shape == hidden.shape
+    assert val.shape == hidden.shape
     # swap representations
     interv_layer = interventions.pop(layer_id,None)
     if interv_layer is not None:
             if interv_rep is not None:
                 new_state = reps[rep_type].clone()
                 for head_id, pos, swap_ids in interv_rep:
+                    new_state[swap_ids[0],pos,head_dim*head_id:head_dim*(head_id+1)] = reps[rep_type][swap_ids[1],pos,head_dim*head_id:head_dim*(head_id+1)]
+                    new_state[swap_ids[1],pos,head_dim*head_id:head_dim*(head_id+1)] = reps[rep_type][swap_ids[0],pos,head_dim*head_id:head_dim*(head_id+1)]
                 reps[rep_type] = new_state.clone()
         hidden = reps['lay'].clone()