Spaces:

taka-yamakoshi
/

causal-intervention-demo

Sleeping

App Files Community

taka-yamakoshi committed on Mar 23, 2023

Commit

50ce4f4

•

1 Parent(s): 8f32fbf

first pass

Browse files

Files changed (1) hide show

app.py +65 -28

app.py CHANGED Viewed

@@ -58,6 +58,41 @@ def clear_data():
     for key in st.session_state:
         del st.session_state[key]
 def show_annotated_sentence(sent,option_locs=[],mask_locs=[]):
     disp_style = '"font-family:san serif; color:Black; font-size: 20px"'
     prefix = f'<p style={disp_style}><span style="font-weight:bold">'
@@ -90,43 +125,45 @@ if __name__=='__main__':
             sent_1 = st.text_input('Sentence 1',value='It is better to play a prank on Samuel than Craig because he gets angry less often.')
             sent_2 = st.text_input('Sentence 2',value='It is better to play a prank on Samuel than Craig because he gets angry more often.')
             if st.button('Tokenize'):
-                st.session_state['page_status'] = 'tokenized'
                 st.session_state['sent_1'] = sent_1
                 st.session_state['sent_2'] = sent_2
                 main_area.empty()
-    if st.session_state['page_status']=='tokenized':
         with main_area.container():
             sent_1 = st.session_state['sent_1']
             sent_2 = st.session_state['sent_2']
-            if 'masked_pos_1' not in st.session_state:
-                st.session_state['masked_pos_1'] = []
-            if 'masked_pos_2' not in st.session_state:
-                st.session_state['masked_pos_2'] = []
             st.write('2. Select sites to mask out and click "Confirm"')
-            input_sent = tokenizer(sent_1).input_ids
-            decoded_sent = [tokenizer.decode([token]) for token in input_sent[1:-1]]
-            char_nums = [len(word)+2 for word in decoded_sent]
-            cols = st.columns(char_nums)
-            for word_id,(col,word) in enumerate(zip(cols,decoded_sent)):
-                with col:
-                    if st.button(word,key=f'word_{word_id}'):
-                        if word_id not in st.session_state['masked_pos_1']:
-                            st.session_state['masked_pos_1'].append(word_id)
-                        else:
-                            st.session_state['masked_pos_1'].remove(word_id)
-            st.markdown(show_annotated_sentence(decoded_sent,mask_locs=st.session_state['masked_pos_1']), unsafe_allow_html = True)
     if st.session_state['page_status']=='analysis':
-        sent_1 = st.sidebar.text_input('Sentence 1',value='It is better to play a prank on Samuel than Craig because he gets angry less often.',on_change=clear_data)
-        sent_2 = st.sidebar.text_input('Sentence 2',value='It is better to play a prank on Samuel than Craig because he gets angry more often.',on_change=clear_data)
-        input_ids_1 = tokenizer(sent_1).input_ids
-        input_ids_2 = tokenizer(sent_2).input_ids
-        input_ids = torch.tensor([input_ids_1,input_ids_2])
-        outputs = SkeletonAlbertForMaskedLM(model,input_ids,interventions = {0:{'lay':[(8,1,[0,1])]}})
-        logprobs = F.log_softmax(outputs['logits'], dim = -1)
-        preds = [torch.multinomial(torch.exp(probs), num_samples=1).squeeze(dim=-1) for probs in logprobs[0]]
-        st.write([tokenizer.decode([token]) for token in preds])

     for key in st.session_state:
         del st.session_state[key]
+def annotate_mask(sent_id,sent):
+    st.write(f'Sentence {sent_id}')
+    input_sent = tokenizer(sent).input_ids
+    decoded_sent = [tokenizer.decode([token]) for token in input_sent[1:-1]]
+    char_nums = [len(word)+2 for word in decoded_sent]
+    cols = st.columns(char_nums)
+    mask_locs = []
+    for word_id,(col,word) in enumerate(zip(cols,decoded_sent)):
+        with col:
+            if st.button(word,key=f'word_{word_id}'):
+                if word_id not in mask_locs:
+                    mask_locs.append(word_id)
+                else:
+                    mask_locs.remove(word_id)
+    st.markdown(show_annotated_sentence(decoded_sent,mask_locs=mask_locs), unsafe_allow_html = True)
+    return mask_locs
+def annotate_options(sent_id,sent):
+    st.write(f'Sentence {sent_id}')
+    input_sent = tokenizer(sent).input_ids
+    decoded_sent = [tokenizer.decode([token]) for token in input_sent[1:-1]]
+    char_nums = [len(word)+2 for word in decoded_sent]
+    cols = st.columns(char_nums)
+    option_locs = []
+    for word_id,(col,word) in enumerate(zip(cols,decoded_sent)):
+        with col:
+            if st.button(word,key=f'word_{word_id}'):
+                if word_id not in option_locs:
+                    option_locs.append(word_id)
+                else:
+                    option_locs.remove(word_id)
+    st.markdown(show_annotated_sentence(decoded_sent,option_locs=option_locs,
+                                        mask_locs=st.session_state[f'mask_locs_{sent_id}']), unsafe_allow_html = True)
+    return option_locs
 def show_annotated_sentence(sent,option_locs=[],mask_locs=[]):
     disp_style = '"font-family:san serif; color:Black; font-size: 20px"'
     prefix = f'<p style={disp_style}><span style="font-weight:bold">'
             sent_1 = st.text_input('Sentence 1',value='It is better to play a prank on Samuel than Craig because he gets angry less often.')
             sent_2 = st.text_input('Sentence 2',value='It is better to play a prank on Samuel than Craig because he gets angry more often.')
             if st.button('Tokenize'):
+                st.session_state['page_status'] = 'annotate_mask'
                 st.session_state['sent_1'] = sent_1
                 st.session_state['sent_2'] = sent_2
                 main_area.empty()
+    if st.session_state['page_status']=='annotate_mask':
         with main_area.container():
             sent_1 = st.session_state['sent_1']
             sent_2 = st.session_state['sent_2']
             st.write('2. Select sites to mask out and click "Confirm"')
+            st.session_state[f'mask_locs_1'] = annotate_mask(1,sent_1)
+            st.session_state[f'mask_locs_2'] = annotate_mask(2,sent_2)
+            if st.button('Confirm'):
+                st.session_state['page_status'] = 'annotate_options'
+                main_area.empty()
+    if st.session_state['page_status'] == 'annotate_options':
+        with main_area.container():
+            sent_1 = st.session_state['sent_1']
+            sent_2 = st.session_state['sent_2']
+            st.write('2. Select options click "Confirm"')
+            st.session_state[f'option_locs_1'] = annotate_options(1,sent_1)
+            st.session_state[f'option_locs_2'] = annotate_options(2,sent_2)
+            if st.button('Confirm'):
+                st.session_state['page_status'] = 'analysis'
+                main_area.empty()
     if st.session_state['page_status']=='analysis':
+        with main_area.container():
+            sent_1 = st.session_state['sent_1']
+            sent_2 = st.session_state['sent_2']
+            input_ids_1 = tokenizer(sent_1).input_ids
+            input_ids_2 = tokenizer(sent_2).input_ids
+            input_ids = torch.tensor([input_ids_1,input_ids_2])
+            outputs = SkeletonAlbertForMaskedLM(model,input_ids,interventions = {0:{'lay':[(8,1,[0,1])]}})
+            logprobs = F.log_softmax(outputs['logits'], dim = -1)
+            preds = [torch.multinomial(torch.exp(probs), num_samples=1).squeeze(dim=-1) for probs in logprobs[0]]
+            st.write([tokenizer.decode([token]) for token in preds])