"""Streamlit app for annotating generated summaries as faithful or not.

For each source document the app shows three model outputs in a random
order, lets the selected annotator mark each one with a radio button, and
accumulates the answers in ``st.session_state`` until they are downloaded.
"""
import streamlit as st
import random
import copy

# Number of leading lines of every file that each annotator receives.
N = 10
# Number of additional lines each annotator receives, starting at the
# annotator's own offset (see ``annotators_dic``).
PER_ANNOTATOR = 50

state = st.session_state

generated_path = 'generated_predictions.txt'
ORI_RES = f'DUC/results/{generated_path}'
# Disabled variant, kept for reference:
# CONTEXT_SENT_0_h_0_RES = f'/home/nlp/wolhanr/mds_faithfull/data/DUC/output_dir/sent_window_0_h_0_clusters/{generated_path}'
CONTEXT_SENT_0_h_1_RES = f'DUC/output_dir/sent_window_0_clusters/{generated_path}'
CONTEXT_SENT_1_h_1_RES = f'DUC/output_dir/sent_window_1_h_1_clusters/{generated_path}'
source_path = 'DUC/sent_window_1_h_1_clusters/test.source'

OPTIONS = ["faithfull", "Not faithfull"]
Annotators = ['Ruben', 'Arie']
# Start offset (line index) of each annotator's private slice of the data.
annotators_dic = {'Ruben': 10, 'Arie': 60}

# Model label stored with each annotation; position i corresponds to
# column i + 1 of an item tuple (column 0 is the source text).
MODEL_NAMES = ('ori_res', 'sent_0_h_1', 'sent_1_h_1')


def _load_annotator_lines(path, start):
    """Return the first ``N`` lines of *path* plus ``PER_ANNOTATOR`` lines from *start*.

    The file is split on ``'\\n'`` exactly as before, so a trailing newline
    still yields a final empty string.
    """
    with open(path) as handle:
        lines = handle.read().split('\n')
    return lines[:N] + lines[start:start + PER_ANNOTATOR]


if "number_button_fill" not in state:
    state.number_button_fill = 0


def annotate(annotator):
    """Button callback: remember which annotator is using the app."""
    state.annotator = annotator


# Until an annotator has been picked, show one button per annotator.
if "annotator" not in state:
    c = st.columns(len(Annotators))
    for idx, option in enumerate(Annotators):
        c[idx].button(f"{option}", on_click=annotate, args=(option,))

# One-time initialisation, performed on the first run after an annotator
# has been selected.
if "annotations" not in state and "annotator" in state:
    state.annotations = {}
    # Header row: annotations are stored as tuple keys with empty values.
    state.annotations['data_id', 'index', 'output', 'model', 'is_faithfull'] = ''

    offset = annotators_dic[state.annotator]
    source = _load_annotator_lines(source_path, offset)
    ori_res = _load_annotator_lines(ORI_RES, offset)
    sent_0_h_1 = _load_annotator_lines(CONTEXT_SENT_0_h_1_RES, offset)
    sent_1_h_1 = _load_annotator_lines(CONTEXT_SENT_1_h_1_RES, offset)

    # Each item is (source, ori_res output, sent_0_h_1 output, sent_1_h_1 output).
    state.files = list(zip(source, ori_res, sent_0_h_1, sent_1_h_1))
    state.current_file = state.files[0]
    state.counter = 0
    state.submit = 0


def submit(index_0, index_1, index_2):
    """Button callback: store the three answers and advance to the next item.

    Args:
        index_0: Item-tuple column (1, 2 or 3) that was displayed first and
            answered with radio ``a``.
        index_1: Column displayed second, answered with radio ``b``.
        index_2: Column displayed third, answered with radio ``c``.

    One annotation key ``(counter, index, output, model, answer)`` is added
    per model. When the last item has been submitted ``state.files`` becomes
    empty and ``state.current_file`` is set to ``None``.
    """
    if not state.files:
        # Nothing left to annotate (e.g. a stray click after completion).
        return

    # Sort by column so the answers line up with MODEL_NAMES regardless of
    # the order in which the outputs were displayed.
    answers = sorted(
        [(index_0, state.a), (index_1, state.b), (index_2, state.c)],
        key=lambda pair: pair[0],
    )
    for column, (model, (index, answer)) in enumerate(zip(MODEL_NAMES, answers), start=1):
        state.annotations[state.counter, index, state.current_file[column], model, answer] = ''

    state.files.remove(state.current_file)
    random.shuffle(state.indexes)
    # Indexing files[0] unconditionally raised IndexError after the final
    # item; fall back to None once everything has been annotated.
    state.current_file = state.files[0] if state.files else None
    state.counter += 1
    state.submit = 0


if 'files' in state and state.files and "annotator" in state:
    st.header("Dataset annotation")
    st.header(state.annotator)
    selected_file = state.current_file
    st.write(f"Source file: {selected_file[0]}")

    # Display order of the three outputs; reshuffled by submit() per item.
    if 'indexes' not in state:
        state.indexes = [1, 2, 3]
        random.shuffle(state.indexes)

    for slot, label in enumerate(('a', 'b', 'c')):
        st.write('-' * 50)
        st.write(f"Output: {selected_file[state.indexes[slot]]}")
        state[label] = st.radio(label, OPTIONS, key=str(slot))
    st.write('-' * 50)

    # Snapshot the current display order as an explicit tuple of callback
    # arguments, so the later in-place shuffle cannot affect it.
    shown_order = copy.deepcopy(state.indexes)
    st.button('Submit', on_click=submit, args=tuple(shown_order))
else:
    st.info("Everything annotated.")

if 'annotations' in state and 'files' in state:
    # "- 1" discounts the header entry; each item contributes three entries.
    st.info(f"Annotated: {(len(state.annotations)-1)/3}, Remaining: {len(state.files)}")
    # NOTE(review): the download is tab-separated "tuple-repr<TAB>value"
    # lines despite the .csv name; left unchanged so existing consumers of
    # this format keep working.
    st.download_button(
        "Download annotations as CSV",
        "\n".join([f"{k}\t{v}" for k, v in state.annotations.items()]),
        file_name="annotations_faithfull.csv",
    )