Spaces:

vives
/

kp_quality_matching

Runtime error

App Files Files Community

vives committed on Jul 26, 2022

Commit

eec46b2

1 Parent(s): f47e438

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -36

app.py CHANGED Viewed

@@ -1,40 +1,64 @@
 """The following program will read in 2 XL sheets of KP matches and the user will evaluate the quality of the matching"""
 import streamlit as st
-import json
 import pandas as pd
 xl1 = st.file_uploader("Choose first file", key="xl1")
-xl2 = st.file_uploader("Choose second file", key="xl2")
-if xl1 is not None and xl2 is not None:
-  #assert that the first few columns are the same
-  df1 = pd.read_excel(xl1, sheet_name= "0.85 Threshold")
-  df2 = pd.read_excel(xl2, sheet_name= "0.85 Threshold")
-  st.write(df1[["Attendee A","Attendee B","KP"]])
-  if not df1[["Attendee A","Attendee B","KP"]].equals(df2[["Attendee A","Attendee B","KP"]]):
-    xl1 = None
-    xl2 = None
-  else:
-    i = 0
-    choices = []
-    st.write("First excel file")
-    for t1 in df1.iterrows():
-      r1 = t1[1]
-      kps1 = json.loads(r1["Matched KPs"].replace("'", '"'))
-      curr_keys = list(kps1.keys()).copy()
-      for kp1 in curr_keys:
-        if kps1[kp1] > 0.99:
-          kps1.pop(kp1)
-      #now display the kps
-      if kps1 == {}:
-        continue
-      else:
-        for kp1 in kps1.keys():
-          col1, col2, col3 = st.columns(3)
-          with col1:
-            st.write(r1["KP"])
-          with col2:
-            st.write(f"kp: {kp1}\ndistance: {kps1[kp1]}")
-          with col3:
-            choices.append(st.radio("Appropriate?", [True, False],key = i))
-          i+=1

 """The following program will read in 2 XL sheets of KP matches and the user will evaluate the quality of the matching"""
 import streamlit as st
+import ast
 import pandas as pd
+import random
+from time import sleep
+threshold = st.radio("Select threshold", ["0.7", "0.8", "0.85", "0.87", "0.9", "0.95"], 2)
+num_kp = st.slider("Number of key-phrases to select", min_value=10, max_value=100,value=50,step=5)
 xl1 = st.file_uploader("Choose first file", key="xl1")
+#xl2 = st.file_uploader("Choose second file", key="xl2")
+def merge_dicts(x):
+    return {k: v for d in x.dropna() for k, v in d.items()}
+def clean_dict(x):
+    return  x.replace("'", '"')
+if xl1 is not None :
+    #assert that the first few columns are the same
+    df1 = pd.read_excel(xl1, sheet_name= "0.85 Threshold")
+    #first convert strings into dicts
+    df1["Matched KPs"] = df1["Matched KPs"].apply(clean_dict)
+    df1["Matched KPs"] = df1["Matched KPs"].apply(lambda x: ast.literal_eval(x))
+    df1["Matched KPs"] = df1["Matched KPs"].apply(lambda x: {key: x[key] for key in x.keys() if x[key]!="null"})
+    #now pop direct matches
+    df1["Matched KPs"] = df1["Matched KPs"].apply(lambda x: {key:x[key] for key in x.keys() if x[key] <0.99})
+    df1.drop(df1[df1["Matched KPs"] == {}].index, inplace=True)
+    #now merge same KPs and their respective dicts
+    new_df = df1[["KP","Matched KPs"]].groupby("KP").agg(merge_dicts)
+    new_df["dict len"] = new_df["Matched KPs"].apply(lambda x: len(list(x.keys())))
+    new_df = new_df.sort_values(by="dict len", ascending=False)
+    new_df.reset_index(inplace=True)
+    #new_df = new_df.drop("dict len", axis=1)
+    with st.form("First excel file"):
+        choices = []
+        i = 0
+        for t1 in new_df.sample(n=num_kp,random_state=42).iterrows():
+        #for t1 in new_df.sample(n=10, random_state=42).iterrows():
+            r1 = t1[1]
+            kps1 = r1["Matched KPs"]
+            curr_keys = list(kps1.keys()).copy()
+            for kp1 in curr_keys:
+                if kps1[kp1] > 0.99:
+                    kps1.pop(kp1)
+            # now display the kps
+            if kps1 == {}:
+                continue
+            else:
+                col1, col2 = st.columns(2)
+                with col1:
+                    st.write(r1["KP"])
+                with col2:
+                    #if number of keys > 5, then shuffle them and select 5 random
+                    if len(list(kps1.keys())) > 5:
+                        #we can repurpose curr_keys as it was used to pop direct matches
+                        curr_keys = list(kps1.keys())
+                        random.Random(42).shuffle(curr_keys)
+                        curr_keys = curr_keys[:5]
+                    else:
+                        curr_keys = list(kps1.keys())
+                    for kp1 in curr_keys:
+                        choices.append(st.checkbox(f"{kp1}: {kps1[kp1]:0.2f}", key = i))
+                        i+=1
+                    st.markdown("""---""")
+        submitted = st.form_submit_button("Submit")
+        if submitted:
+            st.write(len([x for x in choices if x]), i, f"{len([x for x in choices if x])/i : 0.3f}")