vives committed on
Commit
eec46b2
·
1 Parent(s): f47e438

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -36
app.py CHANGED
@@ -1,40 +1,64 @@
1
  """The following program will read in 2 XL sheets of KP matches and the user will evaluate the quality of the matching"""
2
  import streamlit as st
3
- import json
4
  import pandas as pd
 
 
 
 
5
  xl1 = st.file_uploader("Choose first file", key="xl1")
6
- xl2 = st.file_uploader("Choose second file", key="xl2")
7
- if xl1 is not None and xl2 is not None:
8
- #assert that the first few columns are the same
9
- df1 = pd.read_excel(xl1, sheet_name= "0.85 Threshold")
10
- df2 = pd.read_excel(xl2, sheet_name= "0.85 Threshold")
11
- st.write(df1[["Attendee A","Attendee B","KP"]])
12
- if not df1[["Attendee A","Attendee B","KP"]].equals(df2[["Attendee A","Attendee B","KP"]]):
13
- xl1 = None
14
- xl2 = None
15
- else:
16
- i = 0
17
- choices = []
18
-
19
- st.write("First excel file")
20
- for t1 in df1.iterrows():
21
- r1 = t1[1]
22
- kps1 = json.loads(r1["Matched KPs"].replace("'", '"'))
23
- curr_keys = list(kps1.keys()).copy()
24
- for kp1 in curr_keys:
25
- if kps1[kp1] > 0.99:
26
- kps1.pop(kp1)
27
- #now display the kps
28
- if kps1 == {}:
29
- continue
30
- else:
31
- for kp1 in kps1.keys():
32
- col1, col2, col3 = st.columns(3)
33
- with col1:
34
- st.write(r1["KP"])
35
- with col2:
36
- st.write(f"kp: {kp1}\ndistance: {kps1[kp1]}")
37
- with col3:
38
- choices.append(st.radio("Appropriate?", [True, False],key = i))
39
- i+=1
40
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit app for manually rating key-phrase (KP) matches from an Excel workbook.

The user selects a threshold, uploads one workbook and ticks the matches they
accept.  On submit the app reports the number of accepted matches, the number
of matches shown, and the accepted fraction.
"""
import ast
import random
from time import sleep

import pandas as pd
import streamlit as st

# Matches scoring this much or more are treated as direct matches and hidden.
DIRECT_MATCH_SCORE = 0.99
# At most this many matches are shown for a single key-phrase.
MAX_MATCHES_PER_KP = 5
# Fixed seed so the same rows and matches are shown on every rerun.
SAMPLE_SEED = 42


def merge_dicts(x):
    """Merge a Series of dicts into a single dict, skipping missing entries.

    When the same key occurs in several dicts, the value from the last one wins.
    """
    return {k: v for d in x.dropna() for k, v in d.items()}


def clean_dict(x):
    """Return ``x`` with every single quote replaced by a double quote.

    Kept for backward compatibility only.  The loader below does not call it:
    ``ast.literal_eval`` accepts either quote style, and the replacement
    corrupts cells whose phrases contain an apostrophe.
    """
    return x.replace("'", '"')


def _parse_matches(cell):
    """Parse one "Matched KPs" cell into a ``{phrase: score}`` dict.

    Entries whose score is the string "null" and entries scoring
    ``DIRECT_MATCH_SCORE`` or more are left out.
    """
    matches = ast.literal_eval(cell)
    return {
        phrase: score
        for phrase, score in matches.items()
        if score != "null" and score < DIRECT_MATCH_SCORE
    }


threshold = st.radio("Select threshold", ["0.7", "0.8", "0.85", "0.87", "0.9", "0.95"], 2)
num_kp = st.slider("Number of key-phrases to select", min_value=10, max_value=100, value=50, step=5)
xl1 = st.file_uploader("Choose first file", key="xl1")

if xl1 is not None:
    # The default radio option ("0.85") yields the sheet name that used to be
    # hard-coded, so the default behaviour is unchanged.
    sheet_name = f"{threshold} Threshold"
    try:
        df1 = pd.read_excel(xl1, sheet_name=sheet_name)
    except ValueError as err:
        # pandas reports a missing worksheet as ValueError.
        st.error(f"Could not read sheet '{sheet_name}': {err}")
        st.stop()

    # Empty cells arrive as NaN and cannot be parsed; drop them first.
    df1 = df1.dropna(subset=["Matched KPs"]).copy()
    df1["Matched KPs"] = df1["Matched KPs"].apply(_parse_matches)
    # Discard rows that have no match left after filtering.
    df1 = df1[df1["Matched KPs"].astype(bool)]
    if df1.empty:
        st.warning("No key-phrase matches left to review in this sheet.")
        st.stop()

    # Merge the match dicts of rows that share the same KP.
    new_df = df1[["KP", "Matched KPs"]].groupby("KP").agg(merge_dicts)
    new_df["dict len"] = new_df["Matched KPs"].apply(len)
    new_df = new_df.sort_values(by="dict len", ascending=False)
    new_df.reset_index(inplace=True)

    # sample() raises if asked for more rows than exist, so cap the request.
    sample_size = min(num_kp, len(new_df))

    with st.form("First excel file"):
        choices = []
        i = 0
        sampled = new_df.sample(n=sample_size, random_state=SAMPLE_SEED)
        for _, row in sampled.iterrows():
            kps1 = row["Matched KPs"]
            shown = list(kps1)
            if len(shown) > MAX_MATCHES_PER_KP:
                # Seeded shuffle keeps the displayed subset stable across reruns.
                random.Random(SAMPLE_SEED).shuffle(shown)
                shown = shown[:MAX_MATCHES_PER_KP]
            col1, col2 = st.columns(2)
            with col1:
                st.write(row["KP"])
            with col2:
                for kp1 in shown:
                    choices.append(st.checkbox(f"{kp1}: {kps1[kp1]:0.2f}", key=i))
                    i += 1
            st.markdown("""---""")
        submitted = st.form_submit_button("Submit")
        if submitted:
            accepted = sum(1 for choice in choices if choice)
            # Guard against a form that rendered no checkboxes at all.
            ratio = accepted / i if i else 0.0
            st.write(accepted, i, f"{ratio: 0.3f}")