Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,40 +1,64 @@
|
|
1 |
"""The following program will read in 2 XL sheets of KP matches and the user will evaluate the quality of the matching"""
|
2 |
import streamlit as st
|
3 |
-
import
|
4 |
import pandas as pd
|
|
|
|
|
|
|
|
|
5 |
xl1 = st.file_uploader("Choose first file", key="xl1")
|
6 |
-
xl2 = st.file_uploader("Choose second file", key="xl2")
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
xl1 =
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
for
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
"""The following program will read in 2 XL sheets of KP matches and the user will evaluate the quality of the matching"""
|
2 |
import streamlit as st
|
3 |
+
import ast
|
4 |
import pandas as pd
|
5 |
+
import random
|
6 |
+
from time import sleep
|
7 |
+
# Sidebar-less input widgets, rendered top to bottom on every rerun.
# The radio preselects the option at index 2, i.e. "0.85".
# NOTE(review): `threshold` is not read anywhere in the visible code; the
# sheet loaded further down is hard-coded. Confirm whether it should be used.
threshold = st.radio(
    "Select threshold",
    ["0.7", "0.8", "0.85", "0.87", "0.9", "0.95"],
    index=2,
)
# Upper bound on how many key-phrases are sampled for review.
num_kp = st.slider(
    "Number of key-phrases to select",
    min_value=10,
    max_value=100,
    value=50,
    step=5,
)
# Uploaded Excel workbook; None until the user picks a file.
xl1 = st.file_uploader("Choose first file", key="xl1")
# Second upload is currently disabled.
#xl2 = st.file_uploader("Choose second file", key="xl2")
|
11 |
+
def merge_dicts(x):
    """Merge every dict in the Series *x* into a single dict.

    Missing entries (None / NaN) are skipped via ``dropna()``. When the same
    key occurs in more than one dict, the value from the later dict wins.

    Args:
        x: A pandas Series whose non-missing elements are dicts.

    Returns:
        A new dict holding the union of all key/value pairs.
    """
    merged = {}
    for mapping in x.dropna():
        # update() gives later entries precedence, same as a flat comprehension.
        merged.update(mapping)
    return merged
|
13 |
+
def clean_dict(x):
    """Return the string *x* with every single quote replaced by a double quote.

    Used to normalise the quoting of a dict literal stored as text before it
    is parsed.

    NOTE(review): the replacement is applied to every ``'`` in the string, so
    an apostrophe inside a key or value is rewritten too — confirm the input
    never contains one.

    Args:
        x: The text to normalise.

    Returns:
        The text with ``'`` replaced by ``"``.
    """
    return x.replace("'", '"')
|
15 |
+
if xl1 is not None:
    # Load the match sheet from the uploaded workbook.
    # NOTE(review): the sheet name is hard-coded; the `threshold` radio value
    # is not consulted here — confirm whether that is intended.
    df1 = pd.read_excel(xl1, sheet_name="0.85 Threshold")

    # "Matched KPs" cells hold dict literals as text: normalise the quotes,
    # then parse them into real dicts.
    parsed = df1["Matched KPs"].apply(clean_dict).apply(ast.literal_eval)

    # Drop "null" placeholders, then drop direct matches (score >= 0.99).
    # The "null" test comes first so the numeric comparison never sees it.
    df1["Matched KPs"] = parsed.apply(
        lambda d: {k: v for k, v in d.items() if v != "null" and v < 0.99}
    )
    # Rows left without any candidate match carry nothing to review.
    df1.drop(df1[df1["Matched KPs"] == {}].index, inplace=True)

    # Collapse repeated KPs into one row each, merging their match dicts,
    # and order rows by how many matches they have (most first).
    new_df = df1[["KP", "Matched KPs"]].groupby("KP").agg(merge_dicts)
    new_df["dict len"] = new_df["Matched KPs"].apply(len)
    new_df = new_df.sort_values(by="dict len", ascending=False)
    new_df.reset_index(inplace=True)
    #new_df = new_df.drop("dict len", axis=1)

    with st.form("First excel file"):
        choices = []
        i = 0  # running checkbox count; also used as the unique widget key

        # DataFrame.sample raises ValueError when n exceeds the row count,
        # so never request more key-phrases than are available.
        sample_size = min(num_kp, len(new_df))
        for _, r1 in new_df.sample(n=sample_size, random_state=42).iterrows():
            # Defensive re-filter of direct matches, without mutating the
            # dict stored in the frame.
            kps1 = {k: v for k, v in r1["Matched KPs"].items() if v <= 0.99}
            if not kps1:
                continue

            col1, col2 = st.columns(2)
            with col1:
                st.write(r1["KP"])
            with col2:
                curr_keys = list(kps1)
                # With more than 5 candidates, show a reproducible random 5.
                if len(curr_keys) > 5:
                    random.Random(42).shuffle(curr_keys)
                    curr_keys = curr_keys[:5]
                for kp1 in curr_keys:
                    choices.append(
                        st.checkbox(f"{kp1}: {kps1[kp1]:0.2f}", key=i)
                    )
                    i += 1
            st.markdown("""---""")

        submitted = st.form_submit_button("Submit")
        if submitted:
            selected = sum(1 for x in choices if x)
            # Guard the ratio: i is 0 when no checkbox was rendered.
            ratio = selected / i if i else 0.0
            st.write(selected, i, f"{ratio : 0.3f}")
|