Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Jan Mühlnikel
commited on
Commit
·
6a85a81
1
Parent(s):
923adf2
added same country check feature
Browse files
__pycache__/similarity_page.cpython-310.pyc
CHANGED
Binary files a/__pycache__/similarity_page.cpython-310.pyc and b/__pycache__/similarity_page.cpython-310.pyc differ
|
|
functions/__pycache__/calc_matches.cpython-310.pyc
CHANGED
Binary files a/functions/__pycache__/calc_matches.cpython-310.pyc and b/functions/__pycache__/calc_matches.cpython-310.pyc differ
|
|
functions/calc_matches.py
CHANGED
@@ -9,12 +9,18 @@ def calc_matches(filtered_df, project_df, similarity_matrix, top_x):
|
|
9 |
|
10 |
# filter out all rows considering the filter
|
11 |
filtered_df_indecies_list = filtered_df.index
|
|
|
12 |
|
13 |
np.fill_diagonal(similarity_matrix, 0)
|
14 |
-
match_matrix = similarity_matrix[filtered_df_indecies_list]
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
# get row (project1) and column (project2) with highest similarity in filtered df
|
17 |
-
top_indices = np.unravel_index(
|
18 |
|
19 |
# get the corresponding similarity values
|
20 |
top_values = match_matrix[top_indices]
|
|
|
9 |
|
10 |
# filter out all rows considering the filter
|
11 |
filtered_df_indecies_list = filtered_df.index
|
12 |
+
project_df_indecies_list = project_df.index
|
13 |
|
14 |
np.fill_diagonal(similarity_matrix, 0)
|
15 |
+
match_matrix = similarity_matrix[filtered_df_indecies_list, :][:, project_df_indecies_list]
|
16 |
+
|
17 |
+
best_matches_list = np.argsort(match_matrix, axis=None)
|
18 |
+
|
19 |
+
if len(best_matches_list) < top_x:
|
20 |
+
top_x = len(best_matches_list)
|
21 |
|
22 |
# get row (project1) and column (project2) with highest similarity in filtered df
|
23 |
+
top_indices = np.unravel_index(best_matches_list[-top_x:], match_matrix.shape)
|
24 |
|
25 |
# get the corresponding similarity values
|
26 |
top_values = match_matrix[top_indices]
|
functions/same_country_filter.py
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
from functions.semantic_search import search
|
3 |
+
|
4 |
+
def same_country_filter(df, country_code_list):
    """Keep only the rows of *df* whose ``country`` value contains a given code.

    Args:
        df: DataFrame with a string column named ``"country"``.
        country_code_list: Country codes to keep. A row is kept when its
            ``country`` string contains at least one of the codes as a
            literal substring.

    Returns:
        ``df`` itself (unchanged, not copied) when ``country_code_list`` is
        empty or ``None``; otherwise the matching rows of ``df`` in their
        original order and with their original index labels. Each row appears
        at most once, even when it matches several codes.
    """
    # No country selected: nothing to filter.
    if country_code_list is None or len(country_code_list) == 0:
        return df

    countries = df["country"]

    # OR the per-code matches into a single mask so that a row matching
    # several codes is selected once instead of once per matching code.
    keep = pd.Series(False, index=df.index)
    for code in country_code_list:
        # regex=False: codes are matched literally, not as regex patterns;
        # na=False: rows with a missing country never match.
        keep = keep | countries.str.contains(code, na=False, regex=False)

    return df.loc[keep]
|
modules/__pycache__/result_table.cpython-310.pyc
CHANGED
Binary files a/modules/__pycache__/result_table.cpython-310.pyc and b/modules/__pycache__/result_table.cpython-310.pyc differ
|
|
modules/result_table.py
CHANGED
@@ -17,11 +17,19 @@ def show_table(p1_df, p2_df):
|
|
17 |
|
18 |
# INTEGRATE IN PREPROCESSING !!!
|
19 |
# transform strings to list
|
20 |
-
|
21 |
-
|
|
|
|
|
|
|
|
|
22 |
|
23 |
-
|
24 |
-
|
|
|
|
|
|
|
|
|
25 |
|
26 |
row_from_p1["sdg_list"] = [row_from_p1['sgd_pred_code'].item()]
|
27 |
row_from_p2["sdg_list"] = [row_from_p2['sgd_pred_code'].item()]
|
|
|
17 |
|
18 |
# INTEGRATE IN PREPROCESSING !!!
|
19 |
# transform strings to list
|
20 |
+
try:
|
21 |
+
row_from_p1["crs_3_code_list"] = [row_from_p1['crs_3_code'].item().split(";")[:-1]]
|
22 |
+
row_from_p2["crs_3_code_list"] = [row_from_p2['crs_3_code'].item().split(";")[:-1]]
|
23 |
+
except:
|
24 |
+
row_from_p1["crs_3_code_list"] = []
|
25 |
+
row_from_p2["crs_3_code_list"] = []
|
26 |
|
27 |
+
try:
|
28 |
+
row_from_p1["crs_5_code_list"] = [row_from_p1['crs_3_code'].item().split(";")[:-1]]
|
29 |
+
row_from_p2["crs_5_code_list"] = [row_from_p2['crs_3_code'].item().split(";")[:-1]]
|
30 |
+
except:
|
31 |
+
row_from_p1["crs_5_code_list"] = []
|
32 |
+
row_from_p2["crs_5_code_list"] = []
|
33 |
|
34 |
row_from_p1["sdg_list"] = [row_from_p1['sgd_pred_code'].item()]
|
35 |
row_from_p2["sdg_list"] = [row_from_p2['sgd_pred_code'].item()]
|
similarity_page.py
CHANGED
@@ -13,6 +13,7 @@ from sentence_transformers import SentenceTransformer
|
|
13 |
from modules.result_table import show_table
|
14 |
from functions.filter_projects import filter_projects
|
15 |
from functions.calc_matches import calc_matches
|
|
|
16 |
import psutil
|
17 |
import os
|
18 |
import gc
|
@@ -140,7 +141,7 @@ def show_page():
|
|
140 |
crs3_option = st.multiselect(
|
141 |
'CRS 3',
|
142 |
CRS3_MERGED,
|
143 |
-
placeholder="Select"
|
144 |
)
|
145 |
|
146 |
# CRS 5 SELECTION
|
@@ -155,7 +156,7 @@ def show_page():
|
|
155 |
crs5_option = st.multiselect(
|
156 |
'CRS 5',
|
157 |
crs5_list,
|
158 |
-
placeholder="Select",
|
159 |
disabled=st.session_state.crs5_option_disabled
|
160 |
)
|
161 |
|
@@ -168,13 +169,14 @@ def show_page():
|
|
168 |
)
|
169 |
|
170 |
different_orga_checkbox = st.checkbox("Only matches between different organizations")
|
|
|
171 |
|
172 |
with col2:
|
173 |
# COUNTRY SELECTION
|
174 |
country_option = st.multiselect(
|
175 |
'Country / Countries',
|
176 |
COUNTRY_OPTION_LIST,
|
177 |
-
placeholder="
|
178 |
)
|
179 |
|
180 |
# ORGA SELECTION
|
@@ -185,7 +187,7 @@ def show_page():
|
|
185 |
orga_option = st.multiselect(
|
186 |
'Development Bank / Organization',
|
187 |
orga_list,
|
188 |
-
placeholder="
|
189 |
)
|
190 |
|
191 |
# SEARCH BOX
|
@@ -217,11 +219,17 @@ def show_page():
|
|
217 |
#searched_filtered_df = semantic_search.show_search(model, embeddings, sentences, filtered_df, TOP_X_PROJECTS)
|
218 |
if isinstance(filtered_df, pd.DataFrame):
|
219 |
# FIND MATCHES
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
220 |
if different_orga_checkbox:
|
221 |
-
p1_df, p2_df = calc_matches(filtered_df,
|
222 |
-
|
223 |
else:
|
224 |
-
p1_df, p2_df = calc_matches(filtered_df,
|
225 |
|
226 |
# SHOW THE RESULT
|
227 |
show_table(p1_df, p2_df)
|
|
|
13 |
from modules.result_table import show_table
|
14 |
from functions.filter_projects import filter_projects
|
15 |
from functions.calc_matches import calc_matches
|
16 |
+
from functions.same_country_filter import same_country_filter
|
17 |
import psutil
|
18 |
import os
|
19 |
import gc
|
|
|
141 |
crs3_option = st.multiselect(
|
142 |
'CRS 3',
|
143 |
CRS3_MERGED,
|
144 |
+
placeholder="Select CRS3"
|
145 |
)
|
146 |
|
147 |
# CRS 5 SELECTION
|
|
|
156 |
crs5_option = st.multiselect(
|
157 |
'CRS 5',
|
158 |
crs5_list,
|
159 |
+
placeholder="Select CRS 5",
|
160 |
disabled=st.session_state.crs5_option_disabled
|
161 |
)
|
162 |
|
|
|
169 |
)
|
170 |
|
171 |
different_orga_checkbox = st.checkbox("Only matches between different organizations")
|
172 |
+
filterd_country_only_checkbox = st.checkbox("Only matches between filtered countries")
|
173 |
|
174 |
with col2:
|
175 |
# COUNTRY SELECTION
|
176 |
country_option = st.multiselect(
|
177 |
'Country / Countries',
|
178 |
COUNTRY_OPTION_LIST,
|
179 |
+
placeholder="All"
|
180 |
)
|
181 |
|
182 |
# ORGA SELECTION
|
|
|
187 |
orga_option = st.multiselect(
|
188 |
'Development Bank / Organization',
|
189 |
orga_list,
|
190 |
+
placeholder="All"
|
191 |
)
|
192 |
|
193 |
# SEARCH BOX
|
|
|
219 |
#searched_filtered_df = semantic_search.show_search(model, embeddings, sentences, filtered_df, TOP_X_PROJECTS)
|
220 |
if isinstance(filtered_df, pd.DataFrame):
|
221 |
# FIND MATCHES
|
222 |
+
## If only same country checkbox is activated
|
223 |
+
if filterd_country_only_checkbox:
|
224 |
+
compare_df = same_country_filter(projects_df, country_code_list)
|
225 |
+
else:
|
226 |
+
compare_df = projects_df
|
227 |
+
|
228 |
+
## if show only different orgas checkbox is activated
|
229 |
if different_orga_checkbox:
|
230 |
+
p1_df, p2_df = calc_matches(filtered_df, compare_df, nonsameorgas_sim_matrix, TOP_X_PROJECTS)
|
|
|
231 |
else:
|
232 |
+
p1_df, p2_df = calc_matches(filtered_df, compare_df, sim_matrix, TOP_X_PROJECTS)
|
233 |
|
234 |
# SHOW THE RESULT
|
235 |
show_table(p1_df, p2_df)
|