Jan Mühlnikel
committed on
Commit
·
6a85a81
1
Parent(s):
923adf2
added same country check feature
Browse files
__pycache__/similarity_page.cpython-310.pyc
CHANGED
|
Binary files a/__pycache__/similarity_page.cpython-310.pyc and b/__pycache__/similarity_page.cpython-310.pyc differ
|
|
|
functions/__pycache__/calc_matches.cpython-310.pyc
CHANGED
|
Binary files a/functions/__pycache__/calc_matches.cpython-310.pyc and b/functions/__pycache__/calc_matches.cpython-310.pyc differ
|
|
|
functions/calc_matches.py
CHANGED
|
@@ -9,12 +9,18 @@ def calc_matches(filtered_df, project_df, similarity_matrix, top_x):
|
|
| 9 |
|
| 10 |
# filter out all row considering the filter
|
| 11 |
filtered_df_indecies_list = filtered_df.index
|
|
|
|
| 12 |
|
| 13 |
np.fill_diagonal(similarity_matrix, 0)
|
| 14 |
-
match_matrix = similarity_matrix[filtered_df_indecies_list]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
# get row (project1) and column (project2) with highest similarity in filtered df
|
| 17 |
-
top_indices = np.unravel_index(
|
| 18 |
|
| 19 |
# get the corresponding similarity values
|
| 20 |
top_values = match_matrix[top_indices]
|
|
|
|
| 9 |
|
| 10 |
# filter out all row considering the filter
|
| 11 |
filtered_df_indecies_list = filtered_df.index
|
| 12 |
+
project_df_indecies_list = project_df.index
|
| 13 |
|
| 14 |
np.fill_diagonal(similarity_matrix, 0)
|
| 15 |
+
match_matrix = similarity_matrix[filtered_df_indecies_list, :][:, project_df_indecies_list]
|
| 16 |
+
|
| 17 |
+
best_matches_list = np.argsort(match_matrix, axis=None)
|
| 18 |
+
|
| 19 |
+
if len(best_matches_list) < top_x:
|
| 20 |
+
top_x = len(best_matches_list)
|
| 21 |
|
| 22 |
# get row (project1) and column (project2) with highest similarity in filtered df
|
| 23 |
+
top_indices = np.unravel_index(best_matches_list[-top_x:], match_matrix.shape)
|
| 24 |
|
| 25 |
# get the corresponding similarity values
|
| 26 |
top_values = match_matrix[top_indices]
|
functions/same_country_filter.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
from functions.semantic_search import search
|
| 3 |
+
|
| 4 |
+
def same_country_filter(df, country_code_list):
    """Keep only the rows of ``df`` whose country matches a selected code.

    Args:
        df: DataFrame with a string column named ``"country"``.
        country_code_list: Country codes to keep. Each code is looked up as a
            substring of the ``"country"`` value via ``Series.str.contains``
            (pandas' default, i.e. the code is interpreted as a regular
            expression).

    Returns:
        ``df`` itself when ``country_code_list`` is empty; otherwise the
        subset of ``df`` whose ``"country"`` value contains at least one of
        the codes. Rows keep their original index labels and their original
        order, and every row appears at most once even if it matches several
        codes.
    """
    # No country selected -> nothing to filter.
    if not country_code_list:
        return df

    countries = df["country"]

    # OR the per-code matches into a single mask instead of concatenating one
    # sub-frame per code: concatenation returned a row once for every code it
    # matched, which produced duplicated index labels in the result.
    matches_any = pd.Series(False, index=df.index)
    for code in country_code_list:
        # na=False: rows without a country value never match.
        matches_any |= countries.str.contains(code, na=False)

    return df[matches_any]
|
modules/__pycache__/result_table.cpython-310.pyc
CHANGED
|
Binary files a/modules/__pycache__/result_table.cpython-310.pyc and b/modules/__pycache__/result_table.cpython-310.pyc differ
|
|
|
modules/result_table.py
CHANGED
|
@@ -17,11 +17,19 @@ def show_table(p1_df, p2_df):
|
|
| 17 |
|
| 18 |
# INTEGRATE IN PREPROCESSING !!!
|
| 19 |
# transform strings to list
|
| 20 |
-
|
| 21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
-
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
row_from_p1["sdg_list"] = [row_from_p1['sgd_pred_code'].item()]
|
| 27 |
row_from_p2["sdg_list"] = [row_from_p2['sgd_pred_code'].item()]
|
|
|
|
| 17 |
|
| 18 |
# INTEGRATE IN PREPROCESSING !!!
|
| 19 |
# transform strings to list
|
| 20 |
+
try:
|
| 21 |
+
row_from_p1["crs_3_code_list"] = [row_from_p1['crs_3_code'].item().split(";")[:-1]]
|
| 22 |
+
row_from_p2["crs_3_code_list"] = [row_from_p2['crs_3_code'].item().split(";")[:-1]]
|
| 23 |
+
except:
|
| 24 |
+
row_from_p1["crs_3_code_list"] = []
|
| 25 |
+
row_from_p2["crs_3_code_list"] = []
|
| 26 |
|
| 27 |
+
try:
|
| 28 |
+
row_from_p1["crs_5_code_list"] = [row_from_p1['crs_3_code'].item().split(";")[:-1]]
|
| 29 |
+
row_from_p2["crs_5_code_list"] = [row_from_p2['crs_3_code'].item().split(";")[:-1]]
|
| 30 |
+
except:
|
| 31 |
+
row_from_p1["crs_5_code_list"] = []
|
| 32 |
+
row_from_p2["crs_5_code_list"] = []
|
| 33 |
|
| 34 |
row_from_p1["sdg_list"] = [row_from_p1['sgd_pred_code'].item()]
|
| 35 |
row_from_p2["sdg_list"] = [row_from_p2['sgd_pred_code'].item()]
|
similarity_page.py
CHANGED
|
@@ -13,6 +13,7 @@ from sentence_transformers import SentenceTransformer
|
|
| 13 |
from modules.result_table import show_table
|
| 14 |
from functions.filter_projects import filter_projects
|
| 15 |
from functions.calc_matches import calc_matches
|
|
|
|
| 16 |
import psutil
|
| 17 |
import os
|
| 18 |
import gc
|
|
@@ -140,7 +141,7 @@ def show_page():
|
|
| 140 |
crs3_option = st.multiselect(
|
| 141 |
'CRS 3',
|
| 142 |
CRS3_MERGED,
|
| 143 |
-
placeholder="Select"
|
| 144 |
)
|
| 145 |
|
| 146 |
# CRS 5 SELECTION
|
|
@@ -155,7 +156,7 @@ def show_page():
|
|
| 155 |
crs5_option = st.multiselect(
|
| 156 |
'CRS 5',
|
| 157 |
crs5_list,
|
| 158 |
-
placeholder="Select",
|
| 159 |
disabled=st.session_state.crs5_option_disabled
|
| 160 |
)
|
| 161 |
|
|
@@ -168,13 +169,14 @@ def show_page():
|
|
| 168 |
)
|
| 169 |
|
| 170 |
different_orga_checkbox = st.checkbox("Only matches between different organizations")
|
|
|
|
| 171 |
|
| 172 |
with col2:
|
| 173 |
# COUNTRY SELECTION
|
| 174 |
country_option = st.multiselect(
|
| 175 |
'Country / Countries',
|
| 176 |
COUNTRY_OPTION_LIST,
|
| 177 |
-
placeholder="
|
| 178 |
)
|
| 179 |
|
| 180 |
# ORGA SELECTION
|
|
@@ -185,7 +187,7 @@ def show_page():
|
|
| 185 |
orga_option = st.multiselect(
|
| 186 |
'Development Bank / Organization',
|
| 187 |
orga_list,
|
| 188 |
-
placeholder="
|
| 189 |
)
|
| 190 |
|
| 191 |
# SEARCH BOX
|
|
@@ -217,11 +219,17 @@ def show_page():
|
|
| 217 |
#searched_filtered_df = semantic_search.show_search(model, embeddings, sentences, filtered_df, TOP_X_PROJECTS)
|
| 218 |
if isinstance(filtered_df, pd.DataFrame):
|
| 219 |
# FIND MATCHES
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 220 |
if different_orga_checkbox:
|
| 221 |
-
p1_df, p2_df = calc_matches(filtered_df,
|
| 222 |
-
|
| 223 |
else:
|
| 224 |
-
p1_df, p2_df = calc_matches(filtered_df,
|
| 225 |
|
| 226 |
# SHOW THE RESULT
|
| 227 |
show_table(p1_df, p2_df)
|
|
|
|
| 13 |
from modules.result_table import show_table
|
| 14 |
from functions.filter_projects import filter_projects
|
| 15 |
from functions.calc_matches import calc_matches
|
| 16 |
+
from functions.same_country_filter import same_country_filter
|
| 17 |
import psutil
|
| 18 |
import os
|
| 19 |
import gc
|
|
|
|
| 141 |
crs3_option = st.multiselect(
|
| 142 |
'CRS 3',
|
| 143 |
CRS3_MERGED,
|
| 144 |
+
placeholder="Select CRS3"
|
| 145 |
)
|
| 146 |
|
| 147 |
# CRS 5 SELECTION
|
|
|
|
| 156 |
crs5_option = st.multiselect(
|
| 157 |
'CRS 5',
|
| 158 |
crs5_list,
|
| 159 |
+
placeholder="Select CRS 5",
|
| 160 |
disabled=st.session_state.crs5_option_disabled
|
| 161 |
)
|
| 162 |
|
|
|
|
| 169 |
)
|
| 170 |
|
| 171 |
different_orga_checkbox = st.checkbox("Only matches between different organizations")
|
| 172 |
+
filterd_country_only_checkbox = st.checkbox("Only matches between filtered countries")
|
| 173 |
|
| 174 |
with col2:
|
| 175 |
# COUNTRY SELECTION
|
| 176 |
country_option = st.multiselect(
|
| 177 |
'Country / Countries',
|
| 178 |
COUNTRY_OPTION_LIST,
|
| 179 |
+
placeholder="All"
|
| 180 |
)
|
| 181 |
|
| 182 |
# ORGA SELECTION
|
|
|
|
| 187 |
orga_option = st.multiselect(
|
| 188 |
'Development Bank / Organization',
|
| 189 |
orga_list,
|
| 190 |
+
placeholder="All"
|
| 191 |
)
|
| 192 |
|
| 193 |
# SEARCH BOX
|
|
|
|
| 219 |
#searched_filtered_df = semantic_search.show_search(model, embeddings, sentences, filtered_df, TOP_X_PROJECTS)
|
| 220 |
if isinstance(filtered_df, pd.DataFrame):
|
| 221 |
# FIND MATCHES
|
| 222 |
+
## If only same country checkbox is activated
|
| 223 |
+
if filterd_country_only_checkbox:
|
| 224 |
+
compare_df = same_country_filter(projects_df, country_code_list)
|
| 225 |
+
else:
|
| 226 |
+
compare_df = projects_df
|
| 227 |
+
|
| 228 |
+
## if show only different orgas checkbox is activated
|
| 229 |
if different_orga_checkbox:
|
| 230 |
+
p1_df, p2_df = calc_matches(filtered_df, compare_df, nonsameorgas_sim_matrix, TOP_X_PROJECTS)
|
|
|
|
| 231 |
else:
|
| 232 |
+
p1_df, p2_df = calc_matches(filtered_df, compare_df, sim_matrix, TOP_X_PROJECTS)
|
| 233 |
|
| 234 |
# SHOW THE RESULT
|
| 235 |
show_table(p1_df, p2_df)
|