Jan Mühlnikel committed on
Commit
6a85a81
·
1 Parent(s): 923adf2

added same country check feature

Browse files
__pycache__/similarity_page.cpython-310.pyc CHANGED
Binary files a/__pycache__/similarity_page.cpython-310.pyc and b/__pycache__/similarity_page.cpython-310.pyc differ
 
functions/__pycache__/calc_matches.cpython-310.pyc CHANGED
Binary files a/functions/__pycache__/calc_matches.cpython-310.pyc and b/functions/__pycache__/calc_matches.cpython-310.pyc differ
 
functions/calc_matches.py CHANGED
@@ -9,12 +9,18 @@ def calc_matches(filtered_df, project_df, similarity_matrix, top_x):
9
 
10
  # filter out all row considering the filter
11
  filtered_df_indecies_list = filtered_df.index
 
12
 
13
  np.fill_diagonal(similarity_matrix, 0)
14
- match_matrix = similarity_matrix[filtered_df_indecies_list]
 
 
 
 
 
15
 
16
  # get row (project1) and column (project2) with highest similarity in filtered df
17
- top_indices = np.unravel_index(np.argsort(match_matrix, axis=None)[-top_x:], match_matrix.shape)
18
 
19
  # get the corresponding similarity values
20
  top_values = match_matrix[top_indices]
 
9
 
10
  # filter out all row considering the filter
11
  filtered_df_indecies_list = filtered_df.index
12
+ project_df_indecies_list = project_df.index
13
 
14
  np.fill_diagonal(similarity_matrix, 0)
15
+ match_matrix = similarity_matrix[filtered_df_indecies_list, :][:, project_df_indecies_list]
16
+
17
+ best_matches_list = np.argsort(match_matrix, axis=None)
18
+
19
+ if len(best_matches_list) < top_x:
20
+ top_x = len(best_matches_list)
21
 
22
  # get row (project1) and column (project2) with highest similarity in filtered df
23
+ top_indices = np.unravel_index(best_matches_list[-top_x:], match_matrix.shape)
24
 
25
  # get the corresponding similarity values
26
  top_values = match_matrix[top_indices]
functions/same_country_filter.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from functions.semantic_search import search
3
+
4
def same_country_filter(df, country_code_list):
    """Keep only the rows of ``df`` whose "country" value mentions a given code.

    Args:
        df: DataFrame with a string "country" column.
        country_code_list: Country codes to look for. An empty list
            disables the filter.

    Returns:
        ``df`` itself when ``country_code_list`` is empty. Otherwise a
        DataFrame holding every row whose "country" string contains at
        least one of the codes as a substring. Rows keep their original
        order and index labels, and each row appears at most once even
        when it matches several codes. Rows with a missing "country"
        value never match.
    """
    if not country_code_list:
        return df

    # OR one mask per code instead of concatenating per-code slices:
    # concatenation repeated rows (and their index labels) whenever a
    # row matched more than one code.
    keep = pd.Series(False, index=df.index)
    for code in country_code_list:
        # regex=False: a code is a literal substring, not a pattern.
        keep = keep | df["country"].str.contains(code, na=False, regex=False)

    return df[keep]
modules/__pycache__/result_table.cpython-310.pyc CHANGED
Binary files a/modules/__pycache__/result_table.cpython-310.pyc and b/modules/__pycache__/result_table.cpython-310.pyc differ
 
modules/result_table.py CHANGED
@@ -17,11 +17,19 @@ def show_table(p1_df, p2_df):
17
 
18
  # INTEGRATE IN PREPROCESSING !!!
19
  # transform strings to list
20
- row_from_p1["crs_3_code_list"] = [row_from_p1['crs_3_code'].item().split(";")[:-1]]
21
- row_from_p2["crs_3_code_list"] = [row_from_p2['crs_3_code'].item().split(";")[:-1]]
 
 
 
 
22
 
23
- row_from_p1["crs_5_code_list"] = [row_from_p1['crs_3_code'].item().split(";")[:-1]]
24
- row_from_p2["crs_5_code_list"] = [row_from_p2['crs_3_code'].item().split(";")[:-1]]
 
 
 
 
25
 
26
  row_from_p1["sdg_list"] = [row_from_p1['sgd_pred_code'].item()]
27
  row_from_p2["sdg_list"] = [row_from_p2['sgd_pred_code'].item()]
 
17
 
18
  # INTEGRATE IN PREPROCESSING !!!
19
  # transform strings to list
20
+ try:
21
+ row_from_p1["crs_3_code_list"] = [row_from_p1['crs_3_code'].item().split(";")[:-1]]
22
+ row_from_p2["crs_3_code_list"] = [row_from_p2['crs_3_code'].item().split(";")[:-1]]
23
+ except:
24
+ row_from_p1["crs_3_code_list"] = []
25
+ row_from_p2["crs_3_code_list"] = []
26
 
27
+ try:
28
+ row_from_p1["crs_5_code_list"] = [row_from_p1['crs_3_code'].item().split(";")[:-1]]
29
+ row_from_p2["crs_5_code_list"] = [row_from_p2['crs_3_code'].item().split(";")[:-1]]
30
+ except:
31
+ row_from_p1["crs_5_code_list"] = []
32
+ row_from_p2["crs_5_code_list"] = []
33
 
34
  row_from_p1["sdg_list"] = [row_from_p1['sgd_pred_code'].item()]
35
  row_from_p2["sdg_list"] = [row_from_p2['sgd_pred_code'].item()]
similarity_page.py CHANGED
@@ -13,6 +13,7 @@ from sentence_transformers import SentenceTransformer
13
  from modules.result_table import show_table
14
  from functions.filter_projects import filter_projects
15
  from functions.calc_matches import calc_matches
 
16
  import psutil
17
  import os
18
  import gc
@@ -140,7 +141,7 @@ def show_page():
140
  crs3_option = st.multiselect(
141
  'CRS 3',
142
  CRS3_MERGED,
143
- placeholder="Select"
144
  )
145
 
146
  # CRS 5 SELECTION
@@ -155,7 +156,7 @@ def show_page():
155
  crs5_option = st.multiselect(
156
  'CRS 5',
157
  crs5_list,
158
- placeholder="Select",
159
  disabled=st.session_state.crs5_option_disabled
160
  )
161
 
@@ -168,13 +169,14 @@ def show_page():
168
  )
169
 
170
  different_orga_checkbox = st.checkbox("Only matches between different organizations")
 
171
 
172
  with col2:
173
  # COUNTRY SELECTION
174
  country_option = st.multiselect(
175
  'Country / Countries',
176
  COUNTRY_OPTION_LIST,
177
- placeholder="Select"
178
  )
179
 
180
  # ORGA SELECTION
@@ -185,7 +187,7 @@ def show_page():
185
  orga_option = st.multiselect(
186
  'Development Bank / Organization',
187
  orga_list,
188
- placeholder="Select"
189
  )
190
 
191
  # SEARCH BOX
@@ -217,11 +219,17 @@ def show_page():
217
  #searched_filtered_df = semantic_search.show_search(model, embeddings, sentences, filtered_df, TOP_X_PROJECTS)
218
  if isinstance(filtered_df, pd.DataFrame):
219
  # FIND MATCHES
 
 
 
 
 
 
 
220
  if different_orga_checkbox:
221
- p1_df, p2_df = calc_matches(filtered_df, projects_df, nonsameorgas_sim_matrix, TOP_X_PROJECTS)
222
-
223
  else:
224
- p1_df, p2_df = calc_matches(filtered_df, projects_df, sim_matrix, TOP_X_PROJECTS)
225
 
226
  # SHOW THE RESULT
227
  show_table(p1_df, p2_df)
 
13
  from modules.result_table import show_table
14
  from functions.filter_projects import filter_projects
15
  from functions.calc_matches import calc_matches
16
+ from functions.same_country_filter import same_country_filter
17
  import psutil
18
  import os
19
  import gc
 
141
  crs3_option = st.multiselect(
142
  'CRS 3',
143
  CRS3_MERGED,
144
+ placeholder="Select CRS3"
145
  )
146
 
147
  # CRS 5 SELECTION
 
156
  crs5_option = st.multiselect(
157
  'CRS 5',
158
  crs5_list,
159
+ placeholder="Select CRS 5",
160
  disabled=st.session_state.crs5_option_disabled
161
  )
162
 
 
169
  )
170
 
171
  different_orga_checkbox = st.checkbox("Only matches between different organizations")
172
+ filterd_country_only_checkbox = st.checkbox("Only matches between filtered countries")
173
 
174
  with col2:
175
  # COUNTRY SELECTION
176
  country_option = st.multiselect(
177
  'Country / Countries',
178
  COUNTRY_OPTION_LIST,
179
+ placeholder="All"
180
  )
181
 
182
  # ORGA SELECTION
 
187
  orga_option = st.multiselect(
188
  'Development Bank / Organization',
189
  orga_list,
190
+ placeholder="All"
191
  )
192
 
193
  # SEARCH BOX
 
219
  #searched_filtered_df = semantic_search.show_search(model, embeddings, sentences, filtered_df, TOP_X_PROJECTS)
220
  if isinstance(filtered_df, pd.DataFrame):
221
  # FIND MATCHES
222
+ ## If only same country checkbox is activated
223
+ if filterd_country_only_checkbox:
224
+ compare_df = same_country_filter(projects_df, country_code_list)
225
+ else:
226
+ compare_df = projects_df
227
+
228
+ ## if show only different orgas checkbox is activated
229
  if different_orga_checkbox:
230
+ p1_df, p2_df = calc_matches(filtered_df, compare_df, nonsameorgas_sim_matrix, TOP_X_PROJECTS)
 
231
  else:
232
+ p1_df, p2_df = calc_matches(filtered_df, compare_df, sim_matrix, TOP_X_PROJECTS)
233
 
234
  # SHOW THE RESULT
235
  show_table(p1_df, p2_df)