Spaces:

GIZ
/

Development-Project-Synergy-Finder

Sleeping

App Files Files Community

Jan Mühlnikel committed on Mar 22, 2024

Commit

6a85a81

1 Parent(s): 923adf2

added same country check feature

Browse files

Files changed (7) hide show

__pycache__/similarity_page.cpython-310.pyc +0 -0
functions/__pycache__/calc_matches.cpython-310.pyc +0 -0
functions/calc_matches.py +8 -2
functions/same_country_filter.py +16 -0
modules/__pycache__/result_table.cpython-310.pyc +0 -0
modules/result_table.py +12 -4
similarity_page.py +15 -7

__pycache__/similarity_page.cpython-310.pyc CHANGED Viewed

Binary files a/__pycache__/similarity_page.cpython-310.pyc and b/__pycache__/similarity_page.cpython-310.pyc differ

functions/__pycache__/calc_matches.cpython-310.pyc CHANGED Viewed

Binary files a/functions/__pycache__/calc_matches.cpython-310.pyc and b/functions/__pycache__/calc_matches.cpython-310.pyc differ

functions/calc_matches.py CHANGED Viewed

@@ -9,12 +9,18 @@ def calc_matches(filtered_df, project_df, similarity_matrix, top_x):
     # filter out all row considering the filter
     filtered_df_indecies_list = filtered_df.index
     np.fill_diagonal(similarity_matrix, 0)
-    match_matrix = similarity_matrix[filtered_df_indecies_list]
     # get row (project1) and column (project2) with highest similarity in filtered df
-    top_indices = np.unravel_index(np.argsort(match_matrix, axis=None)[-top_x:], match_matrix.shape)
     # get the corresponding similarity values
     top_values = match_matrix[top_indices]

     # filter out all row considering the filter
     filtered_df_indecies_list = filtered_df.index
+    project_df_indecies_list = project_df.index
     np.fill_diagonal(similarity_matrix, 0)
+    match_matrix = similarity_matrix[filtered_df_indecies_list, :][:, project_df_indecies_list]
+    best_matches_list = np.argsort(match_matrix, axis=None)
+    if len(best_matches_list) < top_x:
+        top_x = len(best_matches_list)
     # get row (project1) and column (project2) with highest similarity in filtered df
+    top_indices = np.unravel_index(best_matches_list[-top_x:], match_matrix.shape)
     # get the corresponding similarity values
     top_values = match_matrix[top_indices]

functions/same_country_filter.py ADDED Viewed

	@@ -0,0 +1,16 @@

+import pandas as pd
+from functions.semantic_search import search
def same_country_filter(df, country_code_list):
    """Keep only the rows of ``df`` whose country matches a selected code.

    A row is kept when the text in its ``"country"`` column contains at
    least one of the given codes as a literal substring. Rows with a missing
    country value never match.

    Args:
        df: DataFrame with a string ``"country"`` column.
        country_code_list: Country codes to keep. ``None`` or an empty
            sequence means "no country restriction".

    Returns:
        ``df`` itself when no codes are given; otherwise a filtered
        DataFrame that keeps the original index labels and row order. Each
        matching row appears exactly once, even when it matches several
        codes.
    """
    # No selection means no restriction: hand the frame back untouched.
    if country_code_list is None or len(country_code_list) == 0:
        return df

    country_column = df["country"]

    # OR the per-code matches into a single mask instead of concatenating
    # one sub-frame per code. A row listing several selected countries would
    # otherwise be emitted once per code, producing duplicate index labels.
    keep_mask = pd.Series(False, index=df.index)
    for code in country_code_list:
        # regex=False: codes are plain text, not patterns.
        keep_mask = keep_mask | country_column.str.contains(
            code, regex=False, na=False
        )

    return df[keep_mask]

modules/__pycache__/result_table.cpython-310.pyc CHANGED Viewed

Binary files a/modules/__pycache__/result_table.cpython-310.pyc and b/modules/__pycache__/result_table.cpython-310.pyc differ

modules/result_table.py CHANGED Viewed

@@ -17,11 +17,19 @@ def show_table(p1_df, p2_df):
             # INTEGRATE IN PREPROCESSING !!!
             # transform strings to list
-            row_from_p1["crs_3_code_list"] = [row_from_p1['crs_3_code'].item().split(";")[:-1]]
-            row_from_p2["crs_3_code_list"] = [row_from_p2['crs_3_code'].item().split(";")[:-1]]
-            row_from_p1["crs_5_code_list"] = [row_from_p1['crs_3_code'].item().split(";")[:-1]]
-            row_from_p2["crs_5_code_list"] = [row_from_p2['crs_3_code'].item().split(";")[:-1]]
             row_from_p1["sdg_list"] = [row_from_p1['sgd_pred_code'].item()]
             row_from_p2["sdg_list"] = [row_from_p2['sgd_pred_code'].item()]

             # INTEGRATE IN PREPROCESSING !!!
             # transform strings to list
+            try:
+                row_from_p1["crs_3_code_list"] = [row_from_p1['crs_3_code'].item().split(";")[:-1]]
+                row_from_p2["crs_3_code_list"] = [row_from_p2['crs_3_code'].item().split(";")[:-1]]
+            except:
+                row_from_p1["crs_3_code_list"] = []
+                row_from_p2["crs_3_code_list"] = []
+            try:
+                row_from_p1["crs_5_code_list"] = [row_from_p1['crs_3_code'].item().split(";")[:-1]]
+                row_from_p2["crs_5_code_list"] = [row_from_p2['crs_3_code'].item().split(";")[:-1]]
+            except:
+                row_from_p1["crs_5_code_list"] = []
+                row_from_p2["crs_5_code_list"] = []
             row_from_p1["sdg_list"] = [row_from_p1['sgd_pred_code'].item()]
             row_from_p2["sdg_list"] = [row_from_p2['sgd_pred_code'].item()]

similarity_page.py CHANGED Viewed

@@ -13,6 +13,7 @@ from sentence_transformers import SentenceTransformer
 from modules.result_table import show_table
 from functions.filter_projects import filter_projects
 from functions.calc_matches import calc_matches
 import psutil
 import os
 import gc
@@ -140,7 +141,7 @@ def show_page():
         crs3_option = st.multiselect(
                         'CRS 3',
                         CRS3_MERGED,
-                        placeholder="Select"
                         )
         # CRS 5 SELECTION
@@ -155,7 +156,7 @@ def show_page():
         crs5_option = st.multiselect(
             'CRS 5',
             crs5_list,
-            placeholder="Select",
             disabled=st.session_state.crs5_option_disabled
             )
@@ -168,13 +169,14 @@ def show_page():
                 )
         different_orga_checkbox = st.checkbox("Only matches between different organizations")
     with col2:
         # COUNTRY SELECTION
         country_option = st.multiselect(
                 'Country / Countries',
                 COUNTRY_OPTION_LIST,
-                placeholder="Select"
                 )
         # ORGA SELECTION
@@ -185,7 +187,7 @@ def show_page():
         orga_option = st.multiselect(
                 'Development Bank / Organization',
                 orga_list,
-                placeholder="Select"
                 )
         # SEARCH BOX
@@ -217,11 +219,17 @@ def show_page():
         #searched_filtered_df = semantic_search.show_search(model, embeddings, sentences, filtered_df, TOP_X_PROJECTS)
     if isinstance(filtered_df, pd.DataFrame):
         # FIND MATCHES
         if different_orga_checkbox:
-            p1_df, p2_df = calc_matches(filtered_df, projects_df, nonsameorgas_sim_matrix, TOP_X_PROJECTS)
         else:
-            p1_df, p2_df = calc_matches(filtered_df, projects_df, sim_matrix, TOP_X_PROJECTS)
         # SHOW THE RESULT
         show_table(p1_df, p2_df)

 from modules.result_table import show_table
 from functions.filter_projects import filter_projects
 from functions.calc_matches import calc_matches
+from functions.same_country_filter import same_country_filter
 import psutil
 import os
 import gc
         crs3_option = st.multiselect(
                         'CRS 3',
                         CRS3_MERGED,
+                        placeholder="Select CRS3"
                         )
         # CRS 5 SELECTION
         crs5_option = st.multiselect(
             'CRS 5',
             crs5_list,
+            placeholder="Select CRS 5",
             disabled=st.session_state.crs5_option_disabled
             )
                 )
         different_orga_checkbox = st.checkbox("Only matches between different organizations")
+        filterd_country_only_checkbox = st.checkbox("Only matches between filtered countries")
     with col2:
         # COUNTRY SELECTION
         country_option = st.multiselect(
                 'Country / Countries',
                 COUNTRY_OPTION_LIST,
+                placeholder="All"
                 )
         # ORGA SELECTION
         orga_option = st.multiselect(
                 'Development Bank / Organization',
                 orga_list,
+                placeholder="All"
                 )
         # SEARCH BOX
         #searched_filtered_df = semantic_search.show_search(model, embeddings, sentences, filtered_df, TOP_X_PROJECTS)
     if isinstance(filtered_df, pd.DataFrame):
         # FIND MATCHES
+        ## If only same country checkbox is activated
+        if filterd_country_only_checkbox:
+            compare_df = same_country_filter(projects_df, country_code_list)
+        else:
+            compare_df = projects_df
+        ## if show only different orgas checkbox is activated
         if different_orga_checkbox:
+            p1_df, p2_df = calc_matches(filtered_df, compare_df, nonsameorgas_sim_matrix, TOP_X_PROJECTS)
         else:
+            p1_df, p2_df = calc_matches(filtered_df, compare_df, sim_matrix, TOP_X_PROJECTS)
         # SHOW THE RESULT
         show_table(p1_df, p2_df)