Jan Mühlnikel committed on
Commit
55a6bd8
·
1 Parent(s): f123b98

added crs5 and sdg selection

Browse files
__pycache__/similarity_page.cpython-310.pyc CHANGED
Binary files a/__pycache__/similarity_page.cpython-310.pyc and b/__pycache__/similarity_page.cpython-310.pyc differ
 
functions/__pycache__/calc_matches.cpython-310.pyc CHANGED
Binary files a/functions/__pycache__/calc_matches.cpython-310.pyc and b/functions/__pycache__/calc_matches.cpython-310.pyc differ
 
functions/__pycache__/filter_projects.cpython-310.pyc CHANGED
Binary files a/functions/__pycache__/filter_projects.cpython-310.pyc and b/functions/__pycache__/filter_projects.cpython-310.pyc differ
 
functions/calc_matches.py CHANGED
@@ -14,7 +14,7 @@ def calc_matches(filtered_df, project_df, similarity_matrix):
14
  match_matrix = similarity_matrix[filtered_df_indecies_list]
15
 
16
  # get row (project1) and column (project2) with highest similarity in filtered df
17
- top_indices = np.unravel_index(np.argsort(match_matrix, axis=None)[-60:], match_matrix.shape)
18
 
19
  # get the corresponding similarity values
20
  top_values = match_matrix[top_indices]
 
14
  match_matrix = similarity_matrix[filtered_df_indecies_list]
15
 
16
  # get row (project1) and column (project2) with highest similarity in filtered df
17
+ top_indices = np.unravel_index(np.argsort(match_matrix, axis=None)[-30:], match_matrix.shape)
18
 
19
  # get the corresponding similarity values
20
  top_values = match_matrix[top_indices]
functions/filter_projects.py CHANGED
@@ -1,13 +1,22 @@
1
  import pandas as pd
2
 
3
-
4
  def contains_code(crs_codes, code_list):
5
  codes = str(crs_codes).split(';')
6
  return any(code in code_list for code in codes)
7
 
8
- def filter_projects(df, crs3_list):
9
- filtered_crs_df = df[df['crs_3_code'].apply(lambda x: contains_code(x, crs3_list))]
 
 
 
 
 
 
 
 
 
10
 
11
- return filtered_crs_df
 
12
 
13
 
 
1
  import pandas as pd
2
 
 
3
  def contains_code(crs_codes, code_list):
4
  codes = str(crs_codes).split(';')
5
  return any(code in code_list for code in codes)
6
 
7
+ def filter_projects(df, crs3_list, crs5_list, sdg_str):
8
+ if crs3_list != [] or crs5_list != [] or sdg_str != "":
9
+ if crs3_list and not crs5_list:
10
+ df = df[df['crs_3_code'].apply(lambda x: contains_code(x, crs3_list))]
11
+ elif crs3_list and crs5_list:
12
+ df = df[df['crs_5_code'].apply(lambda x: contains_code(x, crs5_list))]
13
+ elif not crs3_list and crs5_list:
14
+ df = df[df['crs_5_code'].apply(lambda x: contains_code(x, crs5_list))]
15
+
16
+ if sdg_str != "":
17
+ df = df[df["sgd_pred_code"] == int(sdg_str)]
18
 
19
+ return df
20
+
21
 
22
 
modules/__pycache__/result_table.cpython-310.pyc CHANGED
Binary files a/modules/__pycache__/result_table.cpython-310.pyc and b/modules/__pycache__/result_table.cpython-310.pyc differ
 
modules/result_table.py CHANGED
@@ -7,10 +7,15 @@ def show_table(p1_df, p2_df):
7
  st.write("------------------")
8
 
9
  st.dataframe(
10
- p1_df[["title_main", "orga_abbreviation", "client", "description_main", "country", "sgd_pred_code", "crs_3_code", "crs_5_code", "similarity"]],
11
  use_container_width = True,
12
  height = 35 + 35 * len(p1_df),
13
  column_config={
 
 
 
 
 
14
  "orga_abbreviation": st.column_config.TextColumn(
15
  "Organization",
16
  help="If description not in English, description in other language provided",
@@ -59,10 +64,15 @@ def show_table(p1_df, p2_df):
59
  st.write("------------------")
60
 
61
  st.dataframe(
62
- p2_df[["title_main", "orga_abbreviation", "client", "description_main", "country", "sgd_pred_code", "crs_3_code", "crs_5_code", "similarity"]],
63
  use_container_width = True,
64
  height = 35 + 35 * len(p2_df),
65
  column_config={
 
 
 
 
 
66
  "orga_abbreviation": st.column_config.TextColumn(
67
  "Organization",
68
  help="If description not in English, description in other language provided",
 
7
  st.write("------------------")
8
 
9
  st.dataframe(
10
+ p1_df[["iati_id", "title_main", "orga_abbreviation", "client", "description_main", "country", "sgd_pred_code", "crs_3_code", "crs_5_code", "similarity"]],
11
  use_container_width = True,
12
  height = 35 + 35 * len(p1_df),
13
  column_config={
14
+ "iati_id": st.column_config.TextColumn(
15
+ "IATI ID",
16
+ help="IATI Project ID",
17
+ disabled=True
18
+ ),
19
  "orga_abbreviation": st.column_config.TextColumn(
20
  "Organization",
21
  help="If description not in English, description in other language provided",
 
64
  st.write("------------------")
65
 
66
  st.dataframe(
67
+ p2_df[["iati_id", "title_main", "orga_abbreviation", "client", "description_main", "country", "sgd_pred_code", "crs_3_code", "crs_5_code", "similarity"]],
68
  use_container_width = True,
69
  height = 35 + 35 * len(p2_df),
70
  column_config={
71
+ "iati_id": st.column_config.TextColumn(
72
+ "IATI ID",
73
+ help="IATI Project ID",
74
+ disabled=True
75
+ ),
76
  "orga_abbreviation": st.column_config.TextColumn(
77
  "Organization",
78
  help="If description not in English, description in other language provided",
similarity_page.py CHANGED
@@ -17,6 +17,7 @@ from functions.filter_projects import filter_projects
17
  from functions.calc_matches import calc_matches
18
  import psutil
19
  import os
 
20
 
21
  def get_process_memory():
22
  process = psutil.Process(os.getpid())
@@ -116,6 +117,7 @@ def show_page():
116
  st.write(f"Current RAM usage of this app: {get_process_memory():.2f} MB")
117
  st.write("Similarities")
118
 
 
119
  col1, col2 = st.columns([1, 1])
120
  with col1:
121
  # CRS 3 SELECTION
@@ -124,6 +126,31 @@ def show_page():
124
  CRS3_MERGED,
125
  placeholder="Select"
126
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
 
128
  with col2:
129
  st.write("x")
@@ -131,12 +158,22 @@ def show_page():
131
 
132
  # CRS CODE LIST
133
  crs3_list = [i[-3:] for i in crs3_option]
 
 
 
 
 
 
 
134
 
135
  # FILTER DF WITH SELECTED FILTER OPTIONS
136
- filtered_df = filter_projects(projects_df, crs3_list)
137
 
138
  # FIND MATCHES
139
  p1_df, p2_df = calc_matches(filtered_df, projects_df, sim_matrix)
140
 
141
  # SHOW THE RESULT
142
  show_table(p1_df, p2_df)
 
 
 
 
17
  from functions.calc_matches import calc_matches
18
  import psutil
19
  import os
20
+ import gc
21
 
22
  def get_process_memory():
23
  process = psutil.Process(os.getpid())
 
117
  st.write(f"Current RAM usage of this app: {get_process_memory():.2f} MB")
118
  st.write("Similarities")
119
 
120
+ st.session_state.crs5_option_disabled = True
121
  col1, col2 = st.columns([1, 1])
122
  with col1:
123
  # CRS 3 SELECTION
 
126
  CRS3_MERGED,
127
  placeholder="Select"
128
  )
129
+
130
+ # CRS 5 SELECTION
131
+ ## Only enable crs5 select field when crs3 code is selected
132
+ if crs3_option != []:
133
+ st.session_state.crs5_option_disabled = False
134
+
135
+ ## define list of crs5 codes dependend on crs3 codes
136
+ crs5_list = [txt[0].replace('"', "") for crs3_item in crs3_option for code, txt in CRS5_MERGED.items() if str(code)[:3] == str(crs3_item)[-3:]]
137
+
138
+ ## crs5 select field
139
+ crs5_option = st.multiselect(
140
+ 'CRS 5',
141
+ crs5_list,
142
+ placeholder="Select",
143
+ disabled=st.session_state.crs5_option_disabled
144
+ )
145
+
146
+ # SDG SELECTION
147
+ sdg_option = st.selectbox(
148
+ label = 'SDG',
149
+ index = None,
150
+ placeholder = "Select SDG",
151
+ options = SDG_NAMES[:-1],
152
+ )
153
+
154
 
155
  with col2:
156
  st.write("x")
 
158
 
159
  # CRS CODE LIST
160
  crs3_list = [i[-3:] for i in crs3_option]
161
+ crs5_list = [i[-5:] for i in crs5_option]
162
+
163
+ # SDG CODE LIST
164
+ if sdg_option != None:
165
+ sdg_str = sdg_option[0]
166
+ else:
167
+ sdg_str = ""
168
 
169
  # FILTER DF WITH SELECTED FILTER OPTIONS
170
+ filtered_df = filter_projects(projects_df, crs3_list, crs5_list, sdg_str)
171
 
172
  # FIND MATCHES
173
  p1_df, p2_df = calc_matches(filtered_df, projects_df, sim_matrix)
174
 
175
  # SHOW THE RESULT
176
  show_table(p1_df, p2_df)
177
+
178
+ del p1_df, p2_df, crs3_list, crs5_list, sdg_str, filtered_df
179
+ gc.collect()