Spaces:

GIZ
/

Development-Project-Synergy-Finder

Sleeping

App Files Files Community

Jan Mühlnikel committed on Mar 18, 2024

Commit

4226dcf

1 Parent(s): 4f56571

final initial update

Browse files

Files changed (18) hide show

__pycache__/app.cpython-310.pyc +0 -0
__pycache__/crs.cpython-310.pyc +0 -0
__pycache__/home.cpython-310.pyc +0 -0
__pycache__/sector.cpython-310.pyc +0 -0
__pycache__/similarity.cpython-310.pyc +0 -0
home.py +4 -0
requirements.txt +5 -0
sdg.py +0 -0
sector.py +222 -0
similarity.py +78 -0
src/codelists/country_codes_ISO3166-1alpha-2.csv +3 -0
src/codelists/crs3_codes.csv +3 -0
src/codelists/crs5_codes.csv +3 -0
src/codelists/sdg_goals.csv +3 -0
src/codelists/sdg_targets.csv +3 -0
src/projects/project_region.csv +0 -0
src/projects/project_sector.csv +0 -0
src/projects/project_status.csv +0 -0

__pycache__/app.cpython-310.pyc ADDED Viewed

Binary file (664 Bytes). View file

__pycache__/crs.cpython-310.pyc ADDED Viewed

Binary file (3.71 kB). View file

__pycache__/home.cpython-310.pyc ADDED Viewed

Binary file (439 Bytes). View file

__pycache__/sector.cpython-310.pyc ADDED Viewed

Binary file (5.44 kB). View file

__pycache__/similarity.cpython-310.pyc ADDED Viewed

Binary file (2.53 kB). View file

home.py ADDED Viewed

	@@ -0,0 +1,4 @@

+import streamlit as st
+def show_page():
+    st.write("home")

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+numpy==1.26.4
+pandas==2.1.4
+streamlit==1.32.2
+streamlit-option-menu==0.3.12
+scipy==1.12.0

sdg.py ADDED Viewed

File without changes

sector.py ADDED Viewed

	@@ -0,0 +1,222 @@

+"""
+Page to analyse the link between crs codes, countries and organizations
+"""
+################
+# DEPENDENCIES #
+################
+import streamlit as st
+import pandas as pd
+import utils.crs_table as crs_table
+import utils.sdg_table as sdg_table
+import utils.filter_modules as filter_modules
+from importlib.machinery import SourceFileLoader
+crs_overlap = SourceFileLoader("crs_overlap", "data/models/crs_overlap.py").load_module()
+sdg_overlap = SourceFileLoader("sdg_overlap", "data/models/sdg_overlap.py").load_module()
+CONSTANTS = SourceFileLoader("CONSTANTS", "config/CONSTANTS.py").load_module()
+# CACHE DATA
+# FETCH NEEDED DATA AND STORE IN CACHE MEMORY TO SAVE LOADING TIME
+@st.cache_data
+def getCRS3():
+    # Read in CRS3 CODELISTS
+    crs3_df = pd.read_csv('app/src/codelists/crs3_codes.csv')
+    CRS3_CODES = crs3_df['code'].tolist()
+    CRS3_NAME = crs3_df['name'].tolist()
+    CRS3_MERGED = {f"{name} - {code}": code for name, code in zip(CRS3_NAME, CRS3_CODES)}
+    return CRS3_MERGED
+@st.cache_data
+def getCRS5():
+    # Read in CRS3 CODELISTS
+    crs5_df = pd.read_csv('app/src/codelists/crs5_codes.csv')
+    CRS5_CODES = crs5_df['code'].tolist()
+    CRS5_NAME = crs5_df['name'].tolist()
+    CRS5_MERGED = {code: [f"{name} - {code}"] for name, code in zip(CRS5_NAME, CRS5_CODES)}
+    return CRS5_MERGED
+@st.cache_data
+def getSDG():
+    # Read in SDG CODELISTS
+    sdg_df = pd.read_csv('app/src/codelists/sdg_goals.csv')
+    SDG_NAMES = sdg_df['name'].tolist()
+    return SDG_NAMES
+@st.cache_data
+def getCountry():
+    # Read in countries from codelist
+    country_df = pd.read_csv('app/src/codelists/country_codes_ISO3166-1alpha-2.csv')
+    COUNTRY_CODES = country_df['Alpha-2 code'].tolist()
+    COUNTRY_NAMES = country_df['Country'].tolist()
+    return country_df, COUNTRY_CODES, COUNTRY_NAMES
+CRS3_MERGED = getCRS3()
+CRS5_MERGED = getCRS5()
+SDG_NAMES = getSDG()
+country_df, COUNTRY_CODES, COUNTRY_NAMES = getCountry()
+# SPECIAL SELECTIONS (passed to the filter_modules option widgets together with the regular values)
+## COUNTRY
+SPECIAL_COUNTRY_SLECTIONS = ["All"]
+SHOW_ALL_COUNTRIES = False # Whether all countries should be shown in matching
+## ORGANIZATION
+SPECIAL_ORGA_SLECTIONS = ["All"]
+SHOW_ALL_ORGAS = False # presumably the organization counterpart of SHOW_ALL_COUNTRIES - TODO confirm
+########
+# PAGE #
+########
+def show_page():
+    def show_crs():
+        # SESSION STATES
+        st.session_state.crs5_option_disabled = True
+        # SELECTION FIELDS
+        col1, col2 = st.columns([1, 1])
+        with col1:
+            #####################
+            # CRS 3 CODE SELECT #
+            #####################
+            crs3_option = st.multiselect(
+                'CRS 3',
+                CRS3_MERGED,
+                placeholder="Select"
+                )
+            #####################
+            # CRS 5 CODE SELECT #
+            #####################
+            # Only enable crs5 select field when crs3 code is selected
+            if crs3_option != []:
+                st.session_state.crs5_option_disabled = False
+            # define list of crs5 codes dependend on crs3 codes
+            crs5_list = [txt[0].replace('"', "") for crs3_item in crs3_option for code, txt in CRS5_MERGED.items() if str(code)[:3] == str(crs3_item)[-3:]]
+            # crs5 select field
+            crs5_option = st.multiselect(
+                'CRS 5',
+                crs5_list,
+                placeholder="Select",
+                disabled=st.session_state.crs5_option_disabled
+                )
+        with col2:
+            # COUNTRY SELECTION
+            country_option = filter_modules.country_option(SPECIAL_COUNTRY_SLECTIONS, COUNTRY_NAMES)
+            # ORGA SELECTION
+            orga_option = filter_modules.orga_option(SPECIAL_ORGA_SLECTIONS, CONSTANTS.ORGA_SEARCH)
+        ################
+        # SHOW RESULTS #
+        ################
+        # Extract Orgas from multiselect
+        if "All" in orga_option:
+            SHOW_ALL_ORGAS = True
+            selected_orgas = []
+        else:
+            SHOW_ALL_ORGAS = False
+            selected_orgas = [str(o).replace(")", "").lower().split("(")[1] for o in orga_option]
+        if country_option != []:
+            # all selection
+            if "All" in country_option:
+                SHOW_ALL_COUNTRIES = True
+                country_option.remove("All")
+            else:
+                SHOW_ALL_COUNTRIES = False
+            if crs3_option != []:
+                # CRS 3 codes from option
+                crs3_list = [i[-3:] for i in crs3_option]
+                # get country codes from multiselect
+                country_names = [str(c) for c in country_option]
+                country_codes = [
+                    country_df[country_df['Country'] == c]['Alpha-2 code'].values[0].replace('"', "").strip(" ")
+                    for c in country_names
+                    ]
+                result_df = crs_overlap.calc_crs3(crs3_list, country_codes, selected_orgas, SHOW_ALL_COUNTRIES, SHOW_ALL_ORGAS)
+                if crs5_option != []:
+                    # CRS 5 codes from option
+                    crs5_list = [i[-5:] for i in crs5_option]
+                    result_df = crs_overlap.calc_crs5(crs5_list, country_codes, selected_orgas, SHOW_ALL_COUNTRIES, SHOW_ALL_ORGAS)
+                # TABLE FOR CRS OVERLAP
+                crs_table.show_table(result_df)
+    def show_sdg():
+        # SELECTION
+        col1, col2 = st.columns([1, 1])
+        with col1:
+            # CRS3 CODE SELECT
+            sdg_option = st.selectbox(
+                label = 'SDG',
+                index = None,
+                placeholder = "Select SDG",
+                options = SDG_NAMES,
+                )
+        with col2:
+            # COUNTRY SELECTION
+            country_option = filter_modules.country_option(SPECIAL_COUNTRY_SLECTIONS, COUNTRY_NAMES)
+            # ORGA SELECTION
+            orga_option = filter_modules.orga_option(SPECIAL_ORGA_SLECTIONS, CONSTANTS.ORGA_SEARCH)
+        # SHOW RESULTS
+        if sdg_option != None:
+            sdg_int = int(sdg_option.split(" ")[0].replace(".", ""))
+            # Extract Orgas from multiselect
+            if "All" in orga_option:
+                SHOW_ALL_ORGAS = True
+                selected_orgas = []
+            else:
+                SHOW_ALL_ORGAS = False
+                selected_orgas = [str(o).replace(")", "").lower().split("(")[1] for o in orga_option]
+            if country_option != []:
+                # all selection
+                if "All" in country_option:
+                    SHOW_ALL_COUNTRIES = True
+                    country_option.remove("All")
+                else:
+                    SHOW_ALL_COUNTRIES = False
+                country_names = [str(c) for c in country_option]
+                country_codes = [
+                    country_df[country_df['Country'] == c]['Alpha-2 code'].values[0].replace('"', "").strip(" ")
+                    for c in country_names
+                    ]
+                result_df = sdg_overlap.calc_crs3(sdg_int, country_codes, selected_orgas, SHOW_ALL_COUNTRIES, SHOW_ALL_ORGAS)
+                # TABLE FOR SDG OVERLAP
+                sdg_table.show_table(result_df)
+    # SELECT IF CRS or SDG Match
+    match_option = st.selectbox(
+                label = 'Matching Method',
+                index = 0,
+                placeholder = "Select",
+                options = ["CRS", "SDG"],
+                )
+    st.write("------------------")
+    if match_option == "CRS":
+        show_crs()
+    elif match_option == "SDG":
+        show_sdg()

similarity.py ADDED Viewed

	@@ -0,0 +1,78 @@

+"""
+Page for similarities
+"""
+################
+# DEPENDENCIES #
+################
+import streamlit as st
+import pandas as pd
+from scipy.sparse import load_npz
+import utils.similarity_table as similarity_table
+# CACHE DATA
+# Load Similarity matrix
+@st.cache_data
+def load_sim_matrix():
+    loaded_matrix = load_npz("app/src/similarities.npz")
+    dense_matrix = loaded_matrix.toarray()
+    return dense_matrix
+@st.cache_data
+def load_projects():
+    orgas_df = pd.read_csv("app/src/projects/project_orgas.csv")
+    region_df = pd.read_csv("app/src/projects/project_region.csv")
+    sector_df = pd.read_csv("app/src/projects/project_sector.csv")
+    status_df = pd.read_csv("app/src/projects/project_status.csv")
+    texts_df = pd.read_csv("app/src/projects/project_texts.csv")
+    projects_df = pd.merge(orgas_df, region_df, on='iati_id', how='inner')
+    projects_df = pd.merge(projects_df, sector_df, on='iati_id', how='inner')
+    projects_df = pd.merge(projects_df, status_df, on='iati_id', how='inner')
+    projects_df = pd.merge(projects_df, texts_df, on='iati_id', how='inner')
+    return projects_df
+# LOAD DATA (runs at import time; both loaders are wrapped in st.cache_data)
+sim_matrix = load_sim_matrix()
+projects_df = load_projects()
+def show_page():
+    st.write("Similarities")
+    df_subset = projects_df.head(10)
+    selected_index = st.selectbox('Select an entry', df_subset.index, format_func=lambda x: df_subset.loc[x, 'iati_id'])
+    st.write(selected_index)
+    # add index and similarity together
+    indecies = range(0, len(sim_matrix))
+    similarities = sim_matrix[selected_index]
+    zipped_sims = list(zip(indecies, similarities))
+    # remove all 0 similarities
+    filtered_sims = [(index, similarity) for index, similarity in zipped_sims if similarity != 0]
+    # Select and sort top 20 most similar projects
+    sorted_sims = sorted(filtered_sims, key=lambda x: x[1], reverse=True)
+    top_20_sims = sorted_sims[:20]
+    # create result data frame
+    index_list = [tup[0] for tup in top_20_sims]
+    print(index_list)
+    result_df = projects_df.iloc[index_list]
+    print(len(result_df))
+    print(len(result_df))
+    # add other colums to result df
+    similarity_list = [tup[1] for tup in top_20_sims]
+    result_df["similarity"] = similarity_list
+    similarity_table.show_table(result_df, similarity_list)

src/codelists/country_codes_ISO3166-1alpha-2.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1ff1ad92034a4a593138fcbb7570ec5015c3c28a4476f95015a39d0bf257382a
+size 13113

src/codelists/crs3_codes.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cfd7bf86baf7bbc54c880c098b89b803adfb060c2c9ba55ee976cc47c2be426a
+size 3218

src/codelists/crs5_codes.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:84a522ad573ad1866835cb24efc7984016ef17b9990ac2484345705ac82a0d80
+size 100133

src/codelists/sdg_goals.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d14fbb5ea0582758b80c99d6726406852af2799dc53a3da646192535c2b3a08f
+size 1887

src/codelists/sdg_targets.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7bcfb315fcb778c1503557b6b76bec0159ccc25933a5de1cb6c51542064190e9
+size 36758

src/projects/project_region.csv CHANGED Viewed

The diff for this file is too large to render. See raw diff

src/projects/project_sector.csv CHANGED Viewed

The diff for this file is too large to render. See raw diff

src/projects/project_status.csv CHANGED Viewed

The diff for this file is too large to render. See raw diff