Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
File size: 1,736 Bytes
57a5237 71de22d 57a5237 71de22d e3302f1 71de22d e3302f1 55a6bd8 e3302f1 55a6bd8 57a5237 e3302f1 7823114 e3302f1 71de22d e3302f1 55a6bd8 a272945 55a6bd8 57a5237 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
import pandas as pd
from functions.semantic_search import search
def contains_code(crs_codes, code_list):
    """Return True if any code in *crs_codes* appears in *code_list*.

    Args:
        crs_codes: Cell value holding one or more codes separated by ``;``.
            It is converted with ``str()`` first, so non-string values
            (numbers, NaN) are accepted.
        code_list: Collection of codes to match against.

    Returns:
        True when at least one of the separated codes is in ``code_list``.
    """
    # Strip each piece so values written as "110; 120" still match "120".
    pieces = (piece.strip() for piece in str(crs_codes).split(';'))
    return any(piece in code_list for piece in pieces)
def filter_projects(df, crs3_list, crs5_list, sdg_str, country_code_list, orga_code_list, query, model, embeddings, TOP_X_PROJECTS=30):
    """Narrow the project table by the selected filters and optional query.

    Args:
        df: DataFrame of projects. The columns read here are ``crs_3_code``,
            ``crs_5_code``, ``sgd_pred_code``, ``country`` and
            ``orga_abbreviation``.
        crs3_list: Selected CRS-3 codes; applied only when ``crs5_list`` is
            empty.
        crs5_list: Selected CRS-5 codes; take precedence over ``crs3_list``.
        sdg_str: SDG number as a string, or ``""`` for no SDG filter. It is
            converted with ``int()`` before comparison.
        country_code_list: Country codes matched as literal substrings of the
            ``country`` column.
        orga_code_list: Organisation abbreviations matched exactly.
        query: Free-text query, or ``""`` to skip the semantic search.
        model: Passed through unchanged to ``search``.
        embeddings: Passed through unchanged to ``search``.
        TOP_X_PROJECTS: Upper bound handed to ``search``; capped at the number
            of rows left after filtering.

    Returns:
        The filtered DataFrame, or ``None`` when none of the CRS, SDG or
        query filters is set.
    """
    # The country and organisation filters alone do not trigger filtering:
    # at least one of the CRS, SDG or query filters has to be set.
    # NOTE(review): the reviewed copy of this file had lost its indentation;
    # the final `return df` is taken to sit inside this check, so the
    # function yields None here. Confirm against the callers.
    has_primary_filter = crs3_list != [] or crs5_list != [] or sdg_str != "" or query != ""
    if not has_primary_filter:
        return None

    # FILTER CRS: a CRS-5 selection overrides any CRS-3 selection.
    if crs5_list:
        df = df[df['crs_5_code'].apply(lambda x: contains_code(x, crs5_list))]
    elif crs3_list:
        df = df[df['crs_3_code'].apply(lambda x: contains_code(x, crs3_list))]

    # FILTER SDG
    if sdg_str != "":
        df = df[df["sgd_pred_code"] == int(sdg_str)]

    # FILTER COUNTRY
    if country_code_list != []:
        # OR the per-country matches into one mask so that a project listing
        # several of the selected countries is returned once, in its
        # original position, instead of once per matching country.
        country_col = df["country"]
        country_mask = pd.Series(False, index=df.index)
        for country_code in country_code_list:
            country_mask = country_mask | country_col.str.contains(country_code, na=False, regex=False)
        df = df[country_mask]

    # FILTER ORGANIZATION
    if orga_code_list != []:
        df = df[df['orga_abbreviation'].isin(orga_code_list)]

    # FILTER QUERY
    if query != "" and len(df) > 0:
        # Never ask for more results than there are rows left.
        top_x = min(TOP_X_PROJECTS, len(df))
        df = search(query, model, embeddings, df, top_x)

    return df
|