import tempfile

import streamlit as st
from setfit import SetFitModel

from file_processing import get_paragraphs
####################################### Dashboard ######################################################
# App: page title and introductory text rendered at the top of the dashboard.
st.title("Identify references to vulnerable groups.")
st.write("""Vulnerable groups encompass various communities and individuals who are disproportionately affected by the impacts of climate change
due to their socioeconomic status, geographical location, or inherent characteristics. By incorporating the needs and perspectives of these groups
into national climate policies, governments can ensure equitable outcomes, promote social justice, and strive to build resilience within the most marginalized populations,
fostering a more sustainable and inclusive society as we navigate the challenges posed by climate change. This app allows you to identify whether a text contains any
references to vulnerable groups, for example when talking about policy documents.""")

# Document upload. The result is checked against None further down before the
# upload is processed, i.e. it stays None until the user has provided a file.
uploaded_file = st.file_uploader("Upload your file here")

# Create text input box (currently disabled)
#input_text = st.text_area(label='Please enter your text here', value="This policy has been implemented to support women.")
#st.write('Prediction:', model(input_text))
######################################### Model #########################################################
# Load the pretrained SetFit classifier by its model identifier.
# NOTE(review): the commented-out prediction pipeline further down calls
# `vg_model(...)`, but the object is bound to `model` here; align the names
# before re-enabling that code.
# NOTE(review): this runs at module top level; if Streamlit re-executes the
# script on each interaction the model is loaded again every time. Consider
# wrapping the load in Streamlit's resource cache if the installed version
# supports it.
model = SetFitModel.from_pretrained("leavoigt/vulnerable_groups")
# Define the classes: integer class id -> human-readable group name.
# The (currently commented-out) prediction code looks up the position of the
# `1` in each prediction vector in this mapping, so the ids presumably follow
# the model's output order — confirm against the model card before editing.
id2label = {
    0: 'Agricultural communities',
    1: 'Children and Youth',
    2: 'Coastal communities',
    3: 'Drought-prone regions',
    4: 'Economically disadvantaged communities',
    5: 'Elderly population',
    6: 'Ethnic minorities and indigenous people',
    7: 'Informal sector workers',
    8: 'Migrants and Refugees',
    9: 'Other',
    10: 'People with Disabilities',
    11: 'Rural populations',
    12: 'Sexual minorities (LGBTQI+)',
    13: 'Urban populations',
    14: 'Women',
}
### Process document to paragraphs
# Source: https://blog.jcharistech.com/2021/01/21/how-to-save-uploaded-files-to-directory-in-streamlit-apps/
# Store uploaded file temporarily in directory to get file path (necessary for processing)
# def save_uploadedfile(upl_file):
# with open(os.path.join("tempDir",upl_file.name),"wb") as f:
# f.write(upl_file.getbuffer())
# return st.success("Saved File:{} to tempDir".format(upl_file.name))
# if uploaded_file is not None:
# # Save the file
# file_details = {"FileName": uploaded_file.name, "FileType": uploaded_file.type}
# save_uploadedfile(uploaded_file)
# #Get the file path
# NOTE(review): this second, PDF-only uploader renders a widget whose value is
# never read; the processing below is guarded by and reads `uploaded_file`.
# Kept so the page is unchanged — decide which uploader should remain.
file = st.file_uploader("File upload", type=["pdf"])

if uploaded_file is not None:
    # Copy the upload into a named temporary file so it has a path on disk.
    # With the default settings the file is removed again when the `with`
    # block exits, so any path-based processing must happen inside it.
    with tempfile.NamedTemporaryFile(mode="wb") as temp:
        bytes_data = uploaded_file.getvalue()
        temp.write(bytes_data)
        # Push buffered bytes to disk so readers opening temp.name see them.
        temp.flush()
        print(temp.name)
# # Process file
# par_list = get_paragraphs(uploaded_file)
# ### Make predictions
# preds = vg_model(par_list)
# # Get label names
# preds_list = preds.tolist()
# predictions_names=[]
# # loop through each prediction
# for ele in preds_list:
# try:
# index_of_one = ele.index(1)
# except ValueError:
# index_of_one = "NA"
# if index_of_one != "NA":
# name = id2label[index_of_one]
# else:
# name = "NA"
# predictions_names.append(name)
# # Combine the paragraphs and labels to a dataframe
# df_predictions = pd.DataFrame({'Paragraph': par_list, 'Prediction': predictions_names})
# # Drop all "Other" and "NA" predictions
# filtered_df = df[df['Prediction'].isin(['Other', 'NA'])]
# #####################################
# st.write(df)