import streamlit as st | |
import os | |
import pkg_resources | |
# Using this wacky hack to get around the massively ridiculous managed env loading order
def is_installed(package_name: str, version: str) -> bool:
    """Return True if *package_name* is installed at exactly *version*.

    Args:
        package_name: Distribution name as known to pip (e.g. ``"pydantic"``).
        version: Exact version string to compare against (e.g. ``"1.8.2"``).

    Returns:
        True when the distribution is installed and its version string
        equals *version*; False when it is missing or has another version.
    """
    # importlib.metadata is the standard-library replacement for the
    # deprecated pkg_resources API. Imported here so this function does not
    # depend on anything outside the standard library.
    from importlib import metadata

    try:
        installed_version = metadata.version(package_name)
    except metadata.PackageNotFoundError:
        return False
    return installed_version == version
# Moved up from further down the file: this has to be the first Streamlit
# call, otherwise things break.
st.set_page_config(
    page_title='Vulnerability Analysis',
    initial_sidebar_state='expanded',
    layout="wide",
)
# NOTE: nothing caches this function, so it runs on every rerun of app.py;
# the is_installed() checks are what keep repeat runs from reinstalling.
def install_packages():
    """Install the pinned package versions that are not already present.

    Every pin is checked with ``is_installed`` first; only the missing ones
    are installed. Installs run one after another with the pip that belongs
    to the running interpreter, and stop at the first failure.
    """
    import subprocess
    import sys

    # Each pin is used for both the check and the install so the two cannot
    # disagree (a mismatch makes the package reinstall on every rerun).
    # NOTE(review): the check for "spaces" used to test for 0.12.0 while the
    # command installed 0.17.0; 0.17.0 is kept because that is what actually
    # got installed -- confirm this is the intended version.
    pinned_versions = {
        "spaces": "0.17.0",
        "pydantic": "1.8.2",
        "typer": "0.4.0",
    }

    # Decide what is missing before installing anything, as before.
    missing_requirements = [
        f"{package_name}=={version}"
        for package_name, version in pinned_versions.items()
        if not is_installed(package_name, version)
    ]

    for requirement in missing_requirements:
        # "python -m pip" targets the interpreter running this app rather
        # than whichever "pip" happens to be first on PATH; the argument
        # list avoids going through a shell.
        result = subprocess.run(
            [sys.executable, "-m", "pip", "install", requirement],
            check=False,
        )
        if result.returncode != 0:
            # Same behaviour as the former "a && b && c" chain: do not
            # attempt the remaining installs once one has failed.
            break
# Install any missing pinned packages before the app modules are imported.
install_packages()
import appStore.vulnerability_analysis as vulnerability_analysis | |
import appStore.target as target_analysis | |
import appStore.doc_processing as processing | |
from utils.uploadAndExample import add_upload | |
from utils.vulnerability_classifier import label_dict | |
import pandas as pd | |
import plotly.express as px | |
# st.set_page_config(page_title = 'Vulnerability Analysis', | |
# initial_sidebar_state='expanded', layout="wide") | |
# Sidebar: choose between uploading a document and trying the example one,
# then hand the selection to the upload helper.
with st.sidebar:
    document_source = st.sidebar.radio(
        label='Select the Document',
        help='You can upload the document \
            or else you can try a example document',
        options=('Upload Document', 'Try Example'),
        horizontal=True,
    )
    add_upload(document_source)
# Centered page title.
with st.container():
    st.markdown(
        "<h2 style='text-align: center;'> Vulnerability Analysis 3.0 </h2>",
        unsafe_allow_html=True,
    )
    st.write(' ')

# Collapsed-by-default description of the app and its processing steps.
with st.expander("ℹ️ - About this app", expanded=False):
    st.write(
        """
        The Vulnerability Analysis App is an open-source\
        digital tool which aims to assist policy analysts and \
        other users in extracting and filtering references \
        to different groups in vulnerable situations from public documents. \
        We use Natural Language Processing (NLP), specifically deep \
        learning-based text representations to search context-sensitively \
        for mentions of the special needs of groups in vulnerable situations
        to cluster them thematically.
        """
    )
    st.write(
        """
        What Happens in background?
        - Step 1: Once the document is provided to app, it undergoes *Pre-processing*.\
        In this step the document is broken into smaller paragraphs \
        (based on word/sentence count).
        - Step 2: The paragraphs are then fed to the **Vulnerability Classifier** which detects if
        the paragraph contains any or multiple references to vulnerable groups.
        """
    )
    st.write("")
# Stages executed, in this order, when the user starts an analysis.
apps = [processing.app, vulnerability_analysis.app, target_analysis.app]
# Share of the progress bar that each finished stage accounts for.
progress_per_stage = 1 / len(apps)

if st.button("Analyze Document"):
    progress_bar = st.progress(0.0)
    for stages_done, run_stage in enumerate(apps, start=1):
        run_stage()
        progress_bar.progress(stages_done * progress_per_stage)

# Show the results whenever 'key0' is present in the session state.
if 'key0' in st.session_state:
    vulnerability_analysis.vulnerability_display()
    target_analysis.target_display()
# ################################################################### | |
# #with st.sidebar: | |
# # topic = st.radio( | |
# # "Which category you want to explore?", | |
# # (['Vulnerability', 'Concrete targets/actions/measures'])) | |
# #if topic == 'Vulnerability': | |
# # Assign dataframe a name | |
# df_vul = st.session_state['key0'] | |
# st.write(df_vul) | |
# col1, col2 = st.columns([1,1]) | |
# with col1: | |
# # Header | |
# st.subheader("Explore references to vulnerable groups:") | |
# # Text | |
# num_paragraphs = len(df_vul['Vulnerability Label']) | |
# num_references = df_vul['Vulnerability Label'].apply(lambda x: 'Other' not in x).sum() | |
# st.markdown(f"""<div style="text-align: justify;"> The document contains a | |
# total of <span style="color: red;">{num_paragraphs}</span> paragraphs. | |
# We identified <span style="color: red;">{num_references}</span> | |
# references to vulnerable groups.</div> | |
# <br> | |
# In the pie chart on the right you can see the distribution of the different | |
# groups defined. For a more detailed view in the text, see the paragraphs and | |
# their respective labels in the table below.</div>""", unsafe_allow_html=True) | |
# with col2: | |
# ### Bar chart | |
# # # Create a df that stores all the labels | |
# df_labels = pd.DataFrame(list(label_dict.items()), columns=['Label ID', 'Label']) | |
# # Count how often each label appears in the "Vulnerability Labels" column | |
# group_counts = {} | |
# # Iterate through each sublist | |
# for index, row in df_vul.iterrows(): | |
# # Iterate through each group in the sublist | |
# for sublist in row['Vulnerability Label']: | |
# # Update the count in the dictionary | |
# group_counts[sublist] = group_counts.get(sublist, 0) + 1 | |
# # Create a new dataframe from group_counts | |
# df_label_count = pd.DataFrame(list(group_counts.items()), columns=['Label', 'Count']) | |
# # Merge the label counts with the df_label DataFrame | |
# df_label_count = df_labels.merge(df_label_count, on='Label', how='left') | |
# st.write("df_label_count") | |
# # # Configure graph | |
# # fig = px.pie(df_labels, | |
# # names="Label", | |
# # values="Count", | |
# # title='Label Counts', | |
# # hover_name="Count", | |
# # color_discrete_sequence=px.colors.qualitative.Plotly | |
# # ) | |
# # #Show plot | |
# # st.plotly_chart(fig, use_container_width=True) | |
# # ### Table | |
# st.table(df_vul[df_vul['Vulnerability Label'] != 'Other']) | |
# vulnerability_analysis.vulnerability_display() | |
# elif topic == 'Action': | |
# policyaction.action_display() | |
# else: | |
# policyaction.policy_display() | |
#st.write(st.session_state.key0) |