import streamlit as st
import os
import pkg_resources


def is_installed(package_name, version):
    """Return True if the package is installed at exactly the given version."""
    try:
        pkg = pkg_resources.get_distribution(package_name)
        return pkg.version == version
    except pkg_resources.DistributionNotFound:
        return False


st.set_page_config(page_title='Vulnerability Analysis',
                   initial_sidebar_state='expanded', layout="wide")


@st.cache_resource
def install_packages():
    # Collect pip commands for pinned dependencies that are missing
    # or installed at a different version.
    install_commands = []

    if not is_installed("spaces", "0.17.0"):
        install_commands.append("pip install spaces==0.17.0")

    if not is_installed("pydantic", "1.8.2"):
        install_commands.append("pip install pydantic==1.8.2")

    if not is_installed("typer", "0.4.0"):
        install_commands.append("pip install typer==0.4.0")

    if install_commands:
        os.system(" && ".join(install_commands))


install_packages()

# These modules depend on the pinned packages above, so they are imported
# only after install_packages() has run.
import appStore.vulnerability_analysis as vulnerability_analysis
import appStore.target as target_extraction
import appStore.doc_processing as processing
from utils.uploadAndExample import add_upload
from utils.vulnerability_classifier import label_dict
import pandas as pd
import plotly.express as px

with st.sidebar:
    choice = st.sidebar.radio(label='Select the Document',
                              help='You can upload the document '
                                   'or try an example document',
                              options=('Upload Document', 'Try Example'),
                              horizontal=True)
    add_upload(choice)


with st.container():
    st.markdown("<h2 style='text-align: center; color: black;'> Vulnerability Analysis 2.0 </h2>", unsafe_allow_html=True)
    st.write(' ')

with st.expander("ℹ️ - About this app", expanded=False):
    st.write(
        """
        The Vulnerability Analysis App is an open-source digital tool that aims
        to assist policy analysts and other users in extracting and filtering
        references to different groups in vulnerable situations from public
        documents. We use Natural Language Processing (NLP), specifically deep
        learning-based text representations, to search context-sensitively for
        mentions of the special needs of groups in vulnerable situations and to
        cluster them thematically.
        """)

    st.write("""
    What happens in the background?

    - Step 1: Once the document is provided to the app, it undergoes *pre-processing*,
      in which the document is broken into smaller paragraphs (based on word/sentence count).
    - Step 2: The paragraphs are then fed to the **Vulnerability Classifier**, which detects
      whether a paragraph contains one or more references to vulnerable groups.
    """)

    st.write("")

apps = [processing.app, vulnerability_analysis.app]

multiplier_val = 1 / len(apps)
if st.button("Analyze Document"):
    prg = st.progress(0.0)
    for i, func in enumerate(apps):
        func()
        prg.progress((i + 1) * multiplier_val)

if 'key0' in st.session_state:

    df_vul = st.session_state['key0']
    st.write(df_vul)

    col1, col2 = st.columns([1, 1])

    with col1:

        st.subheader("Explore references to vulnerable groups:")

        num_paragraphs = len(df_vul['Vulnerability Label'])
        # Count paragraphs whose label list does not include 'Other',
        # i.e. paragraphs with at least one vulnerable-group reference.
        num_references = df_vul['Vulnerability Label'].apply(lambda x: 'Other' not in x).sum()

        st.markdown(f"""<div style="text-align: justify;"> The document contains a
                    total of <span style="color: red;">{num_paragraphs}</span> paragraphs.
                    We identified <span style="color: red;">{num_references}</span>
                    paragraphs with references to vulnerable groups.
                    <br>
                    In the pie chart on the right you can see the distribution of the different
                    groups defined. For a more detailed view in the text, see the paragraphs and
                    their respective labels in the table below.</div>""", unsafe_allow_html=True)

    with col2:

        # Map label IDs to human-readable label names.
        df_labels = pd.DataFrame(list(label_dict.items()), columns=['Label ID', 'Label'])

        # Count how often each vulnerability label occurs across all paragraphs
        # (the 'Vulnerability Label' column holds a list of labels per paragraph).
        group_counts = {}
        for index, row in df_vul.iterrows():
            for label in row['Vulnerability Label']:
                group_counts[label] = group_counts.get(label, 0) + 1

        df_label_count = pd.DataFrame(list(group_counts.items()), columns=['Label', 'Count'])
        df_label_count = df_labels.merge(df_label_count, on='Label', how='left')
        st.write(df_label_count)
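
        # The text in the left column refers to a pie chart of the label distribution,
        # but no chart is drawn at this point. Below is a minimal sketch using
        # plotly.express (imported above as px), assuming df_label_count has the
        # 'Label' and 'Count' columns built above. Labels that never occur get NaN
        # after the left merge, so they are filled with 0 before plotting.
        df_label_count['Count'] = df_label_count['Count'].fillna(0)
        fig = px.pie(df_label_count, values='Count', names='Label',
                     title='Distribution of references to vulnerable groups')
        st.plotly_chart(fig, use_container_width=True)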

    # Show only paragraphs that carry at least one vulnerable-group label.
    # The label column holds lists, so comparing it to the string 'Other' would
    # never filter anything; use the same membership test as above instead.
    st.table(df_vul[df_vul['Vulnerability Label'].apply(lambda x: 'Other' not in x)])