import streamlit as st
import os
import pkg_resources


def is_installed(package_name, version):
    """Return True if the package is installed at exactly the given version."""
    try:
        pkg = pkg_resources.get_distribution(package_name)
        return pkg.version == version
    except pkg_resources.DistributionNotFound:
        return False


st.set_page_config(page_title='Vulnerability Analysis',
                   initial_sidebar_state='expanded', layout="wide")


@st.cache_resource
def install_packages():
    # Collect pip commands for pinned dependencies that are missing
    # or installed at a different version.
    install_commands = []

    if not is_installed("spaces", "0.17.0"):
        install_commands.append("pip install spaces==0.17.0")

    if not is_installed("pydantic", "1.8.2"):
        install_commands.append("pip install pydantic==1.8.2")

    if not is_installed("typer", "0.4.0"):
        install_commands.append("pip install typer==0.4.0")

    if install_commands:
        os.system(" && ".join(install_commands))


install_packages()

# These modules depend on the pinned packages above, so they are imported
# only after install_packages() has run.
import appStore.vulnerability_analysis as vulnerability_analysis
import appStore.target as target_extraction
import appStore.doc_processing as processing
from utils.uploadAndExample import add_upload
from utils.vulnerability_classifier import label_dict
import pandas as pd
import plotly.express as px

with st.sidebar:
    choice = st.sidebar.radio(label='Select the Document',
                              help='You can upload the document '
                                   'or try an example document',
                              options=('Upload Document', 'Try Example'),
                              horizontal=True)
    add_upload(choice)


with st.container():
    st.markdown("<h2 style='text-align: center; color: black;'> Vulnerability Analysis 2.0 </h2>", unsafe_allow_html=True)
    st.write(' ')

with st.expander("ℹ️ - About this app", expanded=False):
    st.write(
        """
        The Vulnerability Analysis App is an open-source digital tool that aims
        to assist policy analysts and other users in extracting and filtering
        references to different groups in vulnerable situations from public
        documents. We use Natural Language Processing (NLP), specifically deep
        learning-based text representations, to search context-sensitively for
        mentions of the special needs of groups in vulnerable situations and to
        cluster them thematically.
        """)

    st.write("""
    What happens in the background?

    - Step 1: Once the document is provided to the app, it undergoes *pre-processing*,
      in which the document is broken into smaller paragraphs (based on word/sentence count).
    - Step 2: The paragraphs are then fed to the **Vulnerability Classifier**, which detects
      whether a paragraph contains one or more references to vulnerable groups.
    """)

    st.write("")

apps = [processing.app, vulnerability_analysis.app]

multiplier_val = 1 / len(apps)
if st.button("Analyze Document"):
    prg = st.progress(0.0)
    for i, func in enumerate(apps):
        func()
        prg.progress((i + 1) * multiplier_val)

if 'key0' in st.session_state:

    df_vul = st.session_state['key0']
    st.write(df_vul)

    col1, col2 = st.columns([1, 1])

    with col1:

        st.subheader("Explore references to vulnerable groups:")

        num_paragraphs = len(df_vul['Vulnerability Label'])
        # Count paragraphs whose label list does not include 'Other',
        # i.e. paragraphs with at least one vulnerable-group reference.
        num_references = df_vul['Vulnerability Label'].apply(lambda x: 'Other' not in x).sum()

        st.markdown(f"""<div style="text-align: justify;"> The document contains a
                    total of <span style="color: red;">{num_paragraphs}</span> paragraphs.
                    We identified <span style="color: red;">{num_references}</span>
                    paragraphs with references to vulnerable groups.
                    <br>
                    In the pie chart on the right you can see the distribution of the different
                    groups defined. For a more detailed view in the text, see the paragraphs and
                    their respective labels in the table below.</div>""", unsafe_allow_html=True)

    with col2:

        # Map label IDs to human-readable label names.
        df_labels = pd.DataFrame(list(label_dict.items()), columns=['Label ID', 'Label'])

        # Count how often each vulnerability label occurs across all paragraphs
        # (the 'Vulnerability Label' column holds a list of labels per paragraph).
        group_counts = {}
        for index, row in df_vul.iterrows():
            for label in row['Vulnerability Label']:
                group_counts[label] = group_counts.get(label, 0) + 1

        df_label_count = pd.DataFrame(list(group_counts.items()), columns=['Label', 'Count'])
        df_label_count = df_labels.merge(df_label_count, on='Label', how='left')
        st.write(df_label_count)
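
        # The text in the left column refers to a pie chart of the label distribution,
        # but no chart is drawn at this point. Below is a minimal sketch using
        # plotly.express (imported above as px), assuming df_label_count has the
        # 'Label' and 'Count' columns built above. Labels that never occur get NaN
        # after the left merge, so they are filled with 0 before plotting.
        df_label_count['Count'] = df_label_count['Count'].fillna(0)
        fig = px.pie(df_label_count, values='Count', names='Label',
                     title='Distribution of references to vulnerable groups')
        st.plotly_chart(fig, use_container_width=True)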

    # Show only paragraphs that carry at least one vulnerable-group label.
    # The label column holds lists, so comparing it to the string 'Other' would
    # never filter anything; use the same membership test as above instead.
    st.table(df_vul[df_vul['Vulnerability Label'].apply(lambda x: 'Other' not in x)])