File size: 6,582 Bytes
0a1e238
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3936853
c7320de
31b5a19
 
c48611a
614088e
db74214
eab471f
0a1e238
 
74a942d
31b5a19
 
 
 
 
 
 
 
afff22e
31b5a19
546504d
0a1e238
57455f3
31b5a19
 
 
 
 
6f96de4
546504d
 
 
 
 
31b5a19
546504d
31b5a19
 
 
 
 
 
 
546504d
31b5a19
 
 
a12fa3b
 
89abd1c
31b5a19
 
 
 
 
 
 
 
a12fa3b
f57ce49
13beabf
 
 
 
 
 
 
879b028
13beabf
 
 
 
4e33061
 
13beabf
4e33061
13beabf
 
 
 
 
 
 
4d1a87e
 
13beabf
 
 
 
 
 
 
 
 
 
857a9de
 
13beabf
4e33061
857a9de
4e33061
857a9de
 
4e33061
857a9de
b4d6c4c
4e33061
857a9de
 
4e33061
857a9de
 
86b5a28
857a9de
 
86b5a28
857a9de
4d1a87e
857a9de
 
86b5a28
 
 
 
 
 
 
 
13beabf
86b5a28
 
13beabf
86b5a28
4e33061
13beabf
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
import streamlit as st
import os
import pkg_resources

# Workaround for the load order of the managed environment: check the installed
# package versions ourselves at startup.
def is_installed(package_name, version):
    """Return True when *package_name* is installed at exactly *version*.

    The installed version is read from the distribution metadata through the
    standard-library ``importlib.metadata`` module rather than the deprecated
    ``pkg_resources`` API.

    Args:
        package_name: Distribution name as used on the pip command line.
        version: Exact version string to compare against (plain string
            equality, no specifier or range matching).

    Returns:
        True if the distribution is installed and its version string equals
        *version*; False if it is missing or has a different version.
    """
    # Local import keeps the function self-contained.
    from importlib import metadata

    try:
        return metadata.version(package_name) == version
    except metadata.PackageNotFoundError:
        # Not installed at all counts as "not at the requested version".
        return False

# Moved up from further down the script: this has to be the first Streamlit
# call that runs, otherwise it causes problems.
st.set_page_config(
    page_title='Vulnerability Analysis',
    layout="wide",
    initial_sidebar_state='expanded',
)

@st.cache_resource # cache the function so it's not called every time app.py is triggered
def install_packages():
    """Install the pinned package versions the app needs, when they are missing.

    Each pinned package is checked with ``is_installed`` and, if the exact
    version is not present, installed with pip. The version that is checked
    and the version that is installed come from the same table, so a
    successful install always satisfies the next check.

    Installation is best effort: a failing pip call is reported on stderr and
    stops the remaining installs, but no exception is raised.

    Returns:
        None.
    """
    # Local imports keep the function self-contained.
    import subprocess
    import sys

    # Single source of truth for the pins (check and install must agree).
    pinned_versions = {
        "spaces": "0.17.0",
        "pydantic": "1.8.2",
        "typer": "0.4.0",
    }

    for name, version in pinned_versions.items():
        # Checked right before each install, so a pin that an earlier install
        # changed as a side effect is restored as well.
        if is_installed(name, version):
            continue

        requirement = f"{name}=={version}"
        # Run pip through the current interpreter so the packages land in the
        # environment this app is running in; no shell string is involved.
        completed = subprocess.run(
            [sys.executable, "-m", "pip", "install", requirement],
            check=False,
        )
        if completed.returncode != 0:
            # Same as the former "&&" chaining: stop at the first failure.
            print(
                f"pip install {requirement} failed with exit code "
                f"{completed.returncode}",
                file=sys.stderr,
            )
            break

# Install the pinned packages if necessary. This runs before the appStore and
# utils modules below are imported.
install_packages()

import appStore.vulnerability_analysis as vulnerability_analysis
import appStore.target as target_extraction
import appStore.doc_processing as processing
from utils.uploadAndExample import add_upload
from utils.vulnerability_classifier import label_dict
import pandas as pd
import plotly.express as px

#st.set_page_config(page_title = 'Vulnerability Analysis', 
 #                  initial_sidebar_state='expanded', layout="wide") 

# Sidebar: let the user pick between uploading a document and trying an example
with st.sidebar:
    document_options = ('Upload Document', 'Try Example')
    choice = st.sidebar.radio(
        label='Select the Document',
        help='You can upload the document \
                            or else you can try a example document',
        options=document_options,
        horizontal=True,
    )
    # Pass the selection on to the upload/example helper
    add_upload(choice)

# Centered page title followed by a spacer line
with st.container():
    title_html = "<h2 style='text-align: center; color: black;'> Vulnerability Analysis 2.0 </h2>"
    st.markdown(title_html, unsafe_allow_html=True)
    st.write(' ')

# Collapsed "about" panel: purpose of the app and the processing steps
with st.expander("ℹ️ - About this app", expanded=False):
    # Purpose of the tool
    about_text = """
        The Vulnerability Analysis App is an open-source\
        digital tool which aims to assist policy analysts and \
        other users in extracting and filtering references \
        to different groups in vulnerable situations from public documents. \
        We use Natural Language Processing (NLP), specifically deep \
        learning-based text representations  to search context-sensitively \
        for mentions of the special needs of groups in vulnerable situations 
        to cluster them thematically. 
        """

    # Short description of the two processing steps
    pipeline_text = """
        What Happens in background?
        
        - Step 1: Once the document is provided to app, it undergoes *Pre-processing*.\
        In this step the document is broken into smaller paragraphs \
        (based on word/sentence count).
        - Step 2: The paragraphs are then fed to the **Vulnerability Classifier** which detects if
        the paragraph contains any or multiple references to vulnerable groups.
        """

    st.write(about_text)
    st.write(pipeline_text)
    st.write("")

# Processing stages, run in this order when the user starts an analysis
apps = [processing.app, vulnerability_analysis.app]

# Share of the progress bar contributed by each finished stage
multiplier_val = 1 / len(apps)

analyze_clicked = st.button("Analyze Document")
if analyze_clicked:
    progress_bar = st.progress(0.0)
    for stage_number, run_stage in enumerate(apps, start=1):
        run_stage()
        # Advance the bar once the stage has finished
        progress_bar.progress(stage_number * multiplier_val)

# If there is data stored, show the results section.
# 'key0' in the session state holds the results dataframe (presumably written
# by the processing stages above; the writer is not part of this file).
if 'key0' in st.session_state:

    ###################################################################
       
    #with st.sidebar:
     #  topic = st.radio(
      #                 "Which category you want to explore?",
       #                (['Vulnerability', 'Concrete targets/actions/measures']))
    
    #if topic == 'Vulnerability':

    # Assign dataframe a name
    df_vul = st.session_state['key0']
    st.write(df_vul)

    # Each cell of this column is treated as a collection of labels
    # (presumably a list of label strings; confirm against the classifier output).
    vul_labels = df_vul['Vulnerability Label']

    # A paragraph is a reference to a vulnerable group when 'Other' is not among
    # its labels. The same mask drives both the counter and the table below, so
    # the two can no longer disagree. astype(bool) keeps the mask usable for
    # indexing even when the dataframe is empty.
    has_reference = vul_labels.apply(lambda x: 'Other' not in x).astype(bool)

    col1, col2 = st.columns([1,1])

    with col1:

        # Header
        st.subheader("Explore references to vulnerable groups:")

        # Text
        num_paragraphs = len(vul_labels)
        num_references = int(has_reference.sum())

        # NOTE(review): the text mentions a pie chart, but the chart code below
        # is currently disabled.
        # The stray closing </div> at the end of the original markup is removed.
        st.markdown(f"""<div style="text-align: justify;"> The document contains a
                total of <span style="color: red;">{num_paragraphs}</span> paragraphs.
                We identified <span style="color: red;">{num_references}</span>
                references to vulnerable groups.</div>
                <br>
                In the pie chart on the right you can see the distribution of the different
                groups defined. For a more detailed view in the text, see the paragraphs and
                their respective labels in the table below.""", unsafe_allow_html=True)

    with col2:

        ### Label counts

        # All labels known to the classifier, one row per label
        df_labels = pd.DataFrame(list(label_dict.items()), columns=['Label ID', 'Label'])

        # Count how often each label appears: explode puts every label of every
        # paragraph on its own row, value_counts tallies them.
        label_counts = (
            vul_labels
            .explode()
            .value_counts()
            .rename_axis('Label')
            .reset_index(name='Count')
        )

        # Left merge keeps every known label; labels that never occur get 0
        # instead of NaN.
        df_label_count = df_labels.merge(label_counts, on='Label', how='left')
        df_label_count['Count'] = df_label_count['Count'].fillna(0).astype(int)

        # Show the counts themselves (previously the literal variable name was printed)
        st.write(df_label_count)

    # NOTE(review): when re-enabling the chart, plot df_label_count; df_labels
    # has no "Count" column.
    #     # Configure graph
    #     fig = px.pie(df_labels,
    #             names="Label", 
    #             values="Count",
    #             title='Label Counts',
    #             hover_name="Count",
    #             color_discrete_sequence=px.colors.qualitative.Plotly
    #     )
        
    #     #Show plot
    #     st.plotly_chart(fig, use_container_width=True)

    # ### Table
    # Only paragraphs that reference a vulnerable group
    st.table(df_vul[has_reference])

   # vulnerability_analysis.vulnerability_display()
# elif topic == 'Action':
#     policyaction.action_display()
# else: 
#     policyaction.policy_display()
#st.write(st.session_state.key0)