Create excel_convert.py
Browse files- appStore/excel_convert.py +145 -0
appStore/excel_convert.py
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# set path
|
2 |
+
import glob, os, sys;
|
3 |
+
sys.path.append('../utils')
|
4 |
+
|
5 |
+
#import needed libraries
|
6 |
+
import seaborn as sns
|
7 |
+
import matplotlib.pyplot as plt
|
8 |
+
import numpy as np
|
9 |
+
import pandas as pd
|
10 |
+
import streamlit as st
|
11 |
+
from st_aggrid import AgGrid
|
12 |
+
import logging
|
13 |
+
logger = logging.getLogger(__name__)
|
14 |
+
from utils.config import get_classifier_params
|
15 |
+
from io import BytesIO
|
16 |
+
import xlsxwriter
|
17 |
+
import plotly.express as px
|
18 |
+
from pandas.api.types import (
|
19 |
+
is_categorical_dtype,
|
20 |
+
is_datetime64_any_dtype,
|
21 |
+
is_numeric_dtype,
|
22 |
+
is_object_dtype,
|
23 |
+
is_list_like)
|
24 |
+
|
25 |
+
|
26 |
+
def to_excel():
    """Serialise the session's results into an in-memory Excel workbook.

    The dataframe stored under ``st.session_state['key1']`` is written
    unchanged to a ``rawdata`` sheet. For each of ``target_hits``,
    ``action_hits``, ``policy_hits`` and ``plan_hits`` that is present in the
    session state, the rows whose ``keep`` flag is true are written to their
    own sheet (``Target``, ``Actions``, ``Policy``, ``Plans``) with the
    ``keep`` column removed.

    Returns:
        The ``.xlsx`` file content as ``bytes``, or ``None`` when ``'key1'``
        is not present in the session state.
    """
    if 'key1' not in st.session_state:
        return None

    shared_cols = ['text', 'page', 'keep',
                   'MitigationLabel', 'AdaptationLabel', 'Sector']
    # (session_state key, sheet name, columns exported to that sheet)
    sheets = (
        ('target_hits', 'Target', shared_cols + ['Sub-Target']),
        ('action_hits', 'Actions', shared_cols),
        ('policy_hits', 'Policy', shared_cols),
        # The original passed an undefined `adaptation_hits` here, raising
        # NameError as soon as 'plan_hits' existed in the session state.
        ('plan_hits', 'Plans', shared_cols),
    )

    output = BytesIO()
    # The context manager closes (and thereby flushes) the workbook into
    # `output`, even if writing one of the sheets raises.
    with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
        st.session_state['key1'].to_excel(
            writer, index=False, sheet_name='rawdata')

        for state_key, sheet_name, columns in sheets:
            if state_key not in st.session_state:
                continue
            hits = st.session_state[state_key][columns]
            # `== True` (rather than plain truthiness) keeps the original
            # element-wise comparison semantics for the `keep` column.
            kept = hits[hits['keep'] == True]  # noqa: E712
            # Non-inplace drop: `kept` is a slice of the session dataframe,
            # which must not be modified by the export.
            kept = kept.drop(columns=['keep']).reset_index(drop=True)
            kept.to_excel(writer, index=False, sheet_name=sheet_name)

    return output.getvalue()
|
57 |
+
|
58 |
+
|
59 |
+
def filter_dataframe(key, cols):
    """
    Adds a UI on top of a dataframe to let viewers filter columns and tick
    which rows to keep.

    Nothing is rendered beyond the "Add filters" checkbox unless it is
    ticked, ``key`` exists in ``st.session_state`` and the stored dataframe
    has at least one row. Otherwise one filter widget is shown per selected
    column (multiselect for categorical and list-valued columns, range
    slider for numeric columns, text box for everything else), followed by
    a data editor in which only the ``keep`` column is editable. The edited
    frame is stored back under ``st.session_state[key]``.

    Args:
        key: key to look for in session_state
        cols: columns to use for filter in that order
    Returns:
        None
    """
    import re  # local import: only needed for the regex fallback below

    modify = st.checkbox("Add filters")

    if not modify or key not in st.session_state:
        return

    df = st.session_state[key]
    # Requested columns first, then the remaining ones in their existing
    # order (a set difference would reshuffle them from run to run).
    df = df[list(cols) + [c for c in df.columns if c not in cols]]
    if len(df) == 0:
        return

    modification_container = st.container()

    with modification_container:
        # Keep the caller's ordering, as promised in the docstring.
        filterable = [c for c in cols if c not in ('page', 'keep')]
        to_filter_columns = st.multiselect("Filter dataframe on", filterable)
        for column in to_filter_columns:
            left, right = st.columns((1, 20))
            left.write("↳")
            series = df[column]

            if isinstance(series.dtype, pd.CategoricalDtype):
                options = list(series.unique())
                user_cat_input = right.multiselect(
                    f"Values for {column}",
                    options,
                    default=options,
                )
                df = df[series.isin(user_cat_input)]

            elif is_numeric_dtype(series):
                _min = float(series.min())
                _max = float(series.max())
                # A slider needs a real range: with a single distinct value
                # the step would be 0, and with no values min/max are NaN.
                if _min < _max:
                    user_num_input = right.slider(
                        f"Values for {column}",
                        _min,
                        _max,
                        (_min, _max),
                        step=(_max - _min) / 100,
                    )
                    df = df[series.between(*user_num_input)]
                else:
                    right.write(f"No range to filter in {column}")

            # Positional access: an earlier filter may have removed index
            # label 0, so `series[0]` could raise KeyError.
            elif len(series) > 0 and isinstance(series.iloc[0], list):
                # Sorted so the option order is stable across reruns.
                list_vals = sorted(
                    {x for lst in series.tolist() for x in lst}, key=str)
                user_multi_input = right.multiselect(
                    f"Values for {column}",
                    list_vals,
                    default=list_vals,
                )
                selected = set(user_multi_input)
                # Keep rows whose list shares at least one selected value.
                mask = series.apply(
                    lambda values: any(v in selected for v in values))
                df = df[mask]

            else:
                user_text_input = right.text_input(
                    f"Substring or regex in {column}",
                )
                if user_text_input:
                    try:
                        # Case-insensitive on both sides; missing cells
                        # count as "no match" instead of breaking the mask.
                        mask = series.str.contains(
                            user_text_input, case=False, na=False)
                    except re.error:
                        # Not a valid regex (e.g. a lone "("): treat the
                        # input as a plain substring.
                        mask = series.str.contains(
                            user_text_input, case=False, na=False,
                            regex=False)
                    df = df[mask]

        df = df.reset_index(drop=True)
        df = st.data_editor(
            df,
            column_config={
                "keep": st.column_config.CheckboxColumn(
                    help="Select which rows to keep",
                    default=False,
                )
            },
            # Every column except `keep` is read-only in the editor.
            disabled=[c for c in df.columns if c != 'keep'],
            hide_index=True,
            key='editor' + key,
        )

        # NOTE(review): this stores the *filtered* view, so rows hidden by
        # the filters above are no longer in session_state on the next
        # rerun - confirm that this is intended.
        st.session_state[key] = df

    return
|