Spaces:
Runtime error
Runtime error
ziggycross
committed on
Commit
•
6c3e9dd
1
Parent(s):
aa72b6d
Fixed download bug and created sample GUI.
Browse files
- app.py +42 -33
- modules.py +3 -19
app.py
CHANGED
@@ -5,17 +5,10 @@ from streamlit_extras.let_it_rain import rain
|
|
5 |
# Options
|
6 |
DISCLAIMER = "*Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aliquam urna sem, bibendum efficitur pellentesque a, sollicitudin pharetra urna. Nam vel lectus vitae elit luctus feugiat a a purus. Aenean mollis quis ipsum sed ornare. Nunc sit amet ultricies tellus. Vivamus vulputate sem id molestie viverra. Etiam egestas lobortis enim, sit amet lobortis ligula sollicitudin vel. Nunc eget ipsum sollicitudin, convallis.*"
|
7 |
|
8 |
-
# Cleaning parameters
|
9 |
-
drop_missing = None,
|
10 |
-
remove_duplicates = None,
|
11 |
-
|
12 |
-
# Anonymizing parameters
|
13 |
-
anonymize_data = None
|
14 |
-
|
15 |
# Page Config
|
16 |
st.set_page_config(layout="wide")
|
17 |
|
18 |
-
|
19 |
with st.sidebar:
|
20 |
st.header("🕵️ 2anonymity")
|
21 |
st.markdown("*Clean and anonymize data*")
|
@@ -23,53 +16,69 @@ with st.sidebar:
|
|
23 |
file = st.file_uploader(f"Upload dataset:", type=modules.SUPPORTED_TYPES, label_visibility="collapsed")
|
24 |
df, (filename, extension), result = modules.load_file(file)
|
25 |
|
26 |
-
|
27 |
-
if df is None:
|
28 |
rain("🤠")
|
29 |
else:
|
30 |
-
|
31 |
with st.sidebar:
|
32 |
-
|
33 |
# Options for data cleaning
|
34 |
with st.container() as cleaning_options:
|
35 |
-
st.markdown("Data cleaning options:")
|
36 |
remove_duplicates = st.checkbox("Remove duplicate rows", value=True)
|
37 |
drop_missing = st.checkbox("Remove rows with missing values", value=False)
|
38 |
|
39 |
# Options for data optimization
|
40 |
with st.container() as anonymizing_options:
|
41 |
-
st.markdown("Anonymizing options:")
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
download_file = modules.create_file(df, extension)
|
47 |
-
with st.container() as downloader:
|
48 |
-
st.download_button("Download", download_file, file_name=filename)
|
49 |
-
|
50 |
-
# Add a disclaimer for data security
|
51 |
-
with st.container() as disclaimer:
|
52 |
-
st.markdown(
|
53 |
-
f"""
|
54 |
-
Disclaimer:
|
55 |
-
{DISCLAIMER}
|
56 |
-
"""
|
57 |
-
)
|
58 |
|
59 |
-
|
|
|
|
|
60 |
with st.container() as before_data:
|
61 |
s = df.style
|
62 |
s = s.set_properties(**{'background-color': '#fce4e4'})
|
63 |
st.dataframe(s)
|
64 |
|
65 |
-
#
|
66 |
df = modules.data_cleaner(df, drop_missing, remove_duplicates)
|
|
|
|
|
67 |
|
68 |
-
# Preview data after
|
69 |
with st.container() as after_data:
|
70 |
s = df.style
|
71 |
s = s.set_properties(**{'background-color': '#e4fce4'})
|
72 |
st.dataframe(s)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
|
74 |
# Attribution
|
75 |
st.sidebar.markdown("Created by team #2hack2furious for the hackthethreat2023")
|
|
|
5 |
# Options
|
6 |
DISCLAIMER = "*Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aliquam urna sem, bibendum efficitur pellentesque a, sollicitudin pharetra urna. Nam vel lectus vitae elit luctus feugiat a a purus. Aenean mollis quis ipsum sed ornare. Nunc sit amet ultricies tellus. Vivamus vulputate sem id molestie viverra. Etiam egestas lobortis enim, sit amet lobortis ligula sollicitudin vel. Nunc eget ipsum sollicitudin, convallis.*"
|
7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
# Page Config
|
9 |
st.set_page_config(layout="wide")
|
10 |
|
11 |
+
### FILE LOADER for sidebar
|
12 |
with st.sidebar:
|
13 |
st.header("🕵️ 2anonymity")
|
14 |
st.markdown("*Clean and anonymize data*")
|
|
|
16 |
file = st.file_uploader(f"Upload dataset:", type=modules.SUPPORTED_TYPES, label_visibility="collapsed")
|
17 |
df, (filename, extension), result = modules.load_file(file)
|
18 |
|
19 |
+
### MAIN
|
20 |
+
if df is None: # Await file to be uploaded
|
21 |
rain("🤠")
|
22 |
else:
|
23 |
+
### PRE-TRANSFORM features for sidebar
|
24 |
with st.sidebar:
|
|
|
25 |
# Options for data cleaning
|
26 |
with st.container() as cleaning_options:
|
27 |
+
st.markdown("### Data cleaning options:")
|
28 |
remove_duplicates = st.checkbox("Remove duplicate rows", value=True)
|
29 |
drop_missing = st.checkbox("Remove rows with missing values", value=False)
|
30 |
|
31 |
# Options for data optimization
|
32 |
with st.container() as anonymizing_options:
|
33 |
+
st.markdown("### Anonymizing options:")
|
34 |
+
sample_checkbox = st.checkbox("Test checkbox", value=True)
|
35 |
+
sample_slider = st.slider("Test slider", min_value=1, max_value=10, value=2)
|
36 |
+
sample_number = st.number_input("Test number", min_value=0, max_value=100, value=50)
|
37 |
+
sample_dropdown = st.selectbox("Test dropdown", ["A", "B", "C"], index=1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
|
39 |
+
|
40 |
+
### DATA PREVIEW AND TRANSFORM
|
41 |
+
# Preview data before transform
|
42 |
with st.container() as before_data:
|
43 |
s = df.style
|
44 |
s = s.set_properties(**{'background-color': '#fce4e4'})
|
45 |
st.dataframe(s)
|
46 |
|
47 |
+
# Transform data
|
48 |
df = modules.data_cleaner(df, drop_missing, remove_duplicates)
|
49 |
+
df = modules.data_anonymizer(df)
|
50 |
+
# download_file = modules.create_file(df, ".csv")
|
51 |
|
52 |
+
# Preview data after transform
|
53 |
with st.container() as after_data:
|
54 |
s = df.style
|
55 |
s = s.set_properties(**{'background-color': '#e4fce4'})
|
56 |
st.dataframe(s)
|
57 |
+
|
58 |
+
|
59 |
+
### POST-TRANSFORM features for sidebar
|
60 |
+
with st.sidebar:
|
61 |
+
# Options for download
|
62 |
+
with st.container() as download_header:
|
63 |
+
st.markdown("### Download")
|
64 |
+
output_extension = st.selectbox("File type", [".csv", ".json", ".xlsx"])
|
65 |
+
|
66 |
+
# Prepare file for download
|
67 |
+
with st.container() as downloader:
|
68 |
+
if output_extension == ".csv": output_file = df.to_csv().encode("utf-8")
|
69 |
+
elif output_extension == ".json": output_file = df.to_json().encode("utf-8")
|
70 |
+
elif output_extension == ".xlsx": output_file = df.to_excel().encode("utf-8")
|
71 |
+
output_filename = f"""{filename.split(".")[:-1][0]}-clean{output_extension}"""
|
72 |
+
st.download_button("Download", output_file, file_name=output_filename)
|
73 |
+
|
74 |
+
# Add a disclaimer for data security
|
75 |
+
with st.container() as disclaimer:
|
76 |
+
st.markdown(
|
77 |
+
f"""
|
78 |
+
Disclaimer:
|
79 |
+
{DISCLAIMER}
|
80 |
+
"""
|
81 |
+
)
|
82 |
|
83 |
# Attribution
|
84 |
st.sidebar.markdown("Created by team #2hack2furious for the hackthethreat2023")
|
modules.py
CHANGED
@@ -36,25 +36,6 @@ def load_file(file):
|
|
36 |
except Exception as error:
|
37 |
return df, metadata, f"Error: Unable to read file '{filename}' ({type(error)}: {error})"
|
38 |
|
39 |
-
def create_file(df, extension):
|
40 |
-
"""
|
41 |
-
Prepares a dataframe from streamlit for download.
|
42 |
-
|
43 |
-
@type df: pd.DataFrame
|
44 |
-
@param df: A DataFrame to package into a file.
|
45 |
-
@type extension: str
|
46 |
-
@param extension: The desired filetype.
|
47 |
-
@return: A file container ready for download.
|
48 |
-
"""
|
49 |
-
export_functions = {
|
50 |
-
"csv": pd.DataFrame.to_csv,
|
51 |
-
"json": pd.DataFrame.to_json,
|
52 |
-
"xlsx": pd.DataFrame.to_excel
|
53 |
-
}
|
54 |
-
exporter = export_functions.get(extension, None)
|
55 |
-
if exporter is None: return None
|
56 |
-
return exporter(df)
|
57 |
-
|
58 |
def data_cleaner(df, drop_missing=False, remove_duplicates=True):
|
59 |
"""
|
60 |
Takes a DataFrame and removes empty and duplicate entries.
|
@@ -70,4 +51,7 @@ def data_cleaner(df, drop_missing=False, remove_duplicates=True):
|
|
70 |
"""
|
71 |
df = df.dropna(how="any" if drop_missing else "all")
|
72 |
if remove_duplicates: df = df.drop_duplicates()
|
|
|
|
|
|
|
73 |
return df
|
|
|
36 |
except Exception as error:
|
37 |
return df, metadata, f"Error: Unable to read file '{filename}' ({type(error)}: {error})"
|
38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
def data_cleaner(df, drop_missing=False, remove_duplicates=True):
|
40 |
"""
|
41 |
Takes a DataFrame and removes empty and duplicate entries.
|
|
|
51 |
"""
|
52 |
df = df.dropna(how="any" if drop_missing else "all")
|
53 |
if remove_duplicates: df = df.drop_duplicates()
|
54 |
+
return df
|
55 |
+
|
56 |
+
def data_anonymizer(df):
|
57 |
return df
|