Spaces:
Runtime error
Runtime error
File size: 3,679 Bytes
7ad6c98 1cddef5 7ad6c98 1cddef5 003953a 4db793d 1cddef5 4db793d 6c3e9dd 1cddef5 aa72b6d 1cddef5 aa72b6d 1cddef5 7ad6c98 6c3e9dd 1cddef5 6c3e9dd 1cddef5 003953a aa72b6d 1cddef5 aa72b6d 1cddef5 6c3e9dd 003953a aa72b6d 6c3e9dd aa72b6d 6c3e9dd aa72b6d 003953a aa72b6d 6c3e9dd aa72b6d 6c3e9dd 003953a 6c3e9dd 003953a 6c3e9dd 1cddef5 aa72b6d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
import io

import streamlit as st
from streamlit_extras.let_it_rain import rain

import modules
# Options
DISCLAIMER = "*Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aliquam urna sem, bibendum efficitur pellentesque a, sollicitudin pharetra urna. Nam vel lectus vitae elit luctus feugiat a a purus. Aenean mollis quis ipsum sed ornare. Nunc sit amet ultricies tellus. Vivamus vulputate sem id molestie viverra. Etiam egestas lobortis enim, sit amet lobortis ligula sollicitudin vel. Nunc eget ipsum sollicitudin, convallis.*"
K = 2
# Page Config
st.set_page_config(layout="wide")
### FILE LOADER for sidebar
with st.sidebar:
st.header("🕵️ 2anonymity")
st.markdown("*Clean and anonymize data*")
with st.container() as upload:
file = st.file_uploader(f"Upload dataset:", type=modules.SUPPORTED_TYPES, label_visibility="collapsed")
df, (filename, extension), result = modules.load_file(file)
### MAIN
# Renders the application body: a waiting animation until a dataset is loaded,
# otherwise the cleaning/anonymizing controls, before/after previews and the
# download widgets.
# NOTE(review): the reviewed copy of this file had lost its indentation; the
# nesting below is reconstructed from the `if`/`else`/`with` keywords and the
# section comments.
if df is None:  # Await file to be uploaded
    rain("🤠")
else:
    ### PRE-TRANSFORM features for sidebar
    with st.sidebar:
        # Options for data loading
        with st.container() as loading_options:
            st.markdown("### Data loading options:")
            remove_duplicates = st.checkbox("Remove duplicate rows", value=True)
            drop_missing = st.checkbox("Remove rows with missing values", value=False)
        # Options for data optimization
        with st.container() as anonymizing_options:
            st.markdown("### Anonymizing options:")
            max_categorical_size = st.slider(
                "Maximum number of categories", min_value=2, max_value=200, value=50
            )
            bin_size = st.slider("Target bin size", min_value=2, max_value=200, value=20)
            sensitivity_minimum = st.number_input(
                "Minimum count", min_value=2, max_value=10, value=2
            )

    ### DATA PREVIEW AND TRANSFORM
    # Preview data before transform (red-tinted cells)
    with st.container() as before_data:
        s = df.style
        s = s.set_properties(**{'background-color': '#fce4e4'})
        st.dataframe(s)

    # Transform data
    df = modules.data_cleaner(df, drop_missing, remove_duplicates)
    # The second return value is shown to the user below as the columns that
    # could not be processed.
    df, unprocessed = modules.data_anonymizer(
        df, K, max_categorical_size, bin_size, sensitivity_minimum
    )

    # Preview data after transform (green-tinted cells)
    with st.container() as after_data:
        s = df.style
        s = s.set_properties(**{'background-color': '#e4fce4'})
        st.dataframe(s)

    ### POST-TRANSFORM features for sidebar
    with st.sidebar:
        # Options for download
        with st.container() as download_header:
            st.markdown("### Download options:")
            output_extension = st.selectbox("File type", [".csv", ".json", ".xlsx"])
            if unprocessed:
                st.markdown(f"Error encountered when processing columns {str(unprocessed)}")

        # Prepare file for download
        with st.container() as downloader:
            if output_extension == ".csv":
                output_file = df.to_csv().encode("utf-8")
            elif output_extension == ".json":
                output_file = df.to_json().encode("utf-8")
            elif output_extension == ".xlsx":
                # DataFrame.to_excel needs a target to write to and returns
                # None, so the workbook is written into an in-memory buffer
                # and its raw bytes are handed to the download button.
                excel_buffer = io.BytesIO()
                df.to_excel(excel_buffer)
                output_file = excel_buffer.getvalue()

            # Strip only the final extension: names without a dot no longer
            # raise IndexError and "a.b.csv" keeps its "a.b" stem.
            # NOTE(review): assumes load_file returns `filename` as a str when
            # df is not None - confirm against modules.load_file.
            filename_stem = filename.rsplit(".", 1)[0]
            output_filename = f"{filename_stem}-clean{output_extension}"
            st.download_button("Download", output_file, file_name=output_filename)

        # Add a disclaimer for data security
        with st.container() as disclaimer:
            st.markdown(f"\nDisclaimer:\n{DISCLAIMER}\n")
# Attribution
# Written through st.sidebar directly, so it does not depend on an enclosing
# `with st.sidebar:` block.
# NOTE(review): placed at top level (shown whether or not a file is loaded);
# the original indentation was lost - confirm the intended placement.
st.sidebar.markdown("Created by team #2hack2furious for the hackthethreat2023")