Seanyoon committed on
Commit
2774ca7
1 Parent(s): 5d44f4d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -37
app.py CHANGED
@@ -1,47 +1,38 @@
1
- import transformers
2
- import pandas as pd
3
  import streamlit as st
4
- from preprocess import preprocess_data
5
-
6
- def anonymize_text(text):
7
- model_name = "distilbert-base-uncased"
8
- tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
9
- model = transformers.AutoModelForMaskedLM.from_pretrained(model_name)
10
-
11
- input_ids = tokenizer.encode(text, return_tensors="pt")
12
- mask_token_index = torch.where(input_ids == tokenizer.mask_token_id)[1]
13
-
14
- token_logits = model(input_ids)[0]
15
- mask_token_logits = token_logits[0, mask_token_index, :]
16
 
17
- top_5_tokens = torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()
18
 
19
- anonymized_text = []
20
- for token in top_5_tokens:
21
- token = tokenizer.decode([token])
22
- anonymized_text.append(token)
23
 
24
- return anonymized_text
 
25
 
26
- def run_app():
27
- st.title("Text Anonymization App")
 
 
28
 
29
- # File upload
30
- st.subheader("Upload your data")
31
- file = st.file_uploader("Upload CSV", type=["csv"])
32
 
33
- if file is not None:
34
- # Read the file
35
- data = pd.read_csv(file)
36
 
37
- # Preprocess the data
38
- preprocessed_data = preprocess_data(data)
 
 
 
 
 
39
 
40
- # Column selection
41
- st.subheader("Select columns to anonymize")
42
- selected_columns = []
43
- for col in preprocessed_data.columns:
44
- if st.checkbox(col):
45
- selected_columns.append(col)
46
 
47
- #
 
 
 
 
1
  import streamlit as st
2
+ import process
3
+ import pandas as pd
 
 
 
 
 
 
 
 
 
 
4
 
5
+ st.set_page_config(page_title="Data Anonymizer App")
6
 
7
+ st.title("Data Anonymizer App")
 
 
 
8
 
9
+ st.sidebar.title("Data Upload")
10
+ uploaded_file = st.sidebar.file_uploader("Choose a CSV file", type="csv")
11
 
12
+ if uploaded_file:
13
+ df = pd.read_csv(uploaded_file)
14
+ st.write("Original Data:")
15
+ st.write(df)
16
 
17
+ # process the data
18
+ processed_df, sensitive_cols = process.process_data(df)
 
19
 
20
+ # display processed data
21
+ st.write("Processed Data:")
22
+ st.write(processed_df)
23
 
24
+ # ask for sensitive columns removal
25
+ if sensitive_cols:
26
+ st.write(f"The following columns contain sensitive data: {', '.join(sensitive_cols)}")
27
+ if st.checkbox("Remove sensitive columns"):
28
+ processed_df.drop(columns=sensitive_cols, inplace=True)
29
+ else:
30
+ st.write("Sensitive columns will not be removed.")
31
 
32
+ # ask for k-anonymity
33
+ if st.checkbox("Apply k-anonymity"):
34
+ k = st.number_input("Enter the value of k", min_value=1)
35
+ processed_df = process.apply_k_anonymity(processed_df, k)
 
 
36
 
37
+ st.write("Final Processed Data:")
38
+ st.write(processed_df)