Spaces:
Runtime error
Runtime error
File size: 1,376 Bytes
78a2900 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
import transformers
import pandas as pd
import streamlit as st
from preprocess import preprocess_data
def anonymize_text(text):
    """Return the top-5 masked-LM predictions for the first mask token in *text*.

    The text is tokenized with the ``distilbert-base-uncased`` tokenizer, run
    through the matching masked-language model, and the five highest-scoring
    vocabulary entries at the first mask position are decoded to strings.

    NOTE(review): despite its name, this function does not rewrite or redact
    *text*; it only proposes fill-in candidates for a mask token that must
    already be present in the input. Confirm this is what callers expect.

    Args:
        text: Input string, expected to contain the tokenizer's mask token.

    Returns:
        A list of five decoded token strings, best prediction first, or an
        empty list when *text* contains no mask token.
    """
    # Imported locally because the file-level imports do not bring in torch,
    # which made the original body fail with NameError on first use.
    import torch

    model_name = "distilbert-base-uncased"
    # NOTE: tokenizer and model are loaded on every call; hoist or cache them
    # if this function ends up being called per row/cell.
    tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
    model = transformers.AutoModelForMaskedLM.from_pretrained(model_name)

    input_ids = tokenizer.encode(text, return_tensors="pt")
    # Second element of torch.where's result: positions along the sequence axis.
    mask_token_index = torch.where(input_ids == tokenizer.mask_token_id)[1]
    if mask_token_index.numel() == 0:
        # Without a mask position, indexing the top-k result below would raise
        # IndexError; there is simply nothing to predict.
        return []

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        token_logits = model(input_ids)[0]

    mask_token_logits = token_logits[0, mask_token_index, :]
    # Row 0 of the top-k indices corresponds to the first mask position.
    top_5_tokens = torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()
    return [tokenizer.decode([token_id]) for token_id in top_5_tokens]
def run_app():
st.title("Text Anonymization App")
# File upload
st.subheader("Upload your data")
file = st.file_uploader("Upload CSV", type=["csv"])
if file is not None:
# Read the file
data = pd.read_csv(file)
# Preprocess the data
preprocessed_data = preprocess_data(data)
# Column selection
st.subheader("Select columns to anonymize")
selected_columns = []
for col in preprocessed_data.columns:
if st.checkbox(col):
selected_columns.append(col)
#
|