Spaces:
Runtime error
Runtime error
import functools

import pandas as pd
import streamlit as st
import torch
import transformers

from preprocess import preprocess_data
@functools.lru_cache(maxsize=1)
def _load_masked_lm(model_name):
    """Load the tokenizer and masked-LM model for *model_name*, memoized.

    Loading a checkpoint is expensive, so the pair is cached and reused by
    every later call made with the same model name.
    """
    tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
    model = transformers.AutoModelForMaskedLM.from_pretrained(model_name)
    return tokenizer, model


def anonymize_text(text):
    """Return the top five predicted fill-ins for the first mask token in *text*.

    The text is encoded with the ``distilbert-base-uncased`` tokenizer and run
    through the matching masked-language model. Only the first mask position
    is scored; any further mask tokens in the text are ignored.

    Args:
        text: Input string, expected to contain the tokenizer's mask token.

    Returns:
        A list of five decoded token strings, ordered from most to least
        likely, or an empty list when *text* contains no mask token.
    """
    tokenizer, model = _load_masked_lm("distilbert-base-uncased")

    input_ids = tokenizer.encode(text, return_tensors="pt")
    # torch.where on the (batch, seq) id tensor yields (row, column) index
    # tensors; the column tensor holds the positions of the mask tokens.
    mask_token_index = torch.where(input_ids == tokenizer.mask_token_id)[1]
    if mask_token_index.numel() == 0:
        # Without a mask token there is nothing to predict; indexing the
        # empty top-k result below would raise IndexError.
        return []

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        token_logits = model(input_ids)[0]

    mask_token_logits = token_logits[0, mask_token_index, :]
    # Row 0 of the top-k result corresponds to the first mask position.
    top_5_tokens = torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()
    return [tokenizer.decode([token]) for token in top_5_tokens]
def run_app(): | |
st.title("Text Anonymization App") | |
# File upload | |
st.subheader("Upload your data") | |
file = st.file_uploader("Upload CSV", type=["csv"]) | |
if file is not None: | |
# Read the file | |
data = pd.read_csv(file) | |
# Preprocess the data | |
preprocessed_data = preprocess_data(data) | |
# Column selection | |
st.subheader("Select columns to anonymize") | |
selected_columns = [] | |
for col in preprocessed_data.columns: | |
if st.checkbox(col): | |
selected_columns.append(col) | |
# | |