# Import the necessary libraries
import pandas as pd
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
# Load the dataset
df = pd.read_csv('diabetes_prediction_dataset.csv')  # ensure this file is uploaded to the root directory

# Define the target column (e.g., 'hypertension') and create binary labels;
# replace 'hypertension' with your actual target column if needed
threshold_value = 0
df['label'] = (df['hypertension'] > threshold_value).astype(int)
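
# Optional sanity check (an addition, not in the original script): targets like
# hypertension are often heavily imbalanced, so it is worth inspecting the class
# ratio before training.
print(df['label'].value_counts(normalize=True))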

# Split the dataset into train and test sets
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
train_dataset = Dataset.from_pandas(train_df)
test_dataset = Dataset.from_pandas(test_df)
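
# Note: with imbalanced labels, a stratified split keeps the class ratio equal in
# both sets; the same call accepts one extra argument for this (a suggested
# alternative, not part of the original script):
# train_df, test_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['label'])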

# Load the tokenizer and model from Hugging Face
model_name = "bert-base-uncased"  # you can replace this with another compatible model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
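
# Expect a warning that some weights were newly initialized: bert-base-uncased
# ships without a classification head, so from_pretrained attaches one with
# random weights, and it is trained from scratch during fine-tuning below.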

# Define a tokenization function
def preprocess_function(examples):
    # Convert each selected feature to a string and concatenate them into one short text per example
    inputs = [f"{age} {bmi} {hba1c}" for age, bmi, hba1c in zip(examples["age"], examples["bmi"], examples["HbA1c_level"])]
    return tokenizer(inputs, padding="max_length", truncation=True, max_length=32)
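
# Bare numbers give BERT little to work with. A common alternative (a sketch, not
# part of the original script) is to serialize rows with feature names so the
# tokens carry meaning:
# def preprocess_function(examples):
#     inputs = [
#         f"age: {age}, bmi: {bmi}, HbA1c level: {hba1c}"
#         for age, bmi, hba1c in zip(examples["age"], examples["bmi"], examples["HbA1c_level"])
#     ]
#     return tokenizer(inputs, padding="max_length", truncation=True, max_length=32)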

# Apply the tokenization function to the datasets
tokenized_train = train_dataset.map(preprocess_function, batched=True)
tokenized_test = test_dataset.map(preprocess_function, batched=True)
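
# No manual column cleanup is needed: the Trainer drops dataset columns that the
# model's forward() does not accept (remove_unused_columns=True by default), and
# the default collator renames the 'label' column to 'labels' for the model.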

# Set up training arguments
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",  # on newer transformers versions this argument is named eval_strategy
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
)
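
# trainer.evaluate() below only reports the eval loss by default. A minimal
# sketch of an accuracy metric (an addition; assumes scikit-learn is available;
# wire it in by passing compute_metrics=compute_metrics when constructing the
# Trainer):
import numpy as np
from sklearn.metrics import accuracy_score

def compute_metrics(eval_pred):
    logits, labels = eval_pred                 # EvalPrediction unpacks to (predictions, label_ids)
    predictions = np.argmax(logits, axis=-1)   # higher-scoring class per example
    return {"accuracy": accuracy_score(labels, predictions)}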

# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_test,
)

# Train and evaluate the model
trainer.train()
trainer.evaluate()
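
# A quick smoke test of the fine-tuned model (a sketch; the feature values below
# are made up for illustration and follow the "{age} {bmi} {hba1c}" format used
# during training):
import torch

model.eval()
sample = tokenizer("54.0 27.3 6.6", return_tensors="pt",
                   padding="max_length", truncation=True, max_length=32).to(model.device)
with torch.no_grad():
    logits = model(**sample).logits
print("predicted class:", logits.argmax(dim=-1).item())  # 1 = hypertension, 0 = not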