# NOTE: page metadata captured with this file from a Hugging Face Space —
# the Space reported status "Runtime error"; file size 2,068 bytes; the
# commit-hash and line-number gutter columns were scraped alongside the code.
# Import necessary libraries
import pandas as pd
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
# Read the raw CSV (must sit in the Space's root directory) into a DataFrame.
df = pd.read_csv('diabetes_prediction_dataset.csv')

# Derive a binary target: 1 when the 'hypertension' value exceeds the
# threshold, 0 otherwise. Point this at another column if your target differs.
threshold_value = 0
df['label'] = (df['hypertension'] > threshold_value).astype(int)

# Hold out 20% of rows for evaluation (fixed seed for reproducibility) and
# wrap each split in a Hugging Face Dataset for use with Trainer.
splits = train_test_split(df, test_size=0.2, random_state=42)
train_dataset, test_dataset = (Dataset.from_pandas(part) for part in splits)
# Base checkpoint to fine-tune; any Hub checkpoint compatible with
# sequence classification can be substituted here.
model_name = "bert-base-uncased"

# Pull down a 2-class classification head and the matching tokenizer
# for that checkpoint.
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
tokenizer = AutoTokenizer.from_pretrained(model_name)
def preprocess_function(examples):
    """Tokenize a batch by flattening age/bmi/HbA1c into one text string per row.

    Each row's three numeric features are stringified and space-joined, then
    run through the module-level `tokenizer` with fixed-length padding.
    """
    rows = zip(examples["age"], examples["bmi"], examples["HbA1c_level"])
    texts = [f"{age} {bmi} {hba1c}" for age, bmi, hba1c in rows]
    return tokenizer(texts, padding="max_length", truncation=True, max_length=32)
# Tokenize both splits in batched mode so each example gains the
# input_ids/attention_mask columns the model consumes.
tokenized_train, tokenized_test = (
    split.map(preprocess_function, batched=True)
    for split in (train_dataset, test_dataset)
)
# Set up training arguments
# Hyperparameters for the Trainer; checkpoints and logs land under ./results.
training_args = TrainingArguments(
    output_dir="./results",
    # Run an evaluation pass at the end of every epoch.
    # NOTE(review): newer transformers releases renamed this kwarg to
    # `eval_strategy`; if the Space pins a recent version, this line may be
    # the source of its reported runtime error — verify against the pinned
    # transformers version.
    evaluation_strategy="epoch",
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,  # L2-style regularization applied by the optimizer
)
# Initialize the Trainer
# Wire the classification model, the hyperparameters above, and the two
# tokenized splits together; both splits carry the 'label' column built earlier.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_test,
)
# Train and evaluate the model
trainer.train()  # fine-tune; writes checkpoints under training_args.output_dir
trainer.evaluate()  # compute eval loss/metrics on the held-out split