Spaces:

Sujatha
/

TabularClassification

Runtime error

App Files Files Community

Sujatha committed on Nov 16, 2024

Commit

efa0882

verified ·

1 Parent(s): 24fb26f

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -67

app.py CHANGED Viewed

@@ -1,72 +1,49 @@
-import gradio as gr
 import pandas as pd
-from pytorch_tabular import TabularModel
-from pytorch_tabular.config import DataConfig, TrainerConfig
-from pytorch_tabular.models import CategoryEmbeddingModelConfig
-# Sample data representing health insurance approval prediction
-data = {
-    'age': [22, 55, 36, 45, 25, 65, 48, 30],
-    'BMI': [18.5, 24.0, 28.5, 23.0, 19.0, 27.5, 30.0, 21.5],
-    'annual_income': [30000, 90000, 60000, 80000, 40000, 120000, 70000, 50000],
-    'approved': [1, 1, 0, 1, 0, 1, 0, 0]  # Binary target: 1 for approved, 0 for rejected
-}
-df = pd.DataFrame(data)
-# Configure pytorch_tabular
-data_config = DataConfig(
-    target=["approved"],
-    continuous_cols=["age", "BMI", "annual_income"]
-)
-model_config = CategoryEmbeddingModelConfig(
-    task="classification",
-    layers="64-64",
-    learning_rate=1e-3
 )
-trainer_config = TrainerConfig(
-    max_epochs=10
-)
-# Initialize and train the model
-try:
-    tabular_model = TabularModel(
-        data_config=data_config,
-        model_config=model_config,
-        trainer_config=trainer_config
-    )
-    tabular_model.fit(df)
-except ValueError as e:
-    print(f"Error initializing TabularModel: {e}")
-# Define Inference Function with Error Handling
-def classify(age, BMI, annual_income):
-    try:
-        input_data = pd.DataFrame({
-            "age": [age],
-            "BMI": [BMI],
-            "annual_income": [annual_income]
-        })
-        prediction = tabular_model.predict(input_data)["prediction"].iloc[0]
-        return "Insurance Approved" if prediction == 1 else "Insurance Rejected"
-    except Exception as e:
-        print(f"Prediction error: {e}")
-        return "An error occurred during prediction."
-# Gradio Interface
-iface = gr.Interface(
-    fn=classify,
-    inputs=[
-        gr.Slider(18, 70, step=1, label="Age"),
-        gr.Slider(15.0, 40.0, step=0.5, label="BMI"),
-        gr.Slider(20000, 150000, step=5000, label="Annual Income")
-    ],
-    outputs="text",
-    title="Health Insurance Approval Prediction",
-    description="Predicts health insurance approval based on age, BMI, and annual income."
 )
-# Launch the Gradio app with necessary server settings
-print("Launching Gradio Interface...")
-iface.launch(server_name="0.0.0.0", server_port=7860, share=True)

+# Import necessary libraries
+from datasets import load_dataset
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
 import pandas as pd
+from sklearn.model_selection import train_test_split
+# Load the tabular data from a CSV file into a DataFrame (any PDF source must be converted to CSV beforehand)
+df = pd.read_csv('diabetes_data.csv')  # Replace with the path to your CSV
+df['label'] = (df['target_column'] > threshold_value).astype(int)  # Adjust target column for binary classification
+# Split the dataset
+train_df, test_df = train_test_split(df, test_size=0.2)
+train_df.to_csv("train.csv", index=False)
+test_df.to_csv("test.csv", index=False)
+# Load dataset with Hugging Face Datasets
+dataset = load_dataset('csv', data_files={'train': 'train.csv', 'test': 'test.csv'})
+# Load tokenizer and model
+tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
+model = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
+# Tokenize the dataset
+def preprocess_function(examples):
+    return tokenizer(examples['text_column'], padding="max_length", truncation=True)
+tokenized_dataset = dataset.map(preprocess_function, batched=True)
+# Set training arguments
+training_args = TrainingArguments(
+    output_dir="./results",
+    evaluation_strategy="epoch",
+    per_device_train_batch_size=16,
+    per_device_eval_batch_size=16,
+    num_train_epochs=3,
+    weight_decay=0.01,
 )
+# Initialize Trainer
+trainer = Trainer(
+    model=model,
+    args=training_args,
+    train_dataset=tokenized_dataset['train'],
+    eval_dataset=tokenized_dataset['test'],
 )
+# Train and evaluate
+trainer.train()
+trainer.evaluate()