Sujatha committed on
Commit
efa0882
·
verified ·
1 Parent(s): 24fb26f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -67
app.py CHANGED
@@ -1,72 +1,49 @@
1
- import gradio as gr
 
 
2
  import pandas as pd
3
- from pytorch_tabular import TabularModel
4
- from pytorch_tabular.config import DataConfig, TrainerConfig
5
- from pytorch_tabular.models import CategoryEmbeddingModelConfig
6
-
7
- # Sample data representing health insurance approval prediction
8
- data = {
9
- 'age': [22, 55, 36, 45, 25, 65, 48, 30],
10
- 'BMI': [18.5, 24.0, 28.5, 23.0, 19.0, 27.5, 30.0, 21.5],
11
- 'annual_income': [30000, 90000, 60000, 80000, 40000, 120000, 70000, 50000],
12
- 'approved': [1, 1, 0, 1, 0, 1, 0, 0] # Binary target: 1 for approved, 0 for rejected
13
- }
14
- df = pd.DataFrame(data)
15
-
16
- # Configure pytorch_tabular
17
- data_config = DataConfig(
18
- target=["approved"],
19
- continuous_cols=["age", "BMI", "annual_income"]
20
- )
21
-
22
- model_config = CategoryEmbeddingModelConfig(
23
- task="classification",
24
- layers="64-64",
25
- learning_rate=1e-3
 
 
 
 
 
 
 
 
 
26
  )
27
 
28
- trainer_config = TrainerConfig(
29
- max_epochs=10
30
- )
31
-
32
- # Initialize and train the model
33
- try:
34
- tabular_model = TabularModel(
35
- data_config=data_config,
36
- model_config=model_config,
37
- trainer_config=trainer_config
38
- )
39
- tabular_model.fit(df)
40
- except ValueError as e:
41
- print(f"Error initializing TabularModel: {e}")
42
-
43
- # Define Inference Function with Error Handling
44
- def classify(age, BMI, annual_income):
45
- try:
46
- input_data = pd.DataFrame({
47
- "age": [age],
48
- "BMI": [BMI],
49
- "annual_income": [annual_income]
50
- })
51
- prediction = tabular_model.predict(input_data)["prediction"].iloc[0]
52
- return "Insurance Approved" if prediction == 1 else "Insurance Rejected"
53
- except Exception as e:
54
- print(f"Prediction error: {e}")
55
- return "An error occurred during prediction."
56
-
57
- # Gradio Interface
58
- iface = gr.Interface(
59
- fn=classify,
60
- inputs=[
61
- gr.Slider(18, 70, step=1, label="Age"),
62
- gr.Slider(15.0, 40.0, step=0.5, label="BMI"),
63
- gr.Slider(20000, 150000, step=5000, label="Annual Income")
64
- ],
65
- outputs="text",
66
- title="Health Insurance Approval Prediction",
67
- description="Predicts health insurance approval based on age, BMI, and annual income."
68
  )
69
 
70
- # Launch the Gradio app with necessary server settings
71
- print("Launching Gradio Interface...")
72
- iface.launch(server_name="0.0.0.0", server_port=7860, share=True)
 
1
+ # Import necessary libraries
2
+ from datasets import load_dataset
3
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
4
  import pandas as pd
5
+ from sklearn.model_selection import train_test_split
6
+
7
+ # Load the data into a DataFrame (assumes the PDF contents have already been exported to CSV)
8
+ df = pd.read_csv('diabetes_data.csv') # Replace with the path to your CSV
9
+ df['label'] = (df['target_column'] > threshold_value).astype(int) # Adjust target column for binary classification
10
+
11
+ # Split the dataset
12
+ train_df, test_df = train_test_split(df, test_size=0.2)
13
+ train_df.to_csv("train.csv", index=False)
14
+ test_df.to_csv("test.csv", index=False)
15
+
16
+ # Load dataset with Hugging Face Datasets
17
+ dataset = load_dataset('csv', data_files={'train': 'train.csv', 'test': 'test.csv'})
18
+
19
+ # Load tokenizer and model
20
+ tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
21
+ model = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
22
+
23
+ # Tokenize the dataset
24
+ def preprocess_function(examples):
25
+ return tokenizer(examples['text_column'], padding="max_length", truncation=True)
26
+
27
+ tokenized_dataset = dataset.map(preprocess_function, batched=True)
28
+
29
+ # Set training arguments
30
+ training_args = TrainingArguments(
31
+ output_dir="./results",
32
+ evaluation_strategy="epoch",
33
+ per_device_train_batch_size=16,
34
+ per_device_eval_batch_size=16,
35
+ num_train_epochs=3,
36
+ weight_decay=0.01,
37
  )
38
 
39
+ # Initialize Trainer
40
+ trainer = Trainer(
41
+ model=model,
42
+ args=training_args,
43
+ train_dataset=tokenized_dataset['train'],
44
+ eval_dataset=tokenized_dataset['test'],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  )
46
 
47
+ # Train and evaluate
48
+ trainer.train()
49
+ trainer.evaluate()