Spaces:
Runtime error
Runtime error
deepumanju
committed on
Commit
•
d126006
1
Parent(s):
9689c99
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import matplotlib.pyplot as plt
|
3 |
+
# NOTE: the notebook-only magic ``%matplotlib inline`` that used to sit here was
# removed -- it is IPython syntax and raises SyntaxError when this file is run
# as a plain Python script. The bare ``churn`` display expression (a no-op
# outside a notebook) was dropped for the same reason.
import os

# Location of the raw churn dataset. The path used to be hard-coded to an
# absolute location on a Windows development machine, which does not exist
# anywhere else. It now defaults to a file next to the working directory and
# can be overridden (e.g. to that original location) via CHURN_CSV_PATH.
CHURN_CSV_PATH = os.environ.get("CHURN_CSV_PATH", "churn-bigml-80.csv")

# Raw customer table; every later step in this script derives from it.
churn = pd.read_csv(CHURN_CSV_PATH)
|
6 |
+
import pandas as pd
|
7 |
+
import seaborn as sns
|
8 |
+
import matplotlib.pyplot as plt
|
9 |
+
|
10 |
+
|
11 |
+
# Columns with dtype ``object`` are treated as categorical.
categorical_cols = list(churn.select_dtypes(include=['object']).columns)

# One-hot encode them, dropping the first level of each column.
data_encoded = pd.get_dummies(churn, columns=categorical_cols, drop_first=True)

# Correlation of every encoded feature with the target column 'Churn'.
# The target's correlation with itself is removed, and the remaining values
# are ordered from strongest to weakest by absolute value.
target = 'Churn'
correlations = (
    data_encoded.corr()[target]
    .drop(target)
    .sort_values(key=abs, ascending=False)
)

# Horizontal bar chart: one bar per feature, bar length = correlation.
plt.figure(figsize=(10, 8))
sns.barplot(
    x=correlations.values,
    y=correlations.index,
    palette="coolwarm",
).set(
    title="Feature Correlations with Churn",
    xlabel="Correlation Coefficient",
    ylabel="Features",
)
plt.show()
|
30 |
+
import pandas as pd
|
31 |
+
|
32 |
+
|
33 |
+
# Identify categorical (dtype ``object``) columns and one-hot encode them,
# dropping the first level of each column.
categorical_cols = churn.select_dtypes(include=['object']).columns.tolist()
data_encoded = pd.get_dummies(churn, columns=categorical_cols, drop_first=True)

# Correlation of each encoded feature with the target column 'Churn'
# (the target's own entry is removed).
target = 'Churn'
correlations = data_encoded.corr()[target].drop(target)

# Keep a feature when its correlation is above 0.1 or below -0.05.
# The thresholds are intentionally asymmetric, as in the original filter.
filtered_features = [
    feature
    for feature, coefficient in correlations.items()
    if coefficient > 0.1 or coefficient < -0.05
]

# Selected features plus the target column, in that order.
data_filtered = data_encoded.loc[:, [*filtered_features, target]]

# Persist the reduced table without the index column.
data_filtered.to_csv("filtered_features_churn.csv", index=False)

print("CSV file 'filtered_features_churn.csv' created successfully with filtered features.")
|
53 |
+
# Reload the filtered feature table from disk.
# The 'Churn' column is deliberately kept: the training code further down
# reads data["Churn"] as the label and drops it itself to build X. Dropping it
# here as well (as this block used to) made those lookups raise KeyError.
data = pd.read_csv("filtered_features_churn.csv")
|
55 |
+
import gradio as gr
|
56 |
+
import numpy as np
|
57 |
+
from sklearn.ensemble import RandomForestClassifier
|
58 |
+
from sklearn.model_selection import train_test_split
|
59 |
+
|
60 |
+
# Split the table into label (y) and predictors (X).
# Requires `data` to still contain the 'Churn' column at this point.
y = data["Churn"]
X = data.drop(columns=["Churn"])

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() returns the estimator itself, so construction and training chain.
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)
|
65 |
+
import gradio as gr
|
66 |
+
import numpy as np
|
67 |
+
from sklearn.preprocessing import StandardScaler
|
68 |
+
from sklearn.ensemble import RandomForestClassifier
|
69 |
+
|
70 |
+
|
71 |
+
# Fit the scaler on the training split and keep the scaled training matrix.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Train the classifier on the *scaled* features. predict_churn() passes user
# input through scaler.transform() before calling model.predict(), so the
# model must be trained in the same feature space; fitting it on the raw
# X_train (as before) made every prediction run on mismatched inputs.
# random_state matches the seed used elsewhere in this script so retraining
# is reproducible.
model = RandomForestClassifier(random_state=42)
model.fit(X_train_scaled, y_train)
|
78 |
+
|
79 |
+
# Define a prediction function
|
80 |
+
def predict_churn(*features):
    """Predict whether a customer churns from one value per model feature.

    Args:
        *features: One numeric value per input field, in the order of the
            interface's inputs. A field left empty in the UI is expected to
            arrive as ``None`` (NOTE(review): per Gradio's Number component
            docs -- confirm for the installed version).

    Returns:
        ``"Churn"`` or ``"Not Churn"`` for a complete input, or a short
        message asking for the missing values when no values were given or
        any value is ``None``.
    """
    # Guard first: a None among the values would produce an object array
    # that the scaler cannot transform, surfacing as an opaque error.
    if not features or any(value is None for value in features):
        return "Please provide a value for every input field."

    # Single sample -> shape (1, n_features), as the scaler/model expect.
    input_data = np.array(features, dtype=float).reshape(1, -1)
    input_data_scaled = scaler.transform(input_data)  # module-level fitted scaler

    prediction = model.predict(input_data_scaled)
    return "Churn" if prediction[0] == 1 else "Not Churn"
|
87 |
+
|
88 |
+
# One label per column of the training matrix X, in training order, so the
# interface always offers exactly the inputs the scaler and model were fitted
# on. This replaces a hand-written list of seven names that had to be kept in
# sync manually with whatever the correlation filter selected.
feature_labels = X.columns.tolist()
|
92 |
+
|
93 |
+
# Build one numeric input widget per feature label.
feature_inputs = [gr.Number(label=name) for name in feature_labels]

# Assemble the Gradio UI around predict_churn; the prediction is shown as text.
interface = gr.Interface(
    fn=predict_churn,
    inputs=feature_inputs,
    outputs="text",
    title="Customer Churn Prediction",
    description="Enter customer information to predict churn .",
    theme="soft",
    flagging_options=["average prediction", "good prediction", "bad prediction"],
)

# Start the web app and request a public share link.
interface.launch(share=True)
|