import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

churn = pd.read_csv(r"C:\Users\deepu\Downloads\archive (9)\churn-bigml-80.csv")
churn

# Identify categorical columns
categorical_cols = churn.select_dtypes(include=['object']).columns.tolist()

# One-hot encode categorical columns
data_encoded = pd.get_dummies(churn, columns=categorical_cols, drop_first=True)

# Correlation of every feature with the target column
target = 'Churn'
correlations = data_encoded.corr()[target].drop(target)  # drop the target's self-correlation

# Sort correlations by absolute value (strongest to weakest)
correlations = correlations.sort_values(key=abs, ascending=False)

# Plot the correlations as a bar chart
plt.figure(figsize=(10, 8))
sns.barplot(x=correlations.values, y=correlations.index, palette="coolwarm")
plt.title("Feature Correlations with Churn")
plt.xlabel("Correlation Coefficient")
plt.ylabel("Features")
plt.show()

# Keep features whose correlation with the target is > 0.1 or < -0.05
filtered_features = correlations[(correlations > 0.1) | (correlations < -0.05)].index.tolist()

# New DataFrame with only the filtered features plus the target column
data_filtered = data_encoded[filtered_features + [target]]

# Save the filtered DataFrame to a CSV file
data_filtered.to_csv("filtered_features_churn.csv", index=False)
print("CSV file 'filtered_features_churn.csv' created successfully with filtered features.")

# Reload the filtered data and split into features and target
# (keep 'Churn' in the DataFrame here; it is dropped only when building X)
data = pd.read_csv("filtered_features_churn.csv")

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

X = data.drop(columns=["Churn"])
y = data["Churn"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training data; the prediction function below scales
# its inputs, so the model must be trained on scaled features as well
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

model = RandomForestClassifier(random_state=42)
model.fit(X_train_scaled, y_train)

import gradio as gr

# Prediction function for the Gradio app
def predict_churn(*features):
    input_data = np.array(features, dtype=float).reshape(1, -1)
    input_data_scaled = scaler.transform(input_data)  # use the fitted scaler
    prediction = model.predict(input_data_scaled)
    return "Churn" if prediction[0] == 1 else "Not Churn"

# Input labels must match the columns of X, in order, e.g.
# ['Number vmail messages', 'Total day minutes', 'Total day charge',
#  'Total intl calls', 'Customer service calls',
#  'International plan_Yes', 'Voice mail plan_Yes']
feature_labels = X.columns.tolist()
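# A quick hold-out check before wiring the model into the app (an added
# sketch, not part of the original notebook): score the model on the
# held-out test split, using the fitted scaler from above.
from sklearn.metrics import accuracy_score, classification_report

X_test_scaled = scaler.transform(X_test)
y_pred = model.predict(X_test_scaled)
print("Test accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))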
Prediction", description="Enter customer information to predict churn .", theme="soft", flagging_options=["average prediction", "good prediction", "bad prediction"] ) # Launch the interface interface.launch(share=True)