# sugar_pri / app.py: customer churn prediction Gradio app
import gradio as gr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the churn dataset (the 80% training split)
churn = pd.read_csv(r"C:\Users\deepu\Downloads\archive (9)\churn-bigml-80.csv")
print(churn.head())
# Identify categorical columns and one-hot encode them
categorical_cols = churn.select_dtypes(include=['object']).columns.tolist()
data_encoded = pd.get_dummies(churn, columns=categorical_cols, drop_first=True)

# Correlation of every encoded feature with the target column 'Churn'
target = 'Churn'
correlations = data_encoded.corr()[target].drop(target)  # drop the target's self-correlation

# Sort by absolute value, strongest relationship first
correlations = correlations.sort_values(key=abs, ascending=False)

# Plot the correlations as a horizontal bar chart
plt.figure(figsize=(10, 8))
sns.barplot(x=correlations.values, y=correlations.index, palette="coolwarm")
plt.title("Feature Correlations with Churn")
plt.xlabel("Correlation Coefficient")
plt.ylabel("Features")
plt.show()
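
# Also print the strongest correlations numerically; the plot shows the same
# information, but exact values are handy when choosing the filter thresholds below
print(correlations.head(10))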
# Filter features with correlation > 0.1 or < -0.05
filtered_features = correlations[(correlations > 0.1) | (correlations < -0.05)].index.tolist()
# Create a new DataFrame with only the filtered features and the target column
data_filtered = data_encoded[filtered_features + [target]]
# Save the filtered DataFrame to a CSV file
data_filtered.to_csv("filtered_features_churn.csv", index=False)
print("CSV file 'filtered_features_churn.csv' created successfully with filtered features.")

# Reload the filtered dataset; keep 'Churn' in place for the train/test split below
data = pd.read_csv("filtered_features_churn.csv")
X = data.drop(columns=["Churn"])
y = data["Churn"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features once, and reuse the same fitted scaler at prediction time
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train the classifier on the scaled data so training and inference inputs match
model = RandomForestClassifier(random_state=42)
model.fit(X_train_scaled, y_train)
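
# Optional sanity check: score the model on the held-out split to get a rough
# idea of how well the classifier generalises before serving it in the app
from sklearn.metrics import accuracy_score

print(f"Hold-out accuracy: {accuracy_score(y_test, model.predict(X_test_scaled)):.3f}")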

# Prediction function wired into the Gradio interface below
def predict_churn(*features):
    input_data = np.array(features, dtype=float).reshape(1, -1)
    input_data_scaled = scaler.transform(input_data)  # reuse the fitted scaler
    prediction = model.predict(input_data_scaled)
    return "Churn" if prediction[0] else "Not Churn"

# One input box per model feature; deriving the labels from the training columns
# keeps them aligned with what the model expects (e.g. 'Total day minutes',
# 'Customer service calls', 'International plan_Yes')
feature_labels = X.columns.tolist()

# Set up the Gradio interface
interface = gr.Interface(
    fn=predict_churn,
    inputs=[gr.Number(label=label) for label in feature_labels],
    outputs="text",
    title="Customer Churn Prediction",
    description="Enter customer information to predict churn.",
    theme="soft",
    flagging_options=["average prediction", "good prediction", "bad prediction"],
)
# Launch the interface
interface.launch(share=True)