"""Exploratory analysis of the USA Housing dataset: summaries and plots."""

import warnings

import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import pandas as pd
import seaborn as sns

# Silence all warnings for the rest of the run so library notices do not
# clutter the printed summaries and figures.
# NOTE(review): this is a blanket filter, so it also hides warnings that may
# point at real problems (e.g. deprecations) -- consider narrowing it to a
# specific category once the noisy source is identified.
warnings.filterwarnings('ignore')

# Load the dataset into a DataFrame.
# NOTE(review): the absolute '/content/...' path looks like a hosted-notebook
# upload location -- confirm before running this anywhere else.
usa_house = pd.read_csv('/content/USA Housing Dataset.csv')

# Column names, dtypes and non-null counts.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must be called directly; wrapping it in print() emits a stray "None" line.
usa_house.info()

# Number of missing values per column.
print(usa_house.isnull().sum())

# Descriptive statistics produced by DataFrame.describe().
print(usa_house.describe())

# Histogram (30 bins) of the 'price' column with a KDE curve overlaid.
plt.figure(figsize=(10, 6))
price_ax = sns.histplot(usa_house['price'], bins=30, kde=True, color='blue')
price_ax.set(title='Price Distribution', xlabel='Price', ylabel='Frequency')
# Show x ticks as whole numbers with thousands separators (e.g. 1,000,000).
price_ax.xaxis.set_major_formatter(
    mticker.FuncFormatter(lambda value, _pos: f'{int(value):,}')
)
plt.show()

# One bar chart per column showing how many rows carry each distinct value.
# Each entry is (column name, axis label); the title is derived from the label.
for count_column, count_label in (
    ('bedrooms', 'Bedrooms'),
    ('bathrooms', 'Bathrooms'),
):
    plt.figure(figsize=(8, 6))
    # NOTE(review): recent seaborn releases (0.13+) deprecate passing
    # `palette` without `hue`; confirm the installed version before switching
    # to hue=<column>, legend=False.
    count_ax = sns.countplot(x=count_column, data=usa_house, palette='pastel')
    count_ax.set(
        title=f'{count_label} Distribution',
        xlabel=count_label,
        ylabel='Count',
    )
    plt.show()

# Histograms (30 bins, KDE overlay) of the two square-footage columns.
# Each entry is (column name, axis label, bar colour).
for area_column, area_label, area_color in (
    ('sqft_living', 'Living Area (sqft)', 'green'),
    ('sqft_lot', 'Lot Area (sqft)', 'orange'),
):
    plt.figure(figsize=(10, 6))
    area_ax = sns.histplot(
        usa_house[area_column], bins=30, kde=True, color=area_color
    )
    area_ax.set(
        title=f'{area_label} Distribution',
        xlabel=area_label,
        ylabel='Frequency',
    )
    plt.show()

# One bar chart per column showing how many rows carry each distinct value.
# Each entry is (column name, figure title, x-axis label); titles are spelled
# out because they do not all follow the "<label> Distribution" pattern.
for category_column, category_title, category_label in (
    ('floors', 'Floor Distribution', 'Floors'),
    ('waterfront', 'Waterfront Distribution', 'Waterfront'),
    ('condition', 'Condition Distribution', 'Condition'),
):
    plt.figure(figsize=(8, 6))
    # NOTE(review): recent seaborn releases (0.13+) deprecate passing
    # `palette` without `hue`; confirm the installed version before switching
    # to hue=<column>, legend=False.
    category_ax = sns.countplot(
        x=category_column, data=usa_house, palette='pastel'
    )
    category_ax.set(
        title=category_title, xlabel=category_label, ylabel='Count'
    )
    plt.show()

# Scatter plots of 'price' (y-axis) against individual feature columns.
# Each entry is (column name, figure title, x-axis label, marker colour).
# NOTE(review): if 'yr_renovated' uses a placeholder such as 0 for homes that
# were never renovated, those points will stretch the x-axis of the last
# plot -- verify against the data and filter them out if so.
for feature_column, scatter_title, feature_label, marker_color in (
    ('sqft_living', 'Living Area vs. Price', 'Living Area (sqft)', 'purple'),
    ('sqft_lot', 'Lot Area vs Price', 'Lot Area (sqft)', 'red'),
    ('yr_built', 'Year Built vs Price', 'Year Built', 'blue'),
    ('yr_renovated', 'Year Renovated vs Price', 'Year Renovated', 'green'),
):
    plt.figure(figsize=(10, 6))
    scatter_ax = sns.scatterplot(
        x=feature_column, y='price', data=usa_house, color=marker_color
    )
    scatter_ax.set(title=scatter_title, xlabel=feature_label, ylabel='Price')
    # Show price ticks as whole numbers with thousands separators rather than
    # whatever notation matplotlib picks by default for large values.
    scatter_ax.yaxis.set_major_formatter(
        mticker.FuncFormatter(lambda value, _pos: f'{int(value):,}')
    )
    plt.show()