"""Streamlit app that predicts used-car selling prices with a random forest.

The CarDekho dataset is downloaded, its categorical columns are one-hot
encoded, and a ``RandomForestRegressor`` is fitted on an 80% training split.
The page then collects the details of one car and displays the predicted
selling price.
"""
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split

# Source of the training data.
url = "https://raw.githubusercontent.com/manishkr1754/CarDekho_Used_Car_Price_Prediction/main/notebooks/data/cardekho_dataset.csv"

# Columns used as model features.
num_features = ['vehicle_age', 'km_driven', 'mileage', 'engine', 'max_power', 'seats']
cat_features = ['brand', 'model', 'seller_type', 'fuel_type', 'transmission_type']


@st.cache_data
def load_data(data_url: str) -> pd.DataFrame:
    """Download the dataset once and reuse it across Streamlit reruns."""
    return pd.read_csv(data_url)


@st.cache_resource
def train_model(data_url: str):
    """Fit the random forest once and reuse it across Streamlit reruns.

    Streamlit re-executes the whole script on every widget interaction, so
    without caching the forest would be retrained each time.

    Returns:
        A ``(model, feature_columns)`` tuple, where ``feature_columns`` is the
        ordered list of one-hot encoded column names the model was fitted on.
    """
    data = load_data(data_url)
    X = pd.get_dummies(
        data[num_features + cat_features],
        columns=cat_features,
        drop_first=True,
    )
    y = data['selling_price']

    # Only the training part of the split is used; the held-out part is kept
    # out of the fit exactly as before.
    X_train, _X_test, y_train, _y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    forest = RandomForestRegressor(n_estimators=100, random_state=42)
    forest.fit(X_train, y_train)
    return forest, list(X.columns)


def encode_input(raw_input: pd.DataFrame, feature_columns) -> pd.DataFrame:
    """One-hot encode a single-row input to match the training columns.

    ``drop_first`` must be False here: a one-row frame has exactly one level
    per categorical column, so dropping the first level would erase every
    categorical value. Reindexing to ``feature_columns`` then discards the
    dummies for levels that were dropped (or never seen) during training and
    zero-fills all training columns absent from the input.
    """
    encoded = pd.get_dummies(raw_input, columns=cat_features, drop_first=False)
    return encoded.reindex(columns=feature_columns, fill_value=0)


df = load_data(url)
model, feature_columns = train_model(url)

# Streamlit app
st.title('Used Car Price Prediction')

# Main form for user input
st.header('Enter Car Details')

# Input fields
brand = st.selectbox('Brand', df['brand'].unique())
model_input = st.text_input('Model', '')
vehicle_age = st.number_input('Vehicle Age (in years)', min_value=0, max_value=50, value=5)
km_driven = st.number_input('Kilometers Driven', min_value=0, max_value=300000, value=50000)
mileage = st.number_input('Mileage (kmpl)', min_value=0.0, max_value=50.0, value=15.0)
engine = st.number_input('Engine (cc)', min_value=500, max_value=5000, value=1500)
max_power = st.number_input('Max Power (bhp)', min_value=0, max_value=500, value=100)
seats = st.number_input('Seats', min_value=2, max_value=8, value=5)
seller_type = st.selectbox('Seller Type', df['seller_type'].unique())
fuel_type = st.selectbox('Fuel Type', df['fuel_type'].unique())
transmission_type = st.selectbox('Transmission Type', df['transmission_type'].unique())

# Button to trigger the prediction
if st.button('Predict Price'):
    # The model name is free text; a value missing from the dataset has no
    # matching dummy column and is encoded like the baseline model level.
    if model_input not in set(df['model']):
        st.warning(
            f"Model '{model_input}' was not found in the training data; "
            "the prediction will not reflect the model name."
        )

    # Create input dataframe
    input_data = pd.DataFrame({
        'vehicle_age': [vehicle_age],
        'km_driven': [km_driven],
        'mileage': [mileage],
        'engine': [engine],
        'max_power': [max_power],
        'seats': [seats],
        'brand': [brand],
        'model': [model_input],
        'seller_type': [seller_type],
        'fuel_type': [fuel_type],
        'transmission_type': [transmission_type],
    })

    # Encode the input and align it with the columns the model was fitted on
    input_data = encode_input(input_data, feature_columns)

    # Predict the price
    predicted_price = model.predict(input_data)

    # Display the result
    st.write(f'The predicted selling price for the car is: ₹ {predicted_price[0]:,.2f}')