File size: 3,485 Bytes
b5c06f5 49525d0 bf8aa5a b5c06f5 bf8aa5a d9b8500 5dada9c d9b8500 b5c06f5 d9b8500 ae905a3 d9b8500 b5c06f5 49525d0 b5c06f5 49525d0 b5c06f5 49525d0 b5c06f5 41c3b0f b5c06f5 010b150 b5c06f5 d9b8500 b5c06f5 d9b8500 263a565 9d62ef5 b5c06f5 bf8aa5a b5c06f5 bf8aa5a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
import gradio as gr
import pandas as pd
import plotly.graph_objects as go
from sklearn.linear_model import LinearRegression
import numpy as np
from pandas.tseries.offsets import MonthEnd
import datetime
def plot_and_predict(zip, start_year, start_month, prediction_months):
    """Plot historical home values for a ZIP code plus a linear forecast.

    Args:
        zip: Five-digit ZIP code as text. The name shadows the builtin but is
            kept because it is part of the function's public signature.
        start_year: First year of history to use, as text (2000 through the
            current year).
        start_month: First month of history to use (1-12), as text or number.
        prediction_months: How many months past the last observation to
            forecast; must be at least 1.

    Returns:
        A ``plotly.graph_objects.Figure`` with the historical and predicted
        price traces, or a string beginning with ``"Error:"`` when an input is
        invalid or no data matches.
    """
    # NOTE(review): the UI below declares a plot output, yet validation
    # failures are reported as plain strings -- confirm how that is rendered.

    # isdecimal() only accepts characters int() can parse, unlike isdigit(),
    # which also accepts e.g. superscript digits that make int() raise.
    zip_code = str(zip).strip()
    if not zip_code.isdecimal() or len(zip_code) != 5:
        return "Error: Please enter a valid 5-digit ZIP code."

    current_year = datetime.datetime.now().year
    year_text = str(start_year).strip()
    if not year_text.isdecimal() or not (2000 <= int(year_text) <= current_year):
        return f"Error: Please enter a valid year between 2000 and {current_year}."

    # A missing selection arrives as None (TypeError) and free text raises
    # ValueError; out-of-range numbers must be rejected as well, otherwise
    # building the start date fails further down.
    month_error = "Error: Invalid start month. Please enter a numeric month between 1 and 12."
    try:
        start_month_int = int(start_month)
    except (TypeError, ValueError):
        return month_error
    if not 1 <= start_month_int <= 12:
        return month_error

    # range() below needs a true int, and a horizon of zero would hand the
    # model an empty sample to predict on.
    horizon_error = "Error: Prediction months must be a whole number of at least 1."
    try:
        horizon = int(prediction_months)
    except (TypeError, ValueError):
        return horizon_error
    if horizon < 1:
        return horizon_error

    start_date = pd.Timestamp(year=int(year_text), month=start_month_int, day=1)
    no_data_error = (
        "Error: No data found for the provided ZIP code or start year/month. "
        "Please check your inputs."
    )

    # Read and process the real estate data from Zillow
    df = pd.read_csv('https://files.zillowstatic.com/research/public_csvs/zhvi/Zip_zhvi_uc_sfrcondo_tier_0.33_0.67_sm_sa_month.csv')
    region_rows = df[df['RegionName'] == int(zip_code)]
    # Bail out before reshaping: an empty selection cannot be turned into the
    # two-column Date/Price layout.
    if region_rows.empty:
        return no_data_error

    # Keep only the monthly price columns and turn the single matching row
    # into a long Date/Price table.
    prices = region_rows.loc[:, '2000-01-31':].iloc[0]
    history = pd.DataFrame({
        'Date': pd.to_datetime(prices.index),
        'Price': pd.to_numeric(prices.to_numpy(), errors='coerce'),
    })

    # Filter on the start date and drop months without a price; the series
    # may contain missing values, which the regression cannot be fitted on.
    history = history[history['Date'] >= start_date].dropna(subset=['Price'])
    if history.empty:
        return no_data_error

    # Index months by calendar distance from the first kept observation so a
    # dropped month does not compress the time axis.
    first_date = history['Date'].iloc[0]
    month_index = (
        (history['Date'].dt.year - first_date.year) * 12
        + (history['Date'].dt.month - first_date.month)
    ).to_numpy()

    # Train linear regression model
    model = LinearRegression()
    model.fit(month_index.reshape(-1, 1), history['Price'].to_numpy())

    # Predict future prices
    future_months = (month_index[-1] + np.arange(1, horizon + 1)).reshape(-1, 1)
    predicted_prices = model.predict(future_months)

    # Prepare data for plotting
    historical_prices_trace = go.Scatter(
        x=history['Date'],
        y=history['Price'],
        mode="lines",
        name="Historical Prices",
    )
    last_date = history['Date'].iloc[-1]
    future_dates = [last_date + MonthEnd(i) for i in range(1, horizon + 1)]
    predicted_prices_trace = go.Scatter(
        x=future_dates,
        y=predicted_prices,
        mode="lines",
        name="Predicted Prices",
    )

    # Plot data
    fig = go.Figure()
    fig.add_trace(historical_prices_trace)
    fig.add_trace(predicted_prices_trace)
    fig.update_layout(
        title=f"Real Estate Price Prediction for Zip Code {zip_code}",
        xaxis_title="Date",
        yaxis_title="Price",
        legend_title_text="Data",
    )
    return fig
# Input widgets, in the same order as plot_and_predict's parameters:
# ZIP code, start year, start month, number of months to predict.
_month_choices = list(map(str, range(1, 13)))
_input_components = [
    gr.Textbox(label="ZIP Code", placeholder="e.g., 90210"),
    gr.Textbox(label="Start Year", placeholder="e.g., 2020"),
    gr.Dropdown(label="Start Month", choices=_month_choices),
    gr.Slider(minimum=1, maximum=60, step=1, label="Prediction Months"),
]

# Wire the prediction function to the widgets and a single plot output.
interface = gr.Interface(
    fn=plot_and_predict,
    inputs=_input_components,
    outputs="plot",
    title="Real Estate Price Predictor",
    description="Enter a ZIP code, start year, start month, and the number of months for price prediction.",
)

# Start the app with debug mode enabled.
interface.launch(debug=True)