import os

import googlemaps
import numpy as np
import pandas as pd
import streamlit as st
from geopy.distance import geodesic
from geopy.exc import GeocoderTimedOut
from sklearn.neighbors import KNeighborsRegressor
def calculate_distance(lat1, lon1, lat2, lon2):
    """Return the geodesic distance, in meters, between two points.

    Each point is passed to ``geodesic`` as a ``(latitude, longitude)``
    tuple: ``(lat1, lon1)`` is the first point, ``(lat2, lon2)`` the second.
    """
    return geodesic((lat1, lon1), (lat2, lon2)).meters
def knn_predict(df, target_column, features_columns, k=5):
    """Fit a KNN regressor on *df* and predict the target for its own rows.

    Args:
        df: Table holding both the feature columns and the target column.
        target_column: Name of the column to regress on.
        features_columns: Names of the columns used as model inputs.
        k: Number of neighbours used by the regressor.

    Returns:
        An array with one prediction per row of *df*. When *df* has fewer
        than *k* rows no model is fitted and an array of zeros of the same
        length is returned instead.
    """
    features = df[features_columns]
    target = df[target_column]

    # Fewer rows than neighbours: no model is fitted, zeros are returned.
    n_rows = len(features)
    if n_rows < k:
        return np.zeros(n_rows)

    model = KNeighborsRegressor(n_neighbors=k)
    model.fit(features, target)
    # Predictions are made for the very rows the model was fitted on.
    return model.predict(features)
# Use the wide page layout.
st.set_page_config(layout="wide")

# Theme hook: the markup injected here currently consists of a newline only.
st.markdown(
    """
""",
    unsafe_allow_html=True,
)

# Load the dataset from the Excel export.
data = pd.read_excel('avaliase_crawler_pedo_nov_23.xlsx')

# Defaults that stay in effect until the sidebar widgets or the geocoder
# overwrite them; defining them up front avoids NameError further down.
custom_address_initial = 'Centro, Lajeado - RS, Brazil'  # pre-filled address
custom_lat = data['latitude'].median()  # median point of the dataset
custom_lon = data['longitude'].median()
radius_visible, radius_in_meters = True, 1500
zoom_level = 14  # starting map zoom
filtered_data = data  # starts as the entire dataset

# Text snippets rendered in the sidebar with unsafe_allow_html=True.
title_html = """
aval
ia
.NEXUS
"""

factor_html = """
aval
ia
.FACTOR
"""

evo_html = """
aval
ia
.EVO
"""
# Sidebar: every filter control is rendered here, and the selected values are
# applied to `data` / `data_tipo` as they are read.
with st.sidebar:
    st.sidebar.markdown(title_html, unsafe_allow_html=True)

    # Filter by "Fonte". st.selectbox takes the label first and the options
    # second, so the label has to be passed explicitly.
    selected_fonte = st.selectbox('Fonte', data['Fonte'].unique())
    data = data[data['Fonte'] == selected_fonte]

    # Filter by "Tipo", offering only the values present for that "Fonte".
    selected_tipo = st.selectbox('Tipo de imóvel', data['Tipo'].unique())
    data_tipo = data[data['Tipo'] == selected_tipo]

    custom_address = st.text_input('Informe o endereço', custom_address_initial)
    radius_visible = True  # Show radius slider for custom coordinates

    # SECURITY(review): an API key is committed in this file. It can now be
    # overridden through the GOOGLE_MAPS_API_KEY environment variable; the
    # literal is kept only as a fallback so existing deployments keep working.
    # Rotate the key and delete the fallback once the variable is configured.
    gmaps = googlemaps.Client(
        key=os.environ.get(
            'GOOGLE_MAPS_API_KEY',
            'AIzaSyDoJ6C7NE2CHqFcaHTnhreOfgJeTk4uSH0',
        )
    )

    # Geocode the typed address. On failure the previous coordinates are kept
    # and an error is shown. IndexError covers an empty result list; the
    # googlemaps exception types cover API, transport and timeout failures
    # raised by the client itself.
    try:
        location = gmaps.geocode(custom_address)[0]['geometry']['location']
        custom_lat, custom_lon = location['lat'], location['lng']
    except (
        IndexError,
        googlemaps.exceptions.ApiError,
        googlemaps.exceptions.TransportError,
        googlemaps.exceptions.Timeout,
    ):
        st.error("Erro: Não foi possível geocodificar o endereço fornecido. Por favor, verifique e tente novamente.")

    # Map zoom level.
    zoom_level = st.slider('Nível de zoom', min_value=1, max_value=15, value=zoom_level)

    # Search radius around the geocoded point.
    if radius_visible:
        radius_in_meters = st.slider('Selecione raio (em metros)', min_value=100, max_value=5000, value=1000)

    # Column bounds for the selected type, computed once and reused below.
    atotal_min, atotal_max = float(data_tipo['Atotal'].min()), float(data_tipo['Atotal'].max())
    apriv_min, apriv_max = float(data_tipo['Apriv'].min()), float(data_tipo['Apriv'].max())
    dorm_min, dorm_max = int(data_tipo['Dorm'].min()), int(data_tipo['Dorm'].max())
    banho_min, banho_max = int(data_tipo['Banheiro'].min()), int(data_tipo['Banheiro'].max())
    vaga_min, vaga_max = int(data_tipo['Vaga'].min()), int(data_tipo['Vaga'].max())

    # Without a slider, the full observed range is used (i.e. no filtering).
    dorm_range = (dorm_min, dorm_max)
    banho_range = (banho_min, banho_max)
    vaga_range = (vaga_min, vaga_max)

    # Area sliders are always shown and start at the full range.
    atotal_range = st.slider('Área Total', atotal_min, atotal_max, (atotal_min, atotal_max), step=0.1)
    apriv_range = st.slider('Área Privativa', apriv_min, apriv_max, (apriv_min, apriv_max), step=0.1)

    # NOTE(review): a count slider is only shown when BOTH the minimum and
    # the maximum are non-zero, so a column whose minimum is 0 never gets a
    # slider. Confirm whether `or` (or a min != max test) was intended.
    if dorm_min != 0 and dorm_max != 0:
        dorm_range = st.slider('Dormitórios', dorm_min, dorm_max, (dorm_min, dorm_max), step=1)
    if banho_min != 0 and banho_max != 0:
        banho_range = st.slider('Banheiros', banho_min, banho_max, (banho_min, banho_max), step=1)
    if vaga_min != 0 and vaga_max != 0:
        vaga_range = st.slider('Vaga de estacionamento', vaga_min, vaga_max, (vaga_min, vaga_max), step=1)

    # Checkbox state defaults to "unchecked" when the checkbox is not shown.
    elev_checkbox = False
    churr_checkbox = False
    esq_checkbox = False

    # NOTE(review): same gating as the sliders above - a checkbox only
    # appears when the column's minimum and maximum are both non-zero.
    # Confirm this is the intended rule for these flag columns.
    if int(data_tipo['Elevador'].min()) != 0 and int(data_tipo['Elevador'].max()) != 0:
        elev_checkbox = st.checkbox('Elevador')
    if int(data_tipo['Churrasq'].min()) != 0 and int(data_tipo['Churrasq'].max()) != 0:
        churr_checkbox = st.checkbox('Churrasqueira')
    if int(data_tipo['Lot_pos'].min()) != 0 and int(data_tipo['Lot_pos'].max()) != 0:
        esq_checkbox = st.checkbox('Duas ou mais frentes')

    # The flag columns are compared against 1/0, not True/False.
    elev_value = 1 if elev_checkbox else 0
    churr_value = 1 if churr_checkbox else 0
    esq_value = 1 if esq_checkbox else 0

    # Keep only the rows that satisfy every selected range and flag.
    data_tipo = data_tipo[
        (data_tipo['Atotal'].between(atotal_range[0], atotal_range[1])) &
        (data_tipo['Apriv'].between(apriv_range[0], apriv_range[1])) &
        (data_tipo['Dorm'].between(dorm_range[0], dorm_range[1])) &
        (data_tipo['Banheiro'].between(banho_range[0], banho_range[1])) &
        (data_tipo['Vaga'].between(vaga_range[0], vaga_range[1])) &
        (data_tipo['Elevador'] == elev_value) &
        (data_tipo['Churrasq'] == churr_value) &
        (data_tipo['Lot_pos'] == esq_value)
    ]

    # Snippets for the other apps, placed at the bottom of the sidebar.
    st.sidebar.markdown(factor_html, unsafe_allow_html=True)
    st.sidebar.markdown(evo_html, unsafe_allow_html=True)
# Minimum number of rows required for an estimate; also the k used for KNN.
k_threshold = 5

# Drop incomplete rows BEFORE measuring distances, so a row with a missing
# coordinate never reaches the distance computation. The surviving set is the
# same as filtering first and dropping afterwards.
complete_rows = data_tipo.dropna()

if complete_rows.empty:
    # A row-wise apply on an empty frame does not produce a boolean Series,
    # so the radius filter is skipped when there is nothing to filter.
    filtered_data = complete_rows.copy()
else:
    distances = complete_rows.apply(
        lambda row: calculate_distance(row['latitude'], row['longitude'], custom_lat, custom_lon),
        axis=1,
    )
    # .copy() so the columns added below are written to an independent frame
    # rather than to a slice of data_tipo.
    filtered_data = complete_rows[distances <= radius_in_meters].copy()

# Placeholder for custom CSS; the injected markup is currently empty.
st.markdown("", unsafe_allow_html=True)

# Area used as a model feature: 'Apriv' when non-zero, otherwise 'Atotal'.
filtered_data['area_feature'] = np.where(filtered_data['Apriv'] != 0, filtered_data['Apriv'], filtered_data['Atotal'])

# Regression target: 'Vunit_priv' when non-zero, otherwise 'Vunit_total'.
filtered_data['target_column'] = np.where(filtered_data['Vunit_priv'] != 0, filtered_data['Vunit_priv'], filtered_data['Vunit_total'])

# First KNN pass: predict the target from position and area.
predicted_target = knn_predict(
    filtered_data,
    'target_column',
    ['latitude', 'longitude', 'area_feature'],
    k=k_threshold,
)
filtered_data['Predicted_target'] = predicted_target

# Show the map followed by the filtered table.
with st.container():
    st.map(filtered_data, zoom=zoom_level, use_container_width=True)
    st.write("Dados:", filtered_data)  # Debug: print filtered_data
def bootstrap_stats(predicted_target, num_samples=1000, lower_pct=15.0, upper_pct=85.0):
    """Bootstrap the mean of *predicted_target* and summarise the resamples.

    The input is flattened, resampled with replacement ``num_samples`` times
    (each resample has as many elements as the input) and the mean of every
    resample is recorded.

    Args:
        predicted_target: Array-like of numeric values; any shape, it is
            flattened before resampling.
        num_samples: Number of bootstrap resamples to draw.
        lower_pct: Percentile of the resampled means reported as the lower
            bound. Defaults to 15.
        upper_pct: Percentile of the resampled means reported as the upper
            bound. Defaults to 85.

    Returns:
        A ``(lower_bound, higher_bound, mean_value)`` tuple, where
        ``mean_value`` is the mean of the resampled means. All three are NaN
        when the input is empty or ``num_samples`` is not positive.
    """
    values = np.asarray(predicted_target).ravel()
    sample_size = values.size

    # Nothing to resample: report NaNs explicitly instead of relying on
    # NumPy's empty-array behaviour (which also emits runtime warnings).
    if sample_size == 0 or num_samples <= 0:
        return np.nan, np.nan, np.nan

    # Draws come from NumPy's global random state, one resample per call.
    bootstrapped_means = [
        np.mean(np.random.choice(values, sample_size, replace=True))
        for _ in range(num_samples)
    ]

    lower_bound = np.percentile(bootstrapped_means, lower_pct)
    higher_bound = np.percentile(bootstrapped_means, upper_pct)
    mean_value = np.mean(bootstrapped_means)
    return lower_bound, higher_bound, mean_value
# Second KNN pass, fitted on the existing 'Predicted_target' column.
# NOTE(review): this regresses on values that are themselves KNN predictions,
# so the estimates are smoothed twice - confirm that this is intended.
# k is tied to k_threshold so the model and the warning below cannot drift.
predicted_target = knn_predict(
    filtered_data,
    'Predicted_target',
    ['latitude', 'longitude', 'area_feature'],
    k=k_threshold,
)

# knn_predict returns zeros when there are fewer rows than k, so sufficiency
# is checked on the row count directly rather than by testing for all-zero
# predictions (which would misreport a genuine all-zero result as "not
# enough data").
if len(filtered_data) >= k_threshold:
    filtered_data['Predicted_target'] = predicted_target

    # Bootstrap the predictions to get a central value and a range.
    lower_bound, higher_bound, mean_value = bootstrap_stats(predicted_target)

    st.markdown("## **Resultado da Análise Estatística**")
    st.write(f"Valor médio (Reais/m²) para as características selecionadas: ${mean_value:.2f}$ Reais")
    st.write(f"Os valores podem variar entre ${lower_bound:.2f}$ e ${higher_bound:.2f}$ Reais, dependendo das características dos imóveis.")
else:
    st.warning(f"**Dados insuficientes para inferência do valor. Mínimo necessário:** {k_threshold}")