"""Streamlit app that maps property listings around an address and estimates
a unit value (per square metre) from nearby comparable listings.

The listings are read from ``data_nexus.xlsx``; the sidebar filters them by
purpose, property type, numeric ranges and distance to a geocoded address.
A KNN regressor over latitude, longitude and area produces the estimate.

The Google Maps API key is read from the ``GOOGLE_MAPS_API_KEY`` environment
variable; credentials must not be committed to the source file.
"""
import os

import folium
import googlemaps
import numpy as np
import pandas as pd
import streamlit as st
from branca.colormap import LinearColormap
from geopy.distance import geodesic
from geopy.exc import GeocoderTimedOut
from sklearn.neighbors import KNeighborsRegressor
from streamlit_folium import st_folium

# Suffix appended to every address so geocoding stays inside the state.
ADDRESS_SUFFIX = " - RS, Brazil"
# Environment variable that must hold the Google Maps API key.
API_KEY_ENV_VAR = "GOOGLE_MAPS_API_KEY"
# Feature columns used by the KNN regressor.
KNN_FEATURES = ['latitude', 'longitude', 'area_feature']


def add_heatmap_layer(map_obj, filtered_data, column_name, colormap_name, radius=15):
    """Add a layer of colour-coded circle markers for ``column_name``.

    Args:
        map_obj: folium map (or any folium container) receiving the layer.
        filtered_data: DataFrame with 'latitude', 'longitude' and
            ``column_name`` columns; rows with NaN in any of them are skipped.
        column_name: column whose values drive the marker colour.
        colormap_name: accepted for backward compatibility; currently unused
            (the blue-white-red scale is fixed).
        radius: marker radius passed to ``folium.CircleMarker``.
    """
    heat_data = filtered_data[['latitude', 'longitude', column_name]].dropna()
    heat_layer = folium.FeatureGroup(name=f'Heatmap - {column_name}')

    # With no rows the min/max would be NaN, so only build the colour scale
    # when there is something to draw. The empty layer is still added so the
    # layer control lists the same entries as before.
    if not heat_data.empty:
        values = heat_data[column_name]
        cmap = LinearColormap(
            colors=['blue', 'white', 'red'],
            vmin=values.min(),
            vmax=values.max(),
        )
        for lat, lon, value in zip(heat_data['latitude'], heat_data['longitude'], values):
            folium.CircleMarker(
                location=[float(lat), float(lon)],
                radius=radius,
                fill=True,
                fill_color=cmap(float(value)),
                fill_opacity=0.7,
                color='black',
                weight=0.5,
            ).add_to(heat_layer)

    heat_layer.add_to(map_obj)


def calculate_distance(lat1, lon1, lat2, lon2):
    """Return the geodesic distance in meters between two (lat, lon) points."""
    return geodesic((lat1, lon1), (lat2, lon2)).meters


def knn_predict(df, target_column, features_columns, k=5):
    """Fit a KNN regressor on ``df`` and predict the target for the same rows.

    Args:
        df: DataFrame holding both the feature and target columns.
        target_column: name of the column to regress.
        features_columns: list of feature column names.
        k: number of neighbours.

    Returns:
        Array of predictions, one per row of ``df``. When ``df`` has fewer
        than ``k`` rows an array of zeros of the same length is returned.
    """
    features = df[features_columns]
    target = df[target_column]

    # Not enough samples to form k neighbours: signal it with zeros.
    if len(features) < k:
        return np.zeros(len(features))

    model = KNeighborsRegressor(n_neighbors=k)
    model.fit(features, target)
    return model.predict(features)


def bootstrap_stats(bound_data, num_samples=1000):
    """Return the 25th and 75th percentiles of bootstrapped sample means.

    Args:
        bound_data: array-like of numeric values to resample.
        num_samples: number of bootstrap resamples.

    Returns:
        Tuple ``(lower_bound, higher_bound)``.
    """
    values = np.asarray(bound_data).ravel()
    bootstrapped_means = [
        np.mean(np.random.choice(values, len(values), replace=True))
        for _ in range(num_samples)
    ]
    lower_bound = np.percentile(bootstrapped_means, 25.)
    higher_bound = np.percentile(bootstrapped_means, 75.)
    return lower_bound, higher_bound


def _geocode_address(address, api_key):
    """Return ``(lat, lng)`` for ``address`` or ``None`` when geocoding fails."""
    try:
        client = googlemaps.Client(key=api_key)
        location = client.geocode(address)[0]['geometry']['location']
    except (
        IndexError,  # the API returned no match
        ValueError,  # the client rejected the key
        GeocoderTimedOut,
        googlemaps.exceptions.ApiError,
        googlemaps.exceptions.TransportError,
        googlemaps.exceptions.Timeout,
    ):
        return None
    return location['lat'], location['lng']


def _float_range_slider(label, series):
    """Render a float range slider spanning ``series``; return the selection.

    When the column holds a single distinct value no slider is drawn and the
    full (degenerate) range is returned, since a slider needs distinct bounds.
    """
    low, high = float(series.min()), float(series.max())
    if low >= high:
        return low, high
    return st.slider(label, low, high, (low, high), step=0.1)


def _int_range_slider(label, series):
    """Render an integer range slider spanning ``series``; return the selection.

    The slider is only drawn when both bounds are non-zero (original rule)
    and distinct; otherwise the full range is returned unchanged.
    NOTE(review): the non-zero rule hides the slider whenever the minimum is
    0 even if larger values exist - confirm that this is intended.
    """
    low, high = int(series.min()), int(series.max())
    if low != 0 and high != 0 and low < high:
        return st.slider(label, low, high, (low, high), step=1)
    return low, high


def _flag_checkbox(label, series):
    """Render a checkbox for a dummy column and return its value as 0 or 1.

    NOTE(review): the checkbox only appears when both the minimum and the
    maximum of the column are non-zero; otherwise 0 is returned - confirm.
    """
    if int(series.min()) != 0 and int(series.max()) != 0:
        return 1 if st.checkbox(label) else 0
    return 0


# Set wide mode
st.set_page_config(layout="wide")

# Dark-theme style hook; the injected markup is currently blank.
st.markdown(
    """ """,
    unsafe_allow_html=True
)

# Load the listings
data = pd.read_excel('data_nexus.xlsx')

# Defaults used until the sidebar widgets override them
radius_visible = True
custom_address_initial = 'Centro, Lajeado - RS, Brazil'  # Initial custom address
custom_lat = -29.7168
custom_lon = -52.4943
radius_in_meters = 150000
filtered_data = data  # Initialize with the entire dataset
zoom_level = 14
k_threshold = 5  # Minimum number of listings required for the KNN estimate

# Sidebar branding snippets, rendered with unsafe_allow_html
title_html = """ aval ia .NEXUS """
factor_html = """ aval ia .FACTOR """
evo_html = """ aval ia .EVO """

# Create a sidebar for controls
with st.sidebar:
    st.markdown(title_html, unsafe_allow_html=True)

    # Filter by "Fonte"
    selected_fonte = st.selectbox('Finalidade', data['Fonte'].unique())
    data = data[data['Fonte'] == selected_fonte]

    # Filter by "Tipo"
    selected_tipo = st.selectbox('Tipo de imóvel', data['Tipo'].unique())
    data_tipo = data[data['Tipo'] == selected_tipo]

    custom_address = st.text_input('Informe o endereço', custom_address_initial)
    radius_visible = True  # Show radius input for custom coordinates

    # Ensure custom_address ends with " - RS, Brazil"
    custom_address = custom_address.strip()
    if not custom_address.endswith(ADDRESS_SUFFIX):
        custom_address += ADDRESS_SUFFIX

    api_key = os.environ.get(API_KEY_ENV_VAR)
    if not api_key:
        # Without a key the map stays centred on the default coordinates.
        st.error(f"Erro: chave da API do Google Maps não configurada ({API_KEY_ENV_VAR}).")
    else:
        coordinates = _geocode_address(custom_address, api_key)
        if coordinates is None:
            st.error("Erro: Não foi possível geocodificar o endereço fornecido. Por favor, verifique e tente novamente.")
        else:
            custom_lat, custom_lon = coordinates

    # Slider for setting the zoom level
    zoom_level = st.slider('Nível de zoom', min_value=1, max_value=15, value=zoom_level)

    # Conditionally render the radius input
    if radius_visible:
        radius_in_meters = st.number_input(
            'Selecione raio (em metros)', min_value=0, max_value=100000, value=2000
        )

    # Range filters
    atotal_range = _float_range_slider('Área Total', data_tipo['Atotal'])
    apriv_range = _float_range_slider('Área Privativa', data_tipo['Apriv'])
    dorm_range = _int_range_slider('Dormitórios', data_tipo['Dorm'])
    banho_range = _int_range_slider('Banheiros', data_tipo['Banh'])
    vaga_range = _int_range_slider('Vaga de estacionamento', data_tipo['Vaga'])
    test_range = _int_range_slider('Testada', data_tipo['Test'])

    # Dummy-feature filters, already expressed as 1/0
    elev_value = _flag_checkbox('Elevador', data_tipo['Elevador'])
    esq_value = _flag_checkbox('Duas ou mais frentes', data_tipo['Lot_pos'])
    elev_checkbox = bool(elev_value)
    esq_checkbox = bool(esq_value)

    data_tipo = data_tipo[
        data_tipo['Atotal'].between(atotal_range[0], atotal_range[1])
        & data_tipo['Apriv'].between(apriv_range[0], apriv_range[1])
        & data_tipo['Dorm'].between(dorm_range[0], dorm_range[1])
        & data_tipo['Banh'].between(banho_range[0], banho_range[1])
        & data_tipo['Vaga'].between(vaga_range[0], vaga_range[1])
        & data_tipo['Test'].between(test_range[0], test_range[1])
        & (data_tipo['Elevador'] == elev_value)
        & (data_tipo['Lot_pos'] == esq_value)
    ]

# Links to other apps at the bottom of the sidebar
st.sidebar.markdown(factor_html, unsafe_allow_html=True)
st.sidebar.markdown(evo_html, unsafe_allow_html=True)

# Drop incomplete rows BEFORE measuring distances: geodesic() cannot handle
# NaN coordinates, and those rows were going to be discarded anyway.
candidates = data_tipo.dropna()
within_radius = np.fromiter(
    (
        calculate_distance(lat, lon, custom_lat, custom_lon) <= radius_in_meters
        for lat, lon in zip(candidates['latitude'], candidates['longitude'])
    ),
    dtype=bool,
    count=len(candidates),
)
# Work on an explicit copy so the column assignments below never write into
# a view of the source frame.
filtered_data = candidates[within_radius].copy()

# Hook for custom CSS on the map container; the injected markup is blank.
st.markdown("", unsafe_allow_html=True)

# Use the private area when available, otherwise the total area
filtered_data['area_feature'] = np.where(
    filtered_data['Apriv'] != 0, filtered_data['Apriv'], filtered_data['Atotal']
)

# Use the private-area unit value when available, otherwise the total one
filtered_data['target_column'] = np.where(
    filtered_data['Vunit_priv'] != 0,
    filtered_data['Vunit_priv'],
    filtered_data['Vunit_total'],
)

# Apply KNN and store the predicted target values
predicted_target = knn_predict(filtered_data, 'target_column', KNN_FEATURES, k=k_threshold)
filtered_data['Predicted_target'] = predicted_target

# Display the map and filtered_data
with st.container():
    st.map(filtered_data, zoom=zoom_level, use_container_width=True)
    st.write("Dados:", filtered_data)

folium_layermap = folium.Map(
    location=[custom_lat, custom_lon], zoom_start=zoom_level, control_scale=True
)

# Add marker layers for 'Valor_Urb', 'Valor_Eqp', and 'RENDA'
for layer_column in ('Valor_Urb', 'Valor_Eqp', 'RENDA'):
    add_heatmap_layer(folium_layermap, filtered_data, layer_column, 'RdBu_r')

# Add layer control
folium.LayerControl().add_to(folium_layermap)

# Display the map using st_folium
st_folium(folium_layermap, width="100%", height="100%")

# knn_predict returns all zeros (or an empty array) when there are fewer than
# k_threshold listings, so an all-zero vector means "not enough data".
if 'Predicted_target' in filtered_data.columns and not np.all(predicted_target == 0):
    # NOTE(review): the bounds are bootstrapped from the observed target
    # values, while the mean comes from the predictions - confirm intent.
    lower_bound, higher_bound = bootstrap_stats(filtered_data['target_column'])
    mean_value = np.mean(filtered_data['Predicted_target'])

    # Display the results
    st.markdown("## **Resultado da Análise Estatística**")
    st.write(f"Valor médio (Reais/m²) para as características selecionadas: ${mean_value:.2f}$ Reais")
    st.write(f"Os valores podem variar entre ${lower_bound:.2f}$ e ${higher_bound:.2f}$ Reais, dependendo das características dos imóveis.")
else:
    st.warning(f"**Dados insuficientes para inferência do valor. Mínimo necessário:** {k_threshold}")