fschwartzer committed on
Commit
c608fd2
1 Parent(s): bb85b9a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -159
app.py CHANGED
@@ -14,6 +14,8 @@ import sys
14
  import pydeck as pdk
15
  from ydata_profiling import ProfileReport
16
  import streamlit.components.v1 as components
 
 
17
 
18
  # Set wide mode
19
  st.set_page_config(layout="wide")
@@ -250,169 +252,52 @@ with tab2:
250
 
251
  with tab3:
252
  st.markdown(f'<style>{css_style}</style>', unsafe_allow_html=True)
253
- k_threshold = 5
254
-
255
- # Function to perform bootstrap on the predicted target values
256
- def bootstrap_stats(bound_data, num_samples=1000):
257
- # Reshape the predicted_target array
258
- bound_data = np.array(bound_data).reshape(-1, 1)
259
-
260
- # Bootstrap resampling
261
- bootstrapped_means = []
262
- for _ in range(num_samples):
263
- bootstrap_sample = np.random.choice(bound_data.flatten(), len(bound_data), replace=True)
264
- bootstrapped_means.append(np.mean(bootstrap_sample))
265
-
266
- # Calculate lower and higher bounds
267
- lower_bound = np.percentile(bootstrapped_means, 16.)
268
- higher_bound = np.percentile(bootstrapped_means, 84.)
269
-
270
- return lower_bound, higher_bound
271
-
272
- # Apply KNN and get predicted Predicted_target values
273
- predicted_target = knn_predict(filtered_data, 'Predicted_target', ['latitude', 'longitude', 'area_feature'])
274
-
275
- # Check if there are predictions to display
276
  if 'Predicted_target' in filtered_data.columns and not np.all(predicted_target == 0):
277
-
278
- # Apply bootstrap - bounds
279
- lower_bound, higher_bound = bootstrap_stats(filtered_data['target_column'])
280
-
281
- mean_value = np.mean(filtered_data['Predicted_target'])
282
-
283
- # Display the results with custom styling
284
- st.markdown("## **Algoritmo KNN (K-nearest neighbors)**")
285
- st.write(f"Valor médio (Reais/m²) para as características selecionadas: ${mean_value:.2f}$ Reais")
286
- st.write(f"Os valores podem variar entre ${lower_bound:.2f}$ e ${higher_bound:.2f}$ Reais, dependendo das características dos imóveis.")
 
 
 
 
 
 
 
 
 
 
287
  else:
288
  st.warning(f"**Dados insuficientes para inferência do valor. Mínimo necessário:** {k_threshold}")
289
-
290
- # Generate the profile report
291
- with st.spinner('Carregando análise...'):
292
- profile = ProfileReport(filtered_data, title="Análise Exploratória dos Dados", explorative=True)
293
- print(profile.config.json(indent=4))
294
- profile.config.html.style.primary_colors = ['#FFD700', '#FFD700', '#FFD700'] # Define todas as cores primárias para amarelo ouro
295
- profile_html = profile.to_html()
296
 
297
- # Replace English text with Portuguese
298
- profile_html = profile_html.replace("Overview", "Visão geral")
299
- profile_html = profile_html.replace("Alerts", "Alertas")
300
- profile_html = profile_html.replace("Reproduction", "Reprodução")
301
- profile_html = profile_html.replace("Dataset statistics", "Estatísticas do conjunto de dados")
302
- profile_html = profile_html.replace("Variable types", "Tipos de variáveis")
303
- profile_html = profile_html.replace("Variables", "Variáveis")
304
- profile_html = profile_html.replace("Interactions", "Interações")
305
- profile_html = profile_html.replace("Correlations", "Correlações")
306
- profile_html = profile_html.replace("Missing values", "Valores faltantes")
307
- profile_html = profile_html.replace("Sample", "Amostra")
308
- profile_html = profile_html.replace("Number of variables", "Número de variáveis")
309
- profile_html = profile_html.replace("Number of observations", "Número de observações")
310
- profile_html = profile_html.replace("Missing cells", "Células faltantes")
311
- profile_html = profile_html.replace("Missing cells (%)", "Células faltantes (%)")
312
- profile_html = profile_html.replace("Duplicate rows", "Linhas duplicadas")
313
- profile_html = profile_html.replace("Duplicate rows (%)", "Linhas duplicadas (%)")
314
- profile_html = profile_html.replace("Total size in memory", "Tamanho total na memória")
315
- profile_html = profile_html.replace("Average record size in memory", "Tamanho médio do registro na memória")
316
- profile_html = profile_html.replace("Text", "Texto")
317
- profile_html = profile_html.replace("Numeric", "Numérico")
318
- profile_html = profile_html.replace("Categorical", "Categórico")
319
- profile_html = profile_html.replace("Distinct", "Distinto")
320
- profile_html = profile_html.replace("Distinct (%)", "Distinto (%)")
321
- profile_html = profile_html.replace("Missing", "Faltando")
322
- profile_html = profile_html.replace("Missing (%)", "Faltando (%)")
323
- profile_html = profile_html.replace("Memory size", "Tamanho da memória")
324
- profile_html = profile_html.replace("Real number", "Número real")
325
- profile_html = profile_html.replace("Infinite", "Infinito")
326
- profile_html = profile_html.replace("Infinite (%)", "Infinito (%)")
327
- profile_html = profile_html.replace("Mean", "Média")
328
- profile_html = profile_html.replace("Minimum", "Mínimo")
329
- profile_html = profile_html.replace("Maximum", "Máximo")
330
- profile_html = profile_html.replace("Zeros", "Zeros")
331
- profile_html = profile_html.replace("Zeros (%)", "Zeros (%)")
332
- profile_html = profile_html.replace("Negative", "Negativo")
333
- profile_html = profile_html.replace("Negative (%)", "Negativo (%)")
334
- profile_html = profile_html.replace("Other values (2)", "Outros valores (2)")
335
- profile_html = profile_html.replace("Link", "Link")
336
- profile_html = profile_html.replace("UNIQUE", "ÚNICO")
337
- profile_html = profile_html.replace("CONSTANT", "CONSTANTE")
338
- profile_html = profile_html.replace("Average", "Média")
339
- profile_html = profile_html.replace("Number of rows", "Número de linhas")
340
- profile_html = profile_html.replace("Distinct values", "Valores distintos")
341
- profile_html = profile_html.replace("Histogram", "Histograma")
342
- profile_html = profile_html.replace("Top", "Top")
343
- profile_html = profile_html.replace("Bottom", "Inferior")
344
- profile_html = profile_html.replace("Frequency", "Frequência")
345
- profile_html = profile_html.replace("has constant value", "tem valores constantes")
346
- profile_html = profile_html.replace("has unique value", "tem valores únicos")
347
- profile_html = profile_html.replace("Analysis started", "Início da análise")
348
- profile_html = profile_html.replace("Analysis finished", "Término da análise")
349
- profile_html = profile_html.replace("Duration", "Duração")
350
- profile_html = profile_html.replace("Software version", "Versão do software")
351
- profile_html = profile_html.replace("Download configuration", "Configuração para download")
352
- profile_html = profile_html.replace("Select Columns", "Selecione coluna")
353
- profile_html = profile_html.replace("Length", "Comprimento")
354
- profile_html = profile_html.replace("Max length", "Comprimento máximo")
355
- profile_html = profile_html.replace("Median length", "Comprimento mediano")
356
- profile_html = profile_html.replace("Mean length", "Comprimento médio")
357
- profile_html = profile_html.replace("Min length", "Comprimento mínimo")
358
- profile_html = profile_html.replace("Characters and Unicode", "Caracteres e Unicode")
359
- profile_html = profile_html.replace("Total characters", "Total de caracteres")
360
- profile_html = profile_html.replace("Distinct characters", "Caracteres distintos")
361
- profile_html = profile_html.replace("Distinct categories", "Categorias distintas")
362
- profile_html = profile_html.replace("Distinct scripts", "Scripts distintos")
363
- profile_html = profile_html.replace("Distinct blocks", "Blocos distintos")
364
- profile_html = profile_html.replace("The Unicode Standard assigns character properties to each code point, which can be used to analyse textual variables.", "O Padrão Unicode atribui propriedades de caracteres a cada ponto de código, que podem ser usados para analisar variáveis textuais.")
365
- profile_html = profile_html.replace("Unique", "Único")
366
- profile_html = profile_html.replace("Unique (%)", "Único (%)")
367
- profile_html = profile_html.replace("Words", "Palavras")
368
- profile_html = profile_html.replace("Characters", "Caracteres")
369
- profile_html = profile_html.replace("Most occurring characters", "Caracteres mais frequentes")
370
- profile_html = profile_html.replace("Categories", "Categorias")
371
- profile_html = profile_html.replace("Most occurring categories", "Categorias mais frequentes")
372
- profile_html = profile_html.replace("(unknown)", "(desconhecido)")
373
- profile_html = profile_html.replace("Most frequent character per category", "Caractere mais frequente por categoria")
374
- profile_html = profile_html.replace("Scripts", "Scripts")
375
- profile_html = profile_html.replace("Most occurring scripts", "Scripts mais frequentes")
376
- profile_html = profile_html.replace("Most frequent character per script", "Caractere mais frequente por script")
377
- profile_html = profile_html.replace("Blocks", "Blocos")
378
- profile_html = profile_html.replace("Most occurring blocks", "Blocos mais frequentes")
379
- profile_html = profile_html.replace("Frequency (%)", "Frequência (%)")
380
- profile_html = profile_html.replace("Most frequent character per block", "Caractere mais frequente por bloco")
381
- profile_html = profile_html.replace("Matrix", "Matriz")
382
- profile_html = profile_html.replace("First rows", "Primeiras linhas")
383
- profile_html = profile_html.replace("Last rows", "Últimas linhas")
384
- profile_html = profile_html.replace("More details", "Maior detalhamento")
385
- profile_html = profile_html.replace("Statistics", "Estatísticas")
386
- profile_html = profile_html.replace("Quantile statistics", "Estatísticas de quantis")
387
- profile_html = profile_html.replace("Common values", "Valores comuns")
388
- profile_html = profile_html.replace("Extreme values", "Valores extremos")
389
- profile_html = profile_html.replace("5-th percentile", "5º percentil")
390
- profile_html = profile_html.replace("median", "mediana")
391
- profile_html = profile_html.replace("95-th percentile", "95º percentil")
392
- profile_html = profile_html.replace("Range", "Intervalo")
393
- profile_html = profile_html.replace("Interquartile range (IQR)", "Intervalo Interquartil")
394
- profile_html = profile_html.replace("Descriptive statistics", "Estatísticas descritivas")
395
- profile_html = profile_html.replace("Standard deviation", "Desvio padrão")
396
- profile_html = profile_html.replace("Coefficient of variation (CV)", "Coeficiente de variação (CV)")
397
- profile_html = profile_html.replace("Kurtosis", "Curtose")
398
- profile_html = profile_html.replace("Median Absolute Deviation (MAD)", "Desvio Absoluto Mediano (MAD)")
399
- profile_html = profile_html.replace("Skewness", "Assimetria")
400
- profile_html = profile_html.replace("Sum", "Soma")
401
- profile_html = profile_html.replace("Variance", "Variância")
402
- profile_html = profile_html.replace("Monotonicity", "Monotonicidade")
403
- profile_html = profile_html.replace("Not monotonic", "Não monotônica")
404
- profile_html = profile_html.replace("Histogram with fixed size bins (bins=16)", "Histograma com intervalos de tamanho fixo (intervalos=16)")
405
- profile_html = profile_html.replace("Minimum 10 values", "Mínimo 10 valores")
406
- profile_html = profile_html.replace("Maximum 10 values", "Máximo 10 valores")
407
- profile_html = profile_html.replace("1st row", "1ª linha")
408
- profile_html = profile_html.replace("2nd row", "2ª linha")
409
- profile_html = profile_html.replace("3rd row", "3ª linha")
410
- profile_html = profile_html.replace("4th row", "4ª linha")
411
- profile_html = profile_html.replace("5th row", "5ª linha")
412
-
413
- # Display the modified HTML in Streamlit
414
- components.html(profile_html, height=600, scrolling=True)
415
-
416
  with tab4:
417
  st.markdown(f'<style>{css_style}</style>', unsafe_allow_html=True)
418
  components.iframe("https://davidsb-rl-2.hf.space", height=600, scrolling=True)
 
14
  import pydeck as pdk
15
  from ydata_profiling import ProfileReport
16
  import streamlit.components.v1 as components
17
+ from folium.plugins import MarkerCluster
18
+ from sklearn.neighbors import NearestNeighbors
19
 
20
  # Set wide mode
21
  st.set_page_config(layout="wide")
 
252
 
253
  with tab3:
254
  st.markdown(f'<style>{css_style}</style>', unsafe_allow_html=True)
255
+ k_neighbors = 5
256
+
257
+ # Função para prever valores usando KNN e retornar os vizinhos mais próximos
258
+ def knn_predict(data, target_column, feature_columns, k=5):
259
+ knn = NearestNeighbors(n_neighbors=k)
260
+ knn.fit(data[feature_columns])
261
+ distances, indices = knn.kneighbors(data[feature_columns])
262
+
263
+ # Calcular a média dos vizinhos como valor predito
264
+ predicted_target = []
265
+ for i in range(len(data)):
266
+ neighbors_targets = data.iloc[indices[i]][target_column]
267
+ predicted_target.append(neighbors_targets.mean())
268
+ return np.array(predicted_target), distances, indices
269
+
270
+ # Aplicar KNN e obter valores e índices dos vizinhos mais próximos
271
+ predicted_target, distances, indices = knn_predict(filtered_data, 'target_column', ['latitude', 'longitude'], k=k_neighbors)
272
+
273
+ # Adicionar coluna dos valores preditos ao DataFrame
274
+ filtered_data['Predicted_target'] = predicted_target
275
+
276
+ # Verifica se há previsões para exibir
 
277
  if 'Predicted_target' in filtered_data.columns and not np.all(predicted_target == 0):
278
+ # Criar o mapa com folium
279
+ map_center = [filtered_data['latitude'].mean(), filtered_data['longitude'].mean()]
280
+ mapa = folium.Map(location=map_center, zoom_start=13)
281
+ marker_cluster = MarkerCluster().add_to(mapa)
282
+
283
+ # Iterar pelos pontos e conectar vizinhos mais próximos
284
+ for i, row in filtered_data.iterrows():
285
+ coord_calculo = [row['latitude'], row['longitude']]
286
+ folium.Marker(coord_calculo, popup=f"Predicted: {row['Predicted_target']:.2f}").add_to(marker_cluster)
287
+
288
+ # Conectar os vizinhos com linhas
289
+ for neighbor_idx in indices[i]:
290
+ neighbor_row = filtered_data.iloc[neighbor_idx]
291
+ coord_vizinho = [neighbor_row['latitude'], neighbor_row['longitude']]
292
+ folium.PolyLine([coord_calculo, coord_vizinho], color='blue', weight=2).add_to(mapa)
293
+
294
+ # Exibir o mapa no Streamlit
295
+ st.markdown("## **Mapa dos Vizinhos mais Próximos (KNN)**")
296
+ st.write("O mapa mostra os pontos de dados usados no cálculo do KNN, conectados ao ponto de cálculo.")
297
+ st_folium = st.components.v1.html(mapa._repr_html_(), height=500)
298
  else:
299
  st.warning(f"**Dados insuficientes para inferência do valor. Mínimo necessário:** {k_threshold}")
 
 
 
 
 
 
 
300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
301
  with tab4:
302
  st.markdown(f'<style>{css_style}</style>', unsafe_allow_html=True)
303
  components.iframe("https://davidsb-rl-2.hf.space", height=600, scrolling=True)