Spaces:
Sleeping
Sleeping
fschwartzer
committed on
Commit
•
c608fd2
1
Parent(s):
bb85b9a
Update app.py
Browse files
app.py
CHANGED
@@ -14,6 +14,8 @@ import sys
|
|
14 |
import pydeck as pdk
|
15 |
from ydata_profiling import ProfileReport
|
16 |
import streamlit.components.v1 as components
|
|
|
|
|
17 |
|
18 |
# Set wide mode
|
19 |
st.set_page_config(layout="wide")
|
@@ -250,169 +252,52 @@ with tab2:
|
|
250 |
|
251 |
with tab3:
|
252 |
st.markdown(f'<style>{css_style}</style>', unsafe_allow_html=True)
|
253 |
-
|
254 |
-
|
255 |
-
#
|
256 |
-
def
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
# Check if there are predictions to display
|
276 |
if 'Predicted_target' in filtered_data.columns and not np.all(predicted_target == 0):
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
287 |
else:
|
288 |
st.warning(f"**Dados insuficientes para inferência do valor. Mínimo necessário:** {k_threshold}")
|
289 |
-
|
290 |
-
# Generate the profile report
|
291 |
-
with st.spinner('Carregando análise...'):
|
292 |
-
profile = ProfileReport(filtered_data, title="Análise Exploratória dos Dados", explorative=True)
|
293 |
-
print(profile.config.json(indent=4))
|
294 |
-
profile.config.html.style.primary_colors = ['#FFD700', '#FFD700', '#FFD700'] # Define todas as cores primárias para amarelo ouro
|
295 |
-
profile_html = profile.to_html()
|
296 |
|
297 |
-
# Replace English text with Portuguese
|
298 |
-
profile_html = profile_html.replace("Overview", "Visão geral")
|
299 |
-
profile_html = profile_html.replace("Alerts", "Alertas")
|
300 |
-
profile_html = profile_html.replace("Reproduction", "Reprodução")
|
301 |
-
profile_html = profile_html.replace("Dataset statistics", "Estatísticas do conjunto de dados")
|
302 |
-
profile_html = profile_html.replace("Variable types", "Tipos de variáveis")
|
303 |
-
profile_html = profile_html.replace("Variables", "Variáveis")
|
304 |
-
profile_html = profile_html.replace("Interactions", "Interações")
|
305 |
-
profile_html = profile_html.replace("Correlations", "Correlações")
|
306 |
-
profile_html = profile_html.replace("Missing values", "Valores faltantes")
|
307 |
-
profile_html = profile_html.replace("Sample", "Amostra")
|
308 |
-
profile_html = profile_html.replace("Number of variables", "Número de variáveis")
|
309 |
-
profile_html = profile_html.replace("Number of observations", "Número de observações")
|
310 |
-
profile_html = profile_html.replace("Missing cells", "Células faltantes")
|
311 |
-
profile_html = profile_html.replace("Missing cells (%)", "Células faltantes (%)")
|
312 |
-
profile_html = profile_html.replace("Duplicate rows", "Linhas duplicadas")
|
313 |
-
profile_html = profile_html.replace("Duplicate rows (%)", "Linhas duplicadas (%)")
|
314 |
-
profile_html = profile_html.replace("Total size in memory", "Tamanho total na memória")
|
315 |
-
profile_html = profile_html.replace("Average record size in memory", "Tamanho médio do registro na memória")
|
316 |
-
profile_html = profile_html.replace("Text", "Texto")
|
317 |
-
profile_html = profile_html.replace("Numeric", "Numérico")
|
318 |
-
profile_html = profile_html.replace("Categorical", "Categórico")
|
319 |
-
profile_html = profile_html.replace("Distinct", "Distinto")
|
320 |
-
profile_html = profile_html.replace("Distinct (%)", "Distinto (%)")
|
321 |
-
profile_html = profile_html.replace("Missing", "Faltando")
|
322 |
-
profile_html = profile_html.replace("Missing (%)", "Faltando (%)")
|
323 |
-
profile_html = profile_html.replace("Memory size", "Tamanho da memória")
|
324 |
-
profile_html = profile_html.replace("Real number", "Número real")
|
325 |
-
profile_html = profile_html.replace("Infinite", "Infinito")
|
326 |
-
profile_html = profile_html.replace("Infinite (%)", "Infinito (%)")
|
327 |
-
profile_html = profile_html.replace("Mean", "Média")
|
328 |
-
profile_html = profile_html.replace("Minimum", "Mínimo")
|
329 |
-
profile_html = profile_html.replace("Maximum", "Máximo")
|
330 |
-
profile_html = profile_html.replace("Zeros", "Zeros")
|
331 |
-
profile_html = profile_html.replace("Zeros (%)", "Zeros (%)")
|
332 |
-
profile_html = profile_html.replace("Negative", "Negativo")
|
333 |
-
profile_html = profile_html.replace("Negative (%)", "Negativo (%)")
|
334 |
-
profile_html = profile_html.replace("Other values (2)", "Outros valores (2)")
|
335 |
-
profile_html = profile_html.replace("Link", "Link")
|
336 |
-
profile_html = profile_html.replace("UNIQUE", "ÚNICO")
|
337 |
-
profile_html = profile_html.replace("CONSTANT", "CONSTANTE")
|
338 |
-
profile_html = profile_html.replace("Average", "Média")
|
339 |
-
profile_html = profile_html.replace("Number of rows", "Número de linhas")
|
340 |
-
profile_html = profile_html.replace("Distinct values", "Valores distintos")
|
341 |
-
profile_html = profile_html.replace("Histogram", "Histograma")
|
342 |
-
profile_html = profile_html.replace("Top", "Top")
|
343 |
-
profile_html = profile_html.replace("Bottom", "Inferior")
|
344 |
-
profile_html = profile_html.replace("Frequency", "Frequência")
|
345 |
-
profile_html = profile_html.replace("has constant value", "tem valores constantes")
|
346 |
-
profile_html = profile_html.replace("has unique value", "tem valores únicos")
|
347 |
-
profile_html = profile_html.replace("Analysis started", "Início da análise")
|
348 |
-
profile_html = profile_html.replace("Analysis finished", "Término da análise")
|
349 |
-
profile_html = profile_html.replace("Duration", "Duração")
|
350 |
-
profile_html = profile_html.replace("Software version", "Versão do software")
|
351 |
-
profile_html = profile_html.replace("Download configuration", "Configuração para download")
|
352 |
-
profile_html = profile_html.replace("Select Columns", "Selecione coluna")
|
353 |
-
profile_html = profile_html.replace("Length", "Comprimento")
|
354 |
-
profile_html = profile_html.replace("Max length", "Comprimento máximo")
|
355 |
-
profile_html = profile_html.replace("Median length", "Comprimento mediano")
|
356 |
-
profile_html = profile_html.replace("Mean length", "Comprimento médio")
|
357 |
-
profile_html = profile_html.replace("Min length", "Comprimento mínimo")
|
358 |
-
profile_html = profile_html.replace("Characters and Unicode", "Caracteres e Unicode")
|
359 |
-
profile_html = profile_html.replace("Total characters", "Total de caracteres")
|
360 |
-
profile_html = profile_html.replace("Distinct characters", "Caracteres distintos")
|
361 |
-
profile_html = profile_html.replace("Distinct categories", "Categorias distintas")
|
362 |
-
profile_html = profile_html.replace("Distinct scripts", "Scripts distintos")
|
363 |
-
profile_html = profile_html.replace("Distinct blocks", "Blocos distintos")
|
364 |
-
profile_html = profile_html.replace("The Unicode Standard assigns character properties to each code point, which can be used to analyse textual variables.", "O Padrão Unicode atribui propriedades de caracteres a cada ponto de código, que podem ser usados para analisar variáveis textuais.")
|
365 |
-
profile_html = profile_html.replace("Unique", "Único")
|
366 |
-
profile_html = profile_html.replace("Unique (%)", "Único (%)")
|
367 |
-
profile_html = profile_html.replace("Words", "Palavras")
|
368 |
-
profile_html = profile_html.replace("Characters", "Caracteres")
|
369 |
-
profile_html = profile_html.replace("Most occurring characters", "Caracteres mais frequentes")
|
370 |
-
profile_html = profile_html.replace("Categories", "Categorias")
|
371 |
-
profile_html = profile_html.replace("Most occurring categories", "Categorias mais frequentes")
|
372 |
-
profile_html = profile_html.replace("(unknown)", "(desconhecido)")
|
373 |
-
profile_html = profile_html.replace("Most frequent character per category", "Caractere mais frequente por categoria")
|
374 |
-
profile_html = profile_html.replace("Scripts", "Scripts")
|
375 |
-
profile_html = profile_html.replace("Most occurring scripts", "Scripts mais frequentes")
|
376 |
-
profile_html = profile_html.replace("Most frequent character per script", "Caractere mais frequente por script")
|
377 |
-
profile_html = profile_html.replace("Blocks", "Blocos")
|
378 |
-
profile_html = profile_html.replace("Most occurring blocks", "Blocos mais frequentes")
|
379 |
-
profile_html = profile_html.replace("Frequency (%)", "Frequência (%)")
|
380 |
-
profile_html = profile_html.replace("Most frequent character per block", "Caractere mais frequente por bloco")
|
381 |
-
profile_html = profile_html.replace("Matrix", "Matriz")
|
382 |
-
profile_html = profile_html.replace("First rows", "Primeiras linhas")
|
383 |
-
profile_html = profile_html.replace("Last rows", "Últimas linhas")
|
384 |
-
profile_html = profile_html.replace("More details", "Maior detalhamento")
|
385 |
-
profile_html = profile_html.replace("Statistics", "Estatísticas")
|
386 |
-
profile_html = profile_html.replace("Quantile statistics", "Estatísticas de quantis")
|
387 |
-
profile_html = profile_html.replace("Common values", "Valores comuns")
|
388 |
-
profile_html = profile_html.replace("Extreme values", "Valores extremos")
|
389 |
-
profile_html = profile_html.replace("5-th percentile", "5º percentil")
|
390 |
-
profile_html = profile_html.replace("median", "mediana")
|
391 |
-
profile_html = profile_html.replace("95-th percentile", "95º percentil")
|
392 |
-
profile_html = profile_html.replace("Range", "Intervalo")
|
393 |
-
profile_html = profile_html.replace("Interquartile range (IQR)", "Intervalo Interquartil")
|
394 |
-
profile_html = profile_html.replace("Descriptive statistics", "Estatísticas descritivas")
|
395 |
-
profile_html = profile_html.replace("Standard deviation", "Desvio padrão")
|
396 |
-
profile_html = profile_html.replace("Coefficient of variation (CV)", "Coeficiente de variação (CV)")
|
397 |
-
profile_html = profile_html.replace("Kurtosis", "Curtose")
|
398 |
-
profile_html = profile_html.replace("Median Absolute Deviation (MAD)", "Desvio Absoluto Mediano (MAD)")
|
399 |
-
profile_html = profile_html.replace("Skewness", "Assimetria")
|
400 |
-
profile_html = profile_html.replace("Sum", "Soma")
|
401 |
-
profile_html = profile_html.replace("Variance", "Variância")
|
402 |
-
profile_html = profile_html.replace("Monotonicity", "Monotonicidade")
|
403 |
-
profile_html = profile_html.replace("Not monotonic", "Não monotônica")
|
404 |
-
profile_html = profile_html.replace("Histogram with fixed size bins (bins=16)", "Histograma com intervalos de tamanho fixo (intervalos=16)")
|
405 |
-
profile_html = profile_html.replace("Minimum 10 values", "Mínimo 10 valores")
|
406 |
-
profile_html = profile_html.replace("Maximum 10 values", "Máximo 10 valores")
|
407 |
-
profile_html = profile_html.replace("1st row", "1ª linha")
|
408 |
-
profile_html = profile_html.replace("2nd row", "2ª linha")
|
409 |
-
profile_html = profile_html.replace("3rd row", "3ª linha")
|
410 |
-
profile_html = profile_html.replace("4th row", "4ª linha")
|
411 |
-
profile_html = profile_html.replace("5th row", "5ª linha")
|
412 |
-
|
413 |
-
# Display the modified HTML in Streamlit
|
414 |
-
components.html(profile_html, height=600, scrolling=True)
|
415 |
-
|
416 |
with tab4:
|
417 |
st.markdown(f'<style>{css_style}</style>', unsafe_allow_html=True)
|
418 |
components.iframe("https://davidsb-rl-2.hf.space", height=600, scrolling=True)
|
|
|
14 |
import pydeck as pdk
|
15 |
from ydata_profiling import ProfileReport
|
16 |
import streamlit.components.v1 as components
|
17 |
+
from folium.plugins import MarkerCluster
|
18 |
+
from sklearn.neighbors import NearestNeighbors
|
19 |
|
20 |
# Set wide mode
|
21 |
st.set_page_config(layout="wide")
|
|
|
252 |
|
253 |
with tab3:
|
254 |
st.markdown(f'<style>{css_style}</style>', unsafe_allow_html=True)
|
255 |
+
k_neighbors = 5
|
256 |
+
|
257 |
+
def knn_predict(data, target_column, feature_columns, k=5):
    """Predict a value for every row as the mean target of its nearest neighbours.

    The neighbour search is fitted on ``data[feature_columns]`` and then queried
    with the very same rows, so each row is typically listed among its own
    neighbours.

    Args:
        data: DataFrame holding both the feature columns and the target column.
        target_column: Name of the column whose neighbour values are averaged.
        feature_columns: Column names used to measure distance between rows.
        k: Requested number of neighbours. It is capped at the number of rows
            so a small dataset does not make the neighbour search raise.

    Returns:
        A tuple ``(predicted_target, distances, indices)``: a NumPy array with
        one prediction per row, plus the distance and positional-index arrays
        returned by ``kneighbors``. For empty ``data`` all three are empty.
    """
    n_rows = len(data)
    if n_rows == 0:
        # Nothing to fit: hand back empty results instead of letting the
        # neighbour search fail on zero samples.
        return np.array([]), np.empty((0, 0)), np.empty((0, 0), dtype=int)

    features = data[feature_columns]
    knn = NearestNeighbors(n_neighbors=min(k, n_rows))
    knn.fit(features)
    distances, indices = knn.kneighbors(features)

    # Select the target column once; `indices` rows are positional, hence iloc.
    target_values = data[target_column]
    predicted_target = [target_values.iloc[row].mean() for row in indices]
    return np.array(predicted_target), distances, indices
|
269 |
+
|
270 |
+
# Run KNN over the filtered rows. `indices[p]` holds the positional row numbers
# of the neighbours of the row at position `p`.
# NOTE(review): 'target_column' is passed as a literal column name — confirm
# that filtered_data really has a column called exactly that.
predicted_target, distances, indices = knn_predict(
    filtered_data, 'target_column', ['latitude', 'longitude'], k=k_neighbors
)

# Store the predicted values next to the rows they were computed for.
filtered_data['Predicted_target'] = predicted_target

# Only draw the map when at least one prediction is non-zero.
if 'Predicted_target' in filtered_data.columns and not np.all(predicted_target == 0):
    # Build the folium map centred on the mean coordinate.
    # NOTE(review): this needs the bare name `folium` in scope; only
    # `from folium.plugins import MarkerCluster` is visible here — confirm
    # `import folium` exists at the top of the file.
    map_center = [filtered_data['latitude'].mean(), filtered_data['longitude'].mean()]
    mapa = folium.Map(location=map_center, zoom_start=13)
    marker_cluster = MarkerCluster().add_to(mapa)

    # `indices` is positional, while iterrows() yields index *labels*; count
    # positions explicitly so a filtered (non 0..n-1) index still lines up.
    for pos, (_, row) in enumerate(filtered_data.iterrows()):
        coord_calculo = [row['latitude'], row['longitude']]
        folium.Marker(coord_calculo, popup=f"Predicted: {row['Predicted_target']:.2f}").add_to(marker_cluster)

        # Connect the point to each of its neighbours with a line.
        for neighbor_idx in indices[pos]:
            if neighbor_idx == pos:
                # A point is usually its own nearest neighbour; a line from a
                # point to itself would have zero length.
                continue
            neighbor_row = filtered_data.iloc[neighbor_idx]
            coord_vizinho = [neighbor_row['latitude'], neighbor_row['longitude']]
            folium.PolyLine([coord_calculo, coord_vizinho], color='blue', weight=2).add_to(mapa)

    # Show the map in Streamlit by embedding folium's HTML representation.
    st.markdown("## **Mapa dos Vizinhos mais Próximos (KNN)**")
    st.write("O mapa mostra os pontos de dados usados no cálculo do KNN, conectados ao ponto de cálculo.")
    components.html(mapa._repr_html_(), height=500)
else:
    st.warning(f"**Dados insuficientes para inferência do valor. Mínimo necessário:** {k_threshold}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
300 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
301 |
with tab4:
    # Inject the shared CSS, then embed the externally hosted Space in an iframe.
    st.markdown(
        f'<style>{css_style}</style>',
        unsafe_allow_html=True,
    )
    components.iframe(
        "https://davidsb-rl-2.hf.space",
        height=600,
        scrolling=True,
    )
|