fschwartzer committed on
Commit
2237b4d
1 Parent(s): 601b79b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -13
app.py CHANGED
@@ -23,17 +23,27 @@ def fetch_data_to_dataframe(query, limit=50, source="mercadolibre"):
23
  return pd.DataFrame()
24
 
25
  def refinar_resultados(df):
 
 
 
 
26
  df_refinado = df[~df['Title'].str.contains("kit", case=False, na=False)]
27
- df_refinado = df_refinado[~df_refinado['Title'].str.contains(r'\b(\d+)\s*(unidade|unidades|pacote|pacotes|caixa|caixas)\b', case=False, regex=True)]
 
 
 
28
  return df_refinado
29
 
30
  def get_best_match(query, choices, limit=15):
 
31
  matches = process.extract(query, choices, scorer=fuzz.WRatio, limit=limit)
32
  return [match[0] for match in matches if match[1] > 70]
33
 
34
  def filtrar_itens_similares(df, termo_pesquisa, limit=15):
35
- titulos_similares = get_best_match(termo_pesquisa, df['Title'].tolist(), limit=limit)
36
- return df[df['Title'].isin(titulos_similares)]
 
 
37
 
38
  def calcular_fator_avaliacao(titulo, EC, PU):
39
  filtered_df = bens_df[bens_df['TITULO'] == titulo]
@@ -48,27 +58,56 @@ def calcular_fator_avaliacao(titulo, EC, PU):
48
  fator_avaliacao = max((4 * ec_pontuacao + 6 * PVU - 3 * PUB) / 100, VR)
49
  return fator_avaliacao
50
 
51
def select_nearest_items(df):
    """Return up to five rows whose price is closest to the typical price.

    The typical price is the smallest mode of 'Price'; if ``mode()`` is
    empty, the median is used instead.

    Args:
        df: DataFrame with a numeric 'Price' column. It is not modified.

    Returns:
        A new DataFrame with an added 'Distance' column (absolute difference
        from the typical price), sorted by ascending distance and truncated
        to five rows. Ties keep their input order.
    """
    precos = df['Price']
    modas = precos.mode()
    target_price = modas.min() if not modas.empty else precos.median()

    # assign() builds a new frame, so the caller's DataFrame (often a filtered
    # slice of a larger one) never gains a 'Distance' column as a side effect.
    com_distancia = df.assign(Distance=(precos - target_price).abs())

    # The earlier row-by-row rebuild filtered on `not in set()`, which is
    # always true; sorting the frame directly gives the same rows while
    # preserving column dtypes and not requiring a 'Marketplace' column.
    return com_distancia.sort_values('Distance', kind='stable').head(5)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
  def integrated_app(query, titulo, EC, PU):
57
- df_mercadolibre = fetch_data_to_dataframe(query)
58
  df_combined = pd.concat([df_mercadolibre, data_crawler], ignore_index=True)
 
59
  if df_combined.empty:
60
  return "Nenhum dado encontrado. Tente uma consulta diferente.", pd.DataFrame()
61
 
62
  df_refined = refinar_resultados(df_combined)
63
- df_similares = filtrar_itens_similares(df_refined, query)
 
64
  if df_similares.empty:
65
  return "Nenhum item similar encontrado.", pd.DataFrame()
66
 
67
- fator_avaliacao = calcular_fator_avaliacao(titulo, EC, PU)
68
- if fator_avaliacao is None:
69
- return "Erro ao calcular o fator de avaliação.", pd.DataFrame()
70
 
71
- df_nearest = select_nearest_items(df_similares)
 
72
  valor_avaliacao = df_nearest['Price'].mean() * fator_avaliacao
73
  return f"Valor Médio do Bem: R$ {df_nearest['Price'].mean():.2f}, Fator de Avaliação: {fator_avaliacao*100:.2f}%, Valor de Avaliação: R$ {valor_avaliacao:.2f}", df_nearest
74
 
 
23
  return pd.DataFrame()
24
 
25
  def refinar_resultados(df):
26
+ # Ensure 'Title' is treated as a string and handle NaN values by replacing them with an empty string
27
+ df['Title'] = df['Title'].astype(str).fillna('')
28
+
29
+ # Now apply your filtering condition
30
  df_refinado = df[~df['Title'].str.contains("kit", case=False, na=False)]
31
+ padrao_unidades = r'\b(\d+)\s*(unidade|unidades|pacote|pacotes|caixa|caixas)\b'
32
+
33
+ # Since 'Title' is ensured to be a string, this should not raise the TypeError
34
+ df_refinado = df_refinado[~df_refinado['Title'].str.contains(padrao_unidades, case=False, regex=True)]
35
  return df_refinado
36
 
37
  def get_best_match(query, choices, limit=15):
38
+ # Using RapidFuzz for improved performance and fuzzy matching
39
  matches = process.extract(query, choices, scorer=fuzz.WRatio, limit=limit)
40
  return [match[0] for match in matches if match[1] > 70]
41
 
42
  def filtrar_itens_similares(df, termo_pesquisa, limit=15):
43
+ titulos = df['Title'].tolist()
44
+ titulos_similares = get_best_match(termo_pesquisa, titulos, limit=limit)
45
+ df_filtrado = df[df['Title'].isin(titulos_similares)]
46
+ return df_filtrado
47
 
48
  def calcular_fator_avaliacao(titulo, EC, PU):
49
  filtered_df = bens_df[bens_df['TITULO'] == titulo]
 
58
  fator_avaliacao = max((4 * ec_pontuacao + 6 * PVU - 3 * PUB) / 100, VR)
59
  return fator_avaliacao
60
 
61
def select_nearest_items(df, query):
    """Pick up to five relevant listings priced closest to the typical price.

    Steps:
      1. Score every title against ``query`` with ``fuzz.WRatio`` and keep
         rows scoring above 70.
      2. Discard rows priced above the 75th percentile of the remainder.
      3. Take the smallest mode of the remaining prices (median if ``mode()``
         is empty) as the target price.
      4. Sort by distance to the target, breaking ties by higher title
         similarity, and keep the first five rows.

    Args:
        df: DataFrame with 'Title' and 'Price' columns. It is not modified.
        query: Search text the titles are compared against.

    Returns:
        A new DataFrame with added 'Title_Similarity' and 'Distance' columns,
        or an empty DataFrame when no title scores above 70.
    """
    # assign() returns new frames throughout, so neither the caller's
    # DataFrame nor an intermediate filtered slice is written to in place
    # (which would trigger pandas' chained-assignment warning).
    scored = df.assign(
        Title_Similarity=df['Title'].apply(lambda x: fuzz.WRatio(query, x))
    )
    relevant = scored[scored['Title_Similarity'] > 70]

    if relevant.empty:
        # No broader fallback is attempted here; the caller treats an empty
        # result as "nothing close enough".
        return pd.DataFrame()

    price_cap = relevant['Price'].quantile(0.75)
    reasonable = relevant[relevant['Price'] <= price_cap]

    modes = reasonable['Price'].mode()
    target_price = modes.min() if not modes.empty else reasonable['Price'].median()
    reasonable = reasonable.assign(
        Distance=(reasonable['Price'] - target_price).abs()
    )

    return reasonable.sort_values(
        ['Distance', 'Title_Similarity'], ascending=[True, False]
    ).head(5)
78
+
79
def search_with_fallback(query, df, limit=15):
    """Search ``df`` for ``query``, shortening the query until something matches.

    The query is split on whitespace and tried in full first; after each
    miss the last word is dropped and the search is repeated. The first
    non-empty result from ``filtrar_itens_similares`` is returned.

    Returns:
        The first non-empty filtered DataFrame, or an empty DataFrame when
        no prefix of the query (including an empty query) yields a match.
    """
    termos = query.split()
    while termos:
        encontrados = filtrar_itens_similares(df, " ".join(termos), limit=limit)
        if not encontrados.empty:
            return encontrados
        # Nothing matched: retry with one fewer trailing word.
        termos.pop()
    return pd.DataFrame()
91
 
92
  def integrated_app(query, titulo, EC, PU):
93
+ df_mercadolibre = fetch_data_to_dataframe(query, 50, "mercadolibre")
94
  df_combined = pd.concat([df_mercadolibre, data_crawler], ignore_index=True)
95
+
96
  if df_combined.empty:
97
  return "Nenhum dado encontrado. Tente uma consulta diferente.", pd.DataFrame()
98
 
99
  df_refined = refinar_resultados(df_combined)
100
+ df_similares = search_with_fallback(query, df_refined)
101
+
102
  if df_similares.empty:
103
  return "Nenhum item similar encontrado.", pd.DataFrame()
104
 
105
+ df_nearest = select_nearest_items(df_similares, query) # Ensure this function is adapted to use the query for relevance
106
+ if df_nearest.empty:
107
+ return "Nenhum resultado próximo encontrado.", pd.DataFrame()
108
 
109
+ # Calculate valuation factor and final valuation based on the nearest items
110
+ fator_avaliacao = calcular_fator_avaliacao(titulo, EC, PU)
111
  valor_avaliacao = df_nearest['Price'].mean() * fator_avaliacao
112
  return f"Valor Médio do Bem: R$ {df_nearest['Price'].mean():.2f}, Fator de Avaliação: {fator_avaliacao*100:.2f}%, Valor de Avaliação: R$ {valor_avaliacao:.2f}", df_nearest
113