Spaces:

fschwartzer
/

bens_moveis

Sleeping

fschwartzer committed on Feb 20

Commit

40ac373

•

1 Parent(s): 78633a0

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -25,6 +25,7 @@ def fetch_data_to_dataframe(query, limit=50, source="mercadolibre"):
 def refinar_resultados(df, exclude_word="conjunto", include_word=False):
     df['Title'] = df['Title'].astype(str)
     df_refinado = df[~df['Title'].str.contains("kit", case=False, na=False)]
     df_refinado = df[~df['Title'].str.contains("conj", case=False, na=False)]
     df_refinado = df[~df['Title'].str.contains(" e ", case=False, na=False)]
     padrao_unidades = r'\b(\d+)\s*(unidade|unidades|pacote|pacotes|caixa|caixas)\b'
@@ -39,7 +40,7 @@ def refinar_resultados(df, exclude_word="conjunto", include_word=False):
 def get_best_match(query, choices, limit=50):
     # Using RapidFuzz for improved performance and fuzzy matching
     matches = process.extract(query, choices, scorer=fuzz.WRatio, limit=limit)
-    return [match[0] for match in matches if match[1] > 85]
 def match_query_words_in_titles(query, title):
     """
@@ -84,7 +85,7 @@ def calcular_fator_avaliacao(titulo, EC, PU):
 def select_nearest_items(df, query):
     # Lower the title similarity threshold if necessary
     df['Title_Similarity'] = df['Title'].apply(lambda x: fuzz.WRatio(query, x))
-    df_filtered = df[df['Title_Similarity'] > 85]  # Adjusted threshold
     # Calculate mode price in a more inclusive manner
     mode_price = df_filtered['Price'].mode()

 def refinar_resultados(df, exclude_word="conjunto", include_word=False):
     df['Title'] = df['Title'].astype(str)
     df_refinado = df[~df['Title'].str.contains("kit", case=False, na=False)]
+    df_refinado = df[~df['Title'].str.contains("Kit", case=False, na=False)]
     df_refinado = df[~df['Title'].str.contains("conj", case=False, na=False)]
     df_refinado = df[~df['Title'].str.contains(" e ", case=False, na=False)]
     padrao_unidades = r'\b(\d+)\s*(unidade|unidades|pacote|pacotes|caixa|caixas)\b'
 def get_best_match(query, choices, limit=50):
     # Using RapidFuzz for improved performance and fuzzy matching
     matches = process.extract(query, choices, scorer=fuzz.WRatio, limit=limit)
+    return [match[0] for match in matches if match[1] > 70]
 def match_query_words_in_titles(query, title):
     """
 def select_nearest_items(df, query):
     # Lower the title similarity threshold if necessary
     df['Title_Similarity'] = df['Title'].apply(lambda x: fuzz.WRatio(query, x))
+    df_filtered = df[df['Title_Similarity'] > 70]  # Adjusted threshold
     # Calculate mode price in a more inclusive manner
     mode_price = df_filtered['Price'].mode()