Spaces:

fschwartzer
/

bens_moveis

Sleeping

App Files Files Community

fschwartzer committed on Feb 20

Commit

905b603

•

1 Parent(s): 40ac373

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -14

app.py CHANGED Viewed

@@ -22,25 +22,28 @@ def fetch_data_to_dataframe(query, limit=50, source="mercadolibre"):
             return df
     return pd.DataFrame()
-def refinar_resultados(df, exclude_word="conjunto", include_word=False):
     df['Title'] = df['Title'].astype(str)
-    df_refinado = df[~df['Title'].str.contains("kit", case=False, na=False)]
-    df_refinado = df[~df['Title'].str.contains("Kit", case=False, na=False)]
-    df_refinado = df[~df['Title'].str.contains("conj", case=False, na=False)]
-    df_refinado = df[~df['Title'].str.contains(" e ", case=False, na=False)]
-    padrao_unidades = r'\b(\d+)\s*(unidade|unidades|pacote|pacotes|caixa|caixas)\b'
-    df_refinado = df_refinado[~df_refinado['Title'].str.contains(padrao_unidades, case=False, regex=True)]
-    if not include_word:
-        # Exclude results containing "conjunto" if it's not part of the original query
-        df_refinado = df_refinado[~df_refinado['Title'].str.contains(exclude_word, case=False)]
     return df_refinado
 def get_best_match(query, choices, limit=50):
     # Using RapidFuzz for improved performance and fuzzy matching
     matches = process.extract(query, choices, scorer=fuzz.WRatio, limit=limit)
-    return [match[0] for match in matches if match[1] > 70]
 def match_query_words_in_titles(query, title):
     """
@@ -85,7 +88,7 @@ def calcular_fator_avaliacao(titulo, EC, PU):
 def select_nearest_items(df, query):
     # Lower the title similarity threshold if necessary
     df['Title_Similarity'] = df['Title'].apply(lambda x: fuzz.WRatio(query, x))
-    df_filtered = df[df['Title_Similarity'] > 70]  # Adjusted threshold
     # Calculate mode price in a more inclusive manner
     mode_price = df_filtered['Price'].mode()

             return df
     return pd.DataFrame()
def refinar_resultados(df, include_words=None):
    """Drop listings whose title suggests a multi-item offer.

    A title is removed when it contains one of the exclusion keywords
    ("kit", "conjunto", "pacote", "caixa", "unidades") as a whole word, or
    a number followed by "unidade", "pacotes" or "caixas".

    Args:
        df: DataFrame with a 'Title' column. The column is cast to ``str``
            in place on the frame that was passed in.
        include_words: Words that must not be used as exclusion keywords
            (e.g. because they are part of the search query). Compared
            case-insensitively. A plain string is accepted too, in which
            case a keyword is skipped when it occurs as a substring.
            ``None`` or an empty collection keeps every keyword active.

    Returns:
        The rows of ``df`` whose title matches none of the active
        exclusion patterns. An empty ``df`` is returned unchanged.
    """
    # An empty frame may not even have a 'Title' column; there is nothing
    # to filter, so hand it back instead of raising KeyError.
    if df.empty:
        return df

    df['Title'] = df['Title'].astype(str)

    # Keywords that indicate the listing is for several items.
    exclude_keywords = ["kit", "conjunto", "pacote", "caixa", "unidades"]

    # Normalise the caller's words to lower case: titles are matched with
    # case=False, so the opt-out should not depend on capitalisation either.
    if include_words is None:
        wanted = set()
    elif isinstance(include_words, str):
        wanted = include_words.lower()
    else:
        wanted = {str(word).lower() for word in include_words}
    active_keywords = [kw for kw in exclude_keywords if kw not in wanted]

    # Non-capturing groups: str.contains only needs a yes/no answer, and
    # pandas warns when the pattern contains capture groups.
    alternatives = []
    if active_keywords:
        # Only add the keyword branch when it is non-empty. An empty group
        # (r'\b()\b') would match at any word boundary and wipe out every row.
        alternatives.append(r'\b(?:' + '|'.join(active_keywords) + r')\b')
    alternatives.append(r'\b\d+\s*(?:unidade|pacotes|caixas)\b')
    exclude_pattern = '|'.join(alternatives)

    is_multiple = df['Title'].str.contains(
        exclude_pattern, case=False, regex=True, na=False
    )
    return df[~is_multiple]
 def get_best_match(query, choices, limit=50, min_score=65):
     """Return the choices that fuzzy-match ``query`` above a score cutoff.

     Args:
         query: Text to look for.
         choices: Candidates passed to RapidFuzz ``process.extract``.
         limit: Forwarded to ``process.extract`` as its ``limit`` argument.
         min_score: Exclusive lower bound on the ``fuzz.WRatio`` score; a
             match is kept only when its score is strictly greater.
             Defaults to 65, the value previously hard-coded here.

     Returns:
         A list with the first element of every match whose score
         (second element) exceeds ``min_score``, in the order returned by
         ``process.extract``.
     """
     # Using RapidFuzz for improved performance and fuzzy matching
     matches = process.extract(query, choices, scorer=fuzz.WRatio, limit=limit)
     # Index-based access keeps this independent of how many fields each
     # match tuple carries beyond (choice, score).
     return [match[0] for match in matches if match[1] > min_score]
 def match_query_words_in_titles(query, title):
     """
 def select_nearest_items(df, query):
     # Lower the title similarity threshold if necessary
     df['Title_Similarity'] = df['Title'].apply(lambda x: fuzz.WRatio(query, x))
+    df_filtered = df[df['Title_Similarity'] > 65]  # Adjusted threshold
     # Calculate mode price in a more inclusive manner
     mode_price = df_filtered['Price'].mode()