Spaces:
Sleeping
Sleeping
fschwartzer
committed on
Commit
•
40ac373
1
Parent(s):
78633a0
Update app.py
Browse files
app.py
CHANGED
@@ -25,6 +25,7 @@ def fetch_data_to_dataframe(query, limit=50, source="mercadolibre"):
|
|
25 |
def refinar_resultados(df, exclude_word="conjunto", include_word=False):
|
26 |
df['Title'] = df['Title'].astype(str)
|
27 |
df_refinado = df[~df['Title'].str.contains("kit", case=False, na=False)]
|
|
|
28 |
df_refinado = df[~df['Title'].str.contains("conj", case=False, na=False)]
|
29 |
df_refinado = df[~df['Title'].str.contains(" e ", case=False, na=False)]
|
30 |
padrao_unidades = r'\b(\d+)\s*(unidade|unidades|pacote|pacotes|caixa|caixas)\b'
|
@@ -39,7 +40,7 @@ def refinar_resultados(df, exclude_word="conjunto", include_word=False):
|
|
39 |
def get_best_match(query, choices, limit=50):
|
40 |
# Using RapidFuzz for improved performance and fuzzy matching
|
41 |
matches = process.extract(query, choices, scorer=fuzz.WRatio, limit=limit)
|
42 |
-
return [match[0] for match in matches if match[1] >
|
43 |
|
44 |
def match_query_words_in_titles(query, title):
|
45 |
"""
|
@@ -84,7 +85,7 @@ def calcular_fator_avaliacao(titulo, EC, PU):
|
|
84 |
def select_nearest_items(df, query):
|
85 |
# Lower the title similarity threshold if necessary
|
86 |
df['Title_Similarity'] = df['Title'].apply(lambda x: fuzz.WRatio(query, x))
|
87 |
-
df_filtered = df[df['Title_Similarity'] >
|
88 |
|
89 |
# Calculate mode price in a more inclusive manner
|
90 |
mode_price = df_filtered['Price'].mode()
|
|
|
25 |
def refinar_resultados(df, exclude_word="conjunto", include_word=False):
|
26 |
df['Title'] = df['Title'].astype(str)
|
27 |
df_refinado = df[~df['Title'].str.contains("kit", case=False, na=False)]
|
28 |
+
df_refinado = df[~df['Title'].str.contains("Kit", case=False, na=False)]
|
29 |
df_refinado = df[~df['Title'].str.contains("conj", case=False, na=False)]
|
30 |
df_refinado = df[~df['Title'].str.contains(" e ", case=False, na=False)]
|
31 |
padrao_unidades = r'\b(\d+)\s*(unidade|unidades|pacote|pacotes|caixa|caixas)\b'
|
|
|
40 |
def get_best_match(query, choices, limit=50):
|
41 |
# Using RapidFuzz for improved performance and fuzzy matching
|
42 |
matches = process.extract(query, choices, scorer=fuzz.WRatio, limit=limit)
|
43 |
+
return [match[0] for match in matches if match[1] > 70]
|
44 |
|
45 |
def match_query_words_in_titles(query, title):
|
46 |
"""
|
|
|
85 |
def select_nearest_items(df, query):
|
86 |
# Lower the title similarity threshold if necessary
|
87 |
df['Title_Similarity'] = df['Title'].apply(lambda x: fuzz.WRatio(query, x))
|
88 |
+
df_filtered = df[df['Title_Similarity'] > 70] # Adjusted threshold
|
89 |
|
90 |
# Calculate mode price in a more inclusive manner
|
91 |
mode_price = df_filtered['Price'].mode()
|