committed on
Browse files
@@ -22,25 +22,28 @@ def fetch_data_to_dataframe(query, limit=50, source="mercadolibre"):
22 |
return df
23 |
return pd.DataFrame()
24 |
25 |
def refinar_resultados(df,
26 |
df['Title'] = df['Title'].astype(str)
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
return df_refinado
39 |
40 |
def get_best_match(query, choices, limit=50):
41 |
# Using RapidFuzz for improved performance and fuzzy matching
42 |
matches = process.extract(query, choices, scorer=fuzz.WRatio, limit=limit)
43 |
return [match[0] for match in matches if match[1] >
44 |
45 |
def match_query_words_in_titles(query, title):
46 |
@@ -85,7 +88,7 @@ def calcular_fator_avaliacao(titulo, EC, PU):
85 |
def select_nearest_items(df, query):
86 |
# Lower the title similarity threshold if necessary
87 |
df['Title_Similarity'] = df['Title'].apply(lambda x: fuzz.WRatio(query, x))
88 |
df_filtered = df[df['Title_Similarity'] >
89 |
90 |
# Calculate mode price in a more inclusive manner
91 |
mode_price = df_filtered['Price'].mode()
22 |
return df
23 |
return pd.DataFrame()
24 |
25 |
def refinar_resultados(df, include_words=None):
    """Drop rows whose title suggests a multi-item listing (kit, pack, box...).

    The 'Title' column of ``df`` is converted to ``str`` in place before
    filtering, so the caller's DataFrame is modified by this call.

    Args:
        df: DataFrame with a 'Title' column.
        include_words: Words (or a query string) the user explicitly asked
            for. Any exclusion keyword found in it is NOT used to filter, so
            a search for "kit" does not discard every kit. Defaults to no
            words.

    Returns:
        The rows of ``df`` whose title matches neither an active exclusion
        keyword nor the "<number> unidade/pacotes/caixas" quantity pattern.
    """
    # None instead of a mutable [] default; an empty tuple behaves the same
    # for the membership test below.
    if include_words is None:
        include_words = ()

    df['Title'] = df['Title'].astype(str)

    # Keywords that indicate a listing for multiples of the item.
    exclude_keywords = ["kit", "conjunto", "pacote", "caixa", "unidades"]

    # Only filter on keywords the caller did not explicitly ask for.
    active_keywords = [
        keyword for keyword in exclude_keywords if keyword not in include_words
    ]

    # Non-capturing groups: pandas warns when a str.contains pattern has
    # capture groups, and nothing here needs the captured text.
    quantity_pattern = r'\b\d+\s*(?:unidade|pacotes|caixas)\b'

    if active_keywords:
        keyword_pattern = r'\b(?:' + '|'.join(active_keywords) + r')\b'
        exclude_pattern = keyword_pattern + '|' + quantity_pattern
    else:
        # With no active keywords an empty alternation would match at every
        # word boundary and discard all rows; use the quantity pattern alone.
        exclude_pattern = quantity_pattern

    is_multiple = df['Title'].str.contains(
        exclude_pattern, case=False, regex=True, na=False
    )
    return df[~is_multiple]
41 |
42 |
43 |
def get_best_match(query, choices, limit=50, min_score=65):
    """Return the choices that fuzzy-match ``query`` above a score threshold.

    Args:
        query: Text to match against.
        choices: Candidates passed to ``process.extract``.
        limit: Maximum number of candidates ``process.extract`` returns.
        min_score: Exclusive lower bound on the WRatio score; only matches
            scoring strictly above it are kept. Defaults to 65.

    Returns:
        List of matching choices, in the order ``process.extract`` returned
        them.
    """
    # Using RapidFuzz for improved performance and fuzzy matching
    matches = process.extract(query, choices, scorer=fuzz.WRatio, limit=limit)
    # Each result is indexed as (choice, score, ...); keep only the choice.
    return [match[0] for match in matches if match[1] > min_score]
47 |
48 |
def match_query_words_in_titles(query, title):
49 |
88 |
def select_nearest_items(df, query):
89 |
# Lower the title similarity threshold if necessary
90 |
df['Title_Similarity'] = df['Title'].apply(lambda x: fuzz.WRatio(query, x))
91 |
df_filtered = df[df['Title_Similarity'] > 65] # Adjusted threshold
92 |
93 |
# Calculate mode price in a more inclusive manner
94 |
mode_price = df_filtered['Price'].mode()