Spaces:
Sleeping
Sleeping
fschwartzer
committed on
Commit
•
78633a0
1
Parent(s):
48dc5ed
Update app.py
Browse files
app.py
CHANGED
@@ -39,12 +39,33 @@ def refinar_resultados(df, exclude_word="conjunto", include_word=False):
|
|
39 |
def get_best_match(query, choices, limit=50):
|
40 |
# Using RapidFuzz for improved performance and fuzzy matching
|
41 |
matches = process.extract(query, choices, scorer=fuzz.WRatio, limit=limit)
|
42 |
-
return [match[0] for match in matches if match[1] >
|
43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
def filtrar_itens_similares(df, termo_pesquisa, limit=50):
|
45 |
-
|
46 |
-
|
47 |
-
df_filtrado = df[
|
|
|
|
|
|
|
|
|
|
|
48 |
return df_filtrado
|
49 |
|
50 |
def calcular_fator_avaliacao(titulo, EC, PU):
|
@@ -63,7 +84,7 @@ def calcular_fator_avaliacao(titulo, EC, PU):
|
|
63 |
def select_nearest_items(df, query):
|
64 |
# Lower the title similarity threshold if necessary
|
65 |
df['Title_Similarity'] = df['Title'].apply(lambda x: fuzz.WRatio(query, x))
|
66 |
-
df_filtered = df[df['Title_Similarity'] >
|
67 |
|
68 |
# Calculate mode price in a more inclusive manner
|
69 |
mode_price = df_filtered['Price'].mode()
|
|
|
39 |
def get_best_match(query, choices, limit=50, min_score=85):
    """Return the choices whose fuzzy-match score against ``query`` is high enough.

    Args:
        query: Text to search for.
        choices: Candidates handed to ``process.extract``.
        limit: Maximum number of candidates requested from ``process.extract``;
            it is applied before the score filter, so fewer than ``limit``
            items may be returned.
        min_score: Exclusive lower bound on the ``fuzz.WRatio`` score. The
            default of 85 keeps the previously hard-coded cutoff.

    Returns:
        A list with the matched choice (element 0 of each result) for every
        result whose score (element 1) is strictly greater than ``min_score``,
        in the order produced by ``process.extract``.
    """
    # Using RapidFuzz for improved performance and fuzzy matching
    matches = process.extract(query, choices, scorer=fuzz.WRatio, limit=limit)
    return [match[0] for match in matches if match[1] > min_score]
|
43 |
|
44 |
+
def match_query_words_in_titles(query, title, match_threshold=80):
    """Check whether every word of ``query`` has a close match inside ``title``.

    The query is lower-cased and split on whitespace; each word is compared
    against the lower-cased title with ``fuzz.partial_ratio``.

    Args:
        query: Search text typed by the user.
        title: Title to test. Non-string values (for example ``None`` or a
            float NaN coming from a missing DataFrame cell) never match.
        match_threshold: Minimum ``partial_ratio`` score a word must reach.
            Defaults to 80, the value that was previously hard-coded.

    Returns:
        True if all query words score at least ``match_threshold``; False as
        soon as one word falls below it. A query with no words (empty or
        whitespace-only) yields True because there is nothing to reject.
    """
    # Missing titles are not strings and have no ``.lower()``; treat them as
    # a non-match instead of raising AttributeError.
    if not isinstance(title, str):
        return False

    # Lower-case the title once rather than once per query word.
    title_lower = title.lower()

    # ``all`` short-circuits on the first word that does not match well enough.
    return all(
        fuzz.partial_ratio(word, title_lower) >= match_threshold
        for word in query.lower().split()
    )
|
59 |
+
|
60 |
def filtrar_itens_similares(df, termo_pesquisa, limit=50):
    """Keep the rows whose title matches the search term, best matches first.

    Args:
        df: DataFrame with a ``'Title'`` column. It is not modified.
        termo_pesquisa: Search term compared against each title.
        limit: Maximum number of rows to return.

    Returns:
        A new DataFrame containing the rows for which
        ``match_query_words_in_titles`` returned a truthy value, with an added
        ``'Overall_Similarity'`` column (``fuzz.WRatio`` of the search term
        against the title), sorted by that column in descending order and
        truncated to ``limit`` rows.
    """
    # Apply the match function to each title, filtering for those that match
    # the query words. The explicit bool cast keeps this a row mask even when
    # ``apply`` does not produce a boolean Series (e.g. for an empty frame).
    mask = df['Title'].apply(
        lambda title: match_query_words_in_titles(termo_pesquisa, title)
    ).astype(bool)

    # Work on an independent copy so that adding a column below does not write
    # into a slice of the caller's frame (avoids SettingWithCopyWarning).
    df_filtrado = df.loc[mask].copy()

    # Further refine the list to the top N matches based on overall similarity
    # to the query.
    df_filtrado['Overall_Similarity'] = df_filtrado['Title'].apply(
        lambda title: fuzz.WRatio(termo_pesquisa, title)
    )
    df_filtrado = df_filtrado.sort_values('Overall_Similarity', ascending=False).head(limit)

    return df_filtrado
|
70 |
|
71 |
def calcular_fator_avaliacao(titulo, EC, PU):
|
|
|
84 |
def select_nearest_items(df, query):
|
85 |
# Lower the title similarity threshold if necessary
|
86 |
df['Title_Similarity'] = df['Title'].apply(lambda x: fuzz.WRatio(query, x))
|
87 |
+
df_filtered = df[df['Title_Similarity'] > 85] # Adjusted threshold
|
88 |
|
89 |
# Calculate mode price in a more inclusive manner
|
90 |
mode_price = df_filtered['Price'].mode()
|