Spaces:
Sleeping
Sleeping
fschwartzer
committed on
Commit
•
f555ad2
1
Parent(s):
2c05d82
Update app.py
Browse files
app.py
CHANGED
@@ -59,22 +59,37 @@ def calcular_fator_avaliacao(titulo, EC, PU):
|
|
59 |
return fator_avaliacao
|
60 |
|
61 |
def select_nearest_items(df, query):
|
62 |
-
#
|
63 |
-
# First, filter by title similarity to ensure relevance
|
64 |
df['Title_Similarity'] = df['Title'].apply(lambda x: fuzz.WRatio(query, x))
|
65 |
-
|
|
|
66 |
|
67 |
if df_filtered_by_similarity.empty:
|
68 |
-
#
|
69 |
return pd.DataFrame()
|
70 |
|
71 |
-
#
|
72 |
reasonable_price_df = df_filtered_by_similarity[df_filtered_by_similarity['Price'] <= df_filtered_by_similarity['Price'].quantile(0.75)]
|
73 |
-
|
74 |
target_price = reasonable_price_df['Price'].mode().min() if not reasonable_price_df['Price'].mode().empty else reasonable_price_df['Price'].median()
|
75 |
reasonable_price_df['Distance'] = (reasonable_price_df['Price'] - target_price).abs()
|
76 |
-
|
77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
|
79 |
def search_with_fallback(query, df, limit=15):
|
80 |
query_parts = query.split()
|
|
|
59 |
return fator_avaliacao
|
60 |
|
61 |
def select_nearest_items(df, query, *, similarity_threshold=70,
                         price_quantile=0.75, max_items=5):
    """Select a few relevant, typically priced listings from distinct marketplaces.

    Steps:
      1. Score each ``Title`` against ``query`` with ``fuzz.WRatio`` and keep
         rows scoring strictly above ``similarity_threshold``.
      2. Drop rows whose ``Price`` exceeds the ``price_quantile`` quantile of
         the remaining prices.
      3. Use the smallest mode of the remaining prices as the target price
         (the median is the fallback when no mode can be computed).
      4. Rank rows by distance to the target price (ascending), breaking ties
         by title similarity (descending), and keep the best-ranked row of
         each ``Marketplace`` until ``max_items`` rows are collected.

    Args:
        df: DataFrame with ``Title``, ``Price`` and ``Marketplace`` columns.
            Side effect: a ``Title_Similarity`` column is written onto this
            frame in place (behaviour kept for existing callers).
        query: Search text the titles are compared against.
        similarity_threshold: Minimum (exclusive) title score to keep a row.
        price_quantile: Quantile of ``Price`` used as the upper price bound.
        max_items: Maximum number of rows (one per marketplace) to return.

    Returns:
        DataFrame with the selected rows, including the ``Title_Similarity``
        and ``Distance`` columns, or an empty ``pd.DataFrame()`` when nothing
        qualifies.
    """
    # Relevance score per title; stored on the caller's frame, as before.
    df['Title_Similarity'] = df['Title'].apply(
        lambda title: fuzz.WRatio(query, title)
    )

    relevant = df[df['Title_Similarity'] > similarity_threshold]
    if relevant.empty:
        # No title is close enough to the query.
        return pd.DataFrame()

    price_cap = relevant['Price'].quantile(price_quantile)
    # Explicit copy: a column is added below, and assigning into the result
    # of a boolean filter would otherwise raise SettingWithCopyWarning.
    affordable = relevant[relevant['Price'] <= price_cap].copy()

    # Most frequent price (smallest one on ties); median if there is no mode.
    price_modes = affordable['Price'].mode()
    if price_modes.empty:
        target_price = affordable['Price'].median()
    else:
        target_price = price_modes.min()
    affordable['Distance'] = (affordable['Price'] - target_price).abs()

    # Closest to the target price first, then the most similar title.
    ranked = affordable.sort_values(
        ['Distance', 'Title_Similarity'], ascending=[True, False]
    )

    # Best-ranked row per marketplace, capped at max_items.
    picked = ranked.drop_duplicates(subset='Marketplace', keep='first').head(max_items)
    if picked.empty:
        return pd.DataFrame()
    return picked
|
93 |
|
94 |
def search_with_fallback(query, df, limit=15):
|
95 |
query_parts = query.split()
|