fschwartzer committed on
Commit
f555ad2
1 Parent(s): 2c05d82

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -8
app.py CHANGED
@@ -59,22 +59,37 @@ def calcular_fator_avaliacao(titulo, EC, PU):
59
  return fator_avaliacao
60
 
61
  def select_nearest_items(df, query):
62
- # Implement a more refined selection process
63
- # First, filter by title similarity to ensure relevance
64
  df['Title_Similarity'] = df['Title'].apply(lambda x: fuzz.WRatio(query, x))
65
- df_filtered_by_similarity = df[df['Title_Similarity'] > 70] # Adjust similarity threshold
 
66
 
67
  if df_filtered_by_similarity.empty:
68
- # Fallback to broader criteria if no closely matching titles are found
69
  return pd.DataFrame()
70
 
71
- # Then, select items based on price, considering only those within a reasonable range
72
  reasonable_price_df = df_filtered_by_similarity[df_filtered_by_similarity['Price'] <= df_filtered_by_similarity['Price'].quantile(0.75)]
73
-
74
  target_price = reasonable_price_df['Price'].mode().min() if not reasonable_price_df['Price'].mode().empty else reasonable_price_df['Price'].median()
75
  reasonable_price_df['Distance'] = (reasonable_price_df['Price'] - target_price).abs()
76
-
77
- return reasonable_price_df.sort_values(['Distance', 'Title_Similarity'], ascending=[True, False]).head(5)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
  def search_with_fallback(query, df, limit=15):
80
  query_parts = query.split()
 
59
  return fator_avaliacao
60
 
61
  def select_nearest_items(df, query):
62
+ # Calculate title similarity for relevance
 
63
  df['Title_Similarity'] = df['Title'].apply(lambda x: fuzz.WRatio(query, x))
64
+ # Filter by title similarity threshold
65
+ df_filtered_by_similarity = df[df['Title_Similarity'] > 70] # Adjust similarity threshold as needed
66
 
67
  if df_filtered_by_similarity.empty:
68
+ # If no closely matching titles are found, return an empty DataFrame
69
  return pd.DataFrame()
70
 
71
+ # Filter by price, considering only those within a reasonable range
72
  reasonable_price_df = df_filtered_by_similarity[df_filtered_by_similarity['Price'] <= df_filtered_by_similarity['Price'].quantile(0.75)]
73
+ # Calculate distance from target price for sorting
74
  target_price = reasonable_price_df['Price'].mode().min() if not reasonable_price_df['Price'].mode().empty else reasonable_price_df['Price'].median()
75
  reasonable_price_df['Distance'] = (reasonable_price_df['Price'] - target_price).abs()
76
+
77
+ # Initialize container for selected items ensuring different marketplaces
78
+ selected_items = []
79
+ included_marketplaces = set()
80
+
81
+ # Sort by distance to target price, then by title similarity
82
+ df_sorted = reasonable_price_df.sort_values(['Distance', 'Title_Similarity'], ascending=[True, False])
83
+
84
+ for _, row in df_sorted.iterrows():
85
+ marketplace = row['Marketplace']
86
+ if marketplace not in included_marketplaces:
87
+ selected_items.append(row)
88
+ included_marketplaces.add(marketplace)
89
+ if len(selected_items) >= 5:
90
+ break
91
+
92
+ return pd.DataFrame(selected_items)
93
 
94
  def search_with_fallback(query, df, limit=15):
95
  query_parts = query.split()