Spaces:
Sleeping
Sleeping
fschwartzer
committed on
Commit
•
5ef4720
1
Parent(s):
2755612
Update app.py
Browse files
app.py
CHANGED
@@ -34,12 +34,12 @@ def refinar_resultados(df, exclude_word="conjunto", include_word=False):
|
|
34 |
|
35 |
return df_refinado
|
36 |
|
37 |
-
def get_best_match(query, choices, limit=
|
38 |
# Using RapidFuzz for improved performance and fuzzy matching
|
39 |
matches = process.extract(query, choices, scorer=fuzz.WRatio, limit=limit)
|
40 |
return [match[0] for match in matches if match[1] > 50]
|
41 |
|
42 |
-
def filtrar_itens_similares(df, termo_pesquisa, limit=
|
43 |
titulos = df['Title'].tolist()
|
44 |
titulos_similares = get_best_match(termo_pesquisa, titulos, limit=limit)
|
45 |
df_filtrado = df[df['Title'].isin(titulos_similares)]
|
@@ -78,17 +78,17 @@ def select_nearest_items(df, query):
|
|
78 |
results = []
|
79 |
|
80 |
for _, row in df_sorted.iterrows():
|
81 |
-
if row['Marketplace'] not in marketplaces_selected and len(marketplaces_selected) <
|
82 |
results.append(row)
|
83 |
marketplaces_selected.add(row['Marketplace'])
|
84 |
|
85 |
-
if len(results) >=
|
86 |
break
|
87 |
|
88 |
return pd.DataFrame(results)
|
89 |
|
90 |
|
91 |
-
def search_with_fallback(query, df, limit=15):
|
92 |
query_parts = query.split()
|
93 |
include_conjunto = "conjunto" in query.lower()
|
94 |
|
@@ -104,7 +104,9 @@ def search_with_fallback(query, df, limit=15):
|
|
104 |
|
105 |
def integrated_app(query, titulo, EC, PU):
|
106 |
df_mercadolibre = fetch_data_to_dataframe(query, 50, "mercadolibre")
|
|
|
107 |
df_combined = pd.concat([df_mercadolibre, data_crawler], ignore_index=True)
|
|
|
108 |
|
109 |
if df_combined.empty:
|
110 |
return "Nenhum dado encontrado. Tente uma consulta diferente.", pd.DataFrame()
|
|
|
34 |
|
35 |
return df_refinado
|
36 |
|
37 |
+
def get_best_match(query, choices, limit=50):
|
38 |
# Using RapidFuzz for improved performance and fuzzy matching
|
39 |
matches = process.extract(query, choices, scorer=fuzz.WRatio, limit=limit)
|
40 |
return [match[0] for match in matches if match[1] > 50]
|
41 |
|
42 |
+
def filtrar_itens_similares(df, termo_pesquisa, limit=50):
|
43 |
titulos = df['Title'].tolist()
|
44 |
titulos_similares = get_best_match(termo_pesquisa, titulos, limit=limit)
|
45 |
df_filtrado = df[df['Title'].isin(titulos_similares)]
|
|
|
78 |
results = []
|
79 |
|
80 |
for _, row in df_sorted.iterrows():
|
81 |
+
if row['Marketplace'] not in marketplaces_selected and len(marketplaces_selected) < 5:
|
82 |
results.append(row)
|
83 |
marketplaces_selected.add(row['Marketplace'])
|
84 |
|
85 |
+
if len(results) >= 5:
|
86 |
break
|
87 |
|
88 |
return pd.DataFrame(results)
|
89 |
|
90 |
|
91 |
+
def search_with_fallback(query, df, limit=50):
|
92 |
query_parts = query.split()
|
93 |
include_conjunto = "conjunto" in query.lower()
|
94 |
|
|
|
104 |
|
105 |
def integrated_app(query, titulo, EC, PU):
|
106 |
df_mercadolibre = fetch_data_to_dataframe(query, 50, "mercadolibre")
|
107 |
+
print(df_mercadolibre)
|
108 |
df_combined = pd.concat([df_mercadolibre, data_crawler], ignore_index=True)
|
109 |
+
print(df_combined)
|
110 |
|
111 |
if df_combined.empty:
|
112 |
return "Nenhum dado encontrado. Tente uma consulta diferente.", pd.DataFrame()
|