import gradio as gr
import requests
import pandas as pd
from rapidfuzz import process, fuzz

# Reference table of asset classes (TITULO, VIDA_UTIL, VALOR_RESIDUAL) used to compute the evaluation factor
bens_df = pd.read_excel('bens_tab.xlsx')

# Pre-crawled marketplace listings, combined later with the live Mercado Livre results
data_crawler = pd.read_csv('data_crawler.csv', index_col=False)
data_crawler = data_crawler[['Title', 'Price', 'Currency', 'Condition', 'Link', 'Marketplace']]
def fetch_data_to_dataframe(query, limit=50, source="mercadolibre"):
    if source == "mercadolibre":
        BASE_URL = "https://api.mercadolibre.com/sites/MLB/search"
        params = {'q': query, 'limit': limit}
        response = requests.get(BASE_URL, params=params)
        if response.status_code == 200:
            data = response.json()
            items = data.get('results', [])
            if not items:
                return pd.DataFrame()
            df = pd.DataFrame(items)[['title', 'price', 'currency_id', 'condition', 'permalink']]
            df.columns = ['Title', 'Price', 'Currency', 'Condition', 'Link']
            df['Marketplace'] = "Mercado Livre"
            return df
    return pd.DataFrame()
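# Usage sketch (hypothetical query; requires network access to the public Mercado Livre search API):
#   df = fetch_data_to_dataframe("cadeira de escritorio", limit=10)
#   print(df[['Title', 'Price', 'Marketplace']].head())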
def refinar_resultados(df, exclude_word="conjunto", include_word=False):
    df = df.copy()  # work on a copy so the caller's DataFrame is not modified
    df['Title'] = df['Title'].astype(str)
    # Chain the exclusions so each filter builds on the previous one
    df_refinado = df[~df['Title'].str.contains("kit", case=False, na=False)]
    df_refinado = df_refinado[~df_refinado['Title'].str.contains("conj", case=False, na=False)]
    df_refinado = df_refinado[~df_refinado['Title'].str.contains(" e ", case=False, na=False)]
    # Drop listings sold in multiples, e.g. "2 unidades" or "12 caixas"
    padrao_unidades = r'\b(\d+)\s*(unidade|unidades|pacote|pacotes|caixa|caixas)\b'
    df_refinado = df_refinado[~df_refinado['Title'].str.contains(padrao_unidades, case=False, regex=True)]
    if not include_word:
        # Exclude results containing "conjunto" if it's not part of the original query
        df_refinado = df_refinado[~df_refinado['Title'].str.contains(exclude_word, case=False)]
    return df_refinado
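# Illustrative example: a title such as "Kit 2 unidades cadeira" is excluded twice over,
# once by the "kit" filter and once by padrao_unidades, which matches "2 unidades".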
def get_best_match(query, choices, limit=50):
    # Using RapidFuzz for improved performance and fuzzy matching
    matches = process.extract(query, choices, scorer=fuzz.WRatio, limit=limit)
    return [match[0] for match in matches if match[1] > 85]
def match_query_words_in_titles(query, title):
    """
    Check if all words in the query have a close match within the title.
    Returns True if all words match to a certain degree; False otherwise.
    """
    query_words = query.lower().split()
    match_threshold = 80  # Adjust this threshold as needed
    for word in query_words:
        # Find the best match for each word in the query within the title
        match_score = fuzz.partial_ratio(word, title.lower())
        if match_score < match_threshold:
            return False  # If any word doesn't match well enough, return False
    return True  # All words matched well enough
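# Illustrative example: match_query_words_in_titles("cadeira giratoria", "Cadeira Giratória de Escritório")
# should return True, since each query word reaches a partial_ratio of at least 80 against the title;
# a query word with no close counterpart in the title pushes its score below the threshold and yields False.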
def filtrar_itens_similares(df, termo_pesquisa, limit=50):
    # Apply the match function to each title, filtering for those that match the query words
    matches = df['Title'].apply(lambda title: match_query_words_in_titles(termo_pesquisa, title))
    df_filtrado = df[matches].copy()  # copy to avoid chained-assignment warnings when adding the new column
    # Further refine the list to the top N matches based on overall similarity to the query
    df_filtrado['Overall_Similarity'] = df_filtrado['Title'].apply(lambda title: fuzz.WRatio(termo_pesquisa, title))
    df_filtrado = df_filtrado.sort_values('Overall_Similarity', ascending=False).head(limit)
    return df_filtrado
def calcular_fator_avaliacao(titulo, EC, PU):
    filtered_df = bens_df[bens_df['TITULO'] == titulo]
    if filtered_df.empty:
        return None  # Or handle the error as needed
    bem_info = filtered_df.iloc[0]
    VU, VR = bem_info['VIDA_UTIL'], bem_info['VALOR_RESIDUAL']
    ec_pontuacao = {'Excelente': 10, 'Bom': 8, 'Regular': 5, 'Péssimo': 2}[EC]
    PU = float(PU)
    PVU = min(10 - ((PU - 1) * (10 / VU)), 10)
    PUB = min(10 - (((VU - PU) - 1) * (10 / VU)), 10)
    fator_avaliacao = max((4 * ec_pontuacao + 6 * PVU - 3 * PUB) / 100, VR)
    return fator_avaliacao
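# Worked example (assumed table values, not taken from bens_tab.xlsx): with VU = 10, VR = 0.1,
# EC = 'Bom' (score 8) and PU = 2:
#   PVU = min(10 - (2 - 1) * (10 / 10), 10) = 9
#   PUB = min(10 - ((10 - 2) - 1) * (10 / 10), 10) = 3
#   fator_avaliacao = max((4*8 + 6*9 - 3*3) / 100, 0.1) = max(0.77, 0.1) = 0.77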
def select_nearest_items(df, query):
    df = df.copy()  # avoid mutating the caller's DataFrame
    # Lower the title similarity threshold if necessary
    df['Title_Similarity'] = df['Title'].apply(lambda x: fuzz.WRatio(query, x))
    df_filtered = df[df['Title_Similarity'] > 85].copy()  # Adjusted threshold
    # Calculate mode price in a more inclusive manner
    mode_price = df_filtered['Price'].mode()
    if mode_price.empty:
        target_price = df_filtered['Price'].median()
    else:
        target_price = mode_price.min()
    df_filtered['Distance'] = (df_filtered['Price'] - target_price).abs()
    df_sorted = df_filtered.sort_values(['Distance', 'Title_Similarity'], ascending=[True, False])
    # Ensure diversity in marketplaces
    marketplaces_selected = set()
    results = []
    for _, row in df_sorted.iterrows():
        if row['Marketplace'] not in marketplaces_selected and len(marketplaces_selected) < 5:
            results.append(row)
            marketplaces_selected.add(row['Marketplace'])
            if len(results) >= 5:
                break
    return pd.DataFrame(results)
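# Example of the selection above: if df_sorted holds three "Mercado Livre" rows and two rows from
# other marketplaces, only the best-ranked row per marketplace is kept, up to five rows in total.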
def search_with_fallback(query, df, limit=50):
    query_parts = query.split()
    include_conjunto = "conjunto" in query.lower()
    # Progressively drop words from the end of the query until some results survive the filters
    for i in range(len(query_parts), 0, -1):
        simplified_query = " ".join(query_parts[:i])
        df_refinado = refinar_resultados(df, include_word=include_conjunto)
        df_filtrado = filtrar_itens_similares(df_refinado, simplified_query, limit=limit)
        if not df_filtrado.empty:
            return df_filtrado
    return pd.DataFrame()
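# Example of the fallback: for "cadeira giratoria ergonomica preta", the search is retried as
# "cadeira giratoria ergonomica", then "cadeira giratoria", then "cadeira",
# stopping at the first simplification that yields results.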
def integrated_app(query, titulo, EC, PU):
    df_mercadolibre = fetch_data_to_dataframe(query, 50, "mercadolibre")
    df_combined = pd.concat([df_mercadolibre, data_crawler], ignore_index=True)
    if df_combined.empty:
        return "Nenhum dado encontrado. Tente uma consulta diferente.", pd.DataFrame()
    # Pass whether "conjunto" is part of the original query
    include_conjunto = "conjunto" in query.lower()
    df_refined = refinar_resultados(df_combined, include_word=include_conjunto)
    df_similares = search_with_fallback(query, df_refined)
    if df_similares.empty:
        return "Nenhum item similar encontrado.", pd.DataFrame()
    df_nearest = select_nearest_items(df_similares, query)
    if df_nearest.empty:
        return "Nenhum resultado próximo encontrado.", pd.DataFrame()
    fator_avaliacao = calcular_fator_avaliacao(titulo, EC, PU)
    if fator_avaliacao is None:
        return "Classificação contábil não encontrada na tabela de bens.", df_nearest
    valor_medio = df_nearest['Price'].mean()
    valor_avaliacao = valor_medio * fator_avaliacao
    return (f"Valor Médio do Bem: R$ {valor_medio:.2f}, "
            f"Fator de Avaliação: {fator_avaliacao*100:.2f}%, "
            f"Valor de Avaliação: R$ {valor_avaliacao:.2f}"), df_nearest
iface = gr.Interface(
    fn=integrated_app,
    inputs=[
        gr.Textbox(label="Digite sua consulta"),
        gr.Dropdown(label="Classificação Contábil do Bem", choices=bens_df['TITULO'].unique().tolist(), value="MOBILIÁRIO EM GERAL"),
        gr.Radio(label="Estado de Conservação do Bem", choices=['Excelente', 'Bom', 'Regular', 'Péssimo'], value="Excelente"),
        gr.Number(label="Período utilizado (anos)", value=1),
    ],
    outputs=[gr.Textbox(label="Cálculo"), gr.Dataframe(label="Resultados da Pesquisa")],
    theme=gr.themes.Monochrome(),
    title="<span style='color: gray; font-size: 48px;'>Avaliação de Bens Móveis</span>",
    description="""<p style="text-align: left;"><b><span style='color: gray; font-size: 40px;'>aval</span><span style='color: black; font-size: 40px;'>ia</span><span style='color: gray; font-size: 40px;'>.se</span></b></p>""",
)

iface.launch()
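# Local sanity check without the Gradio UI (hypothetical inputs; comment out iface.launch() above
# and uncomment the lines below to run it from the command line):
#   texto, tabela = integrated_app("cadeira giratoria", "MOBILIÁRIO EM GERAL", "Bom", 2)
#   print(texto)
#   print(tabela)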