import gradio as gr
import requests
import pandas as pd
from rapidfuzz import process, fuzz
bens_df = pd.read_excel('bens_tab.xlsx')
data_crawler = pd.read_csv('data_crawler.csv', index_col=False)
data_crawler = data_crawler[['Title', 'Price', 'Currency', 'Condition', 'Link', 'Marketplace']]
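# Assumption: bens_tab.xlsx provides at least the TITULO, VIDA_UTIL and
# VALOR_RESIDUAL columns consumed by calcular_fator_avaliacao below, and
# data_crawler.csv holds previously crawled listings already matching the
# Title/Price/Currency/Condition/Link/Marketplace layout selected above.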
def fetch_data_to_dataframe(query, limit=50, source="mercadolibre"):
    # Query the Mercado Libre public search API and normalize the results into
    # the same column layout used by the crawler data.
    if source == "mercadolibre":
        BASE_URL = "https://api.mercadolibre.com/sites/MLB/search"
        params = {'q': query, 'limit': limit}
        response = requests.get(BASE_URL, params=params)
        if response.status_code == 200:
            items = response.json().get('results', [])
            if items:
                df = pd.DataFrame(items)[['title', 'price', 'currency_id', 'condition', 'permalink']]
                df.columns = ['Title', 'Price', 'Currency', 'Condition', 'Link']
                df['Marketplace'] = "Mercado Livre"
                return df
    return pd.DataFrame()
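# Hypothetical usage (not executed here): fetch_data_to_dataframe("cadeira de
# escritório", limit=10) should return a DataFrame with the columns
# ['Title', 'Price', 'Currency', 'Condition', 'Link', 'Marketplace'], or an
# empty DataFrame when the request fails or yields no results.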
def refinar_resultados(df):
    # Fill NaN titles before casting to str so missing values become empty
    # strings rather than the literal text "nan"
    df['Title'] = df['Title'].fillna('').astype(str)
    # Drop bundled listings ("kit"), which would distort unit prices
    df_refinado = df[~df['Title'].str.contains("kit", case=False, na=False)]
    # Drop listings sold in multiples (e.g. "2 unidades", "3 caixas")
    padrao_unidades = r'\b(\d+)\s*(unidade|unidades|pacote|pacotes|caixa|caixas)\b'
    df_refinado = df_refinado[~df_refinado['Title'].str.contains(padrao_unidades, case=False, regex=True)]
    return df_refinado
def get_best_match(query, choices, limit=15):
    # Use RapidFuzz for fast fuzzy matching
    matches = process.extract(query, choices, scorer=fuzz.WRatio, limit=limit)
    return [match[0] for match in matches if match[1] > 70]
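# Note: process.extract yields (choice, score, index) tuples with WRatio scores
# on a 0-100 scale, so only titles scoring above 70 survive. A hypothetical
# query "notebook dell" would keep "Notebook Dell Inspiron 15" and drop
# unrelated titles.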
def filtrar_itens_similares(df, termo_pesquisa, limit=15):
    titulos = df['Title'].tolist()
    titulos_similares = get_best_match(termo_pesquisa, titulos, limit=limit)
    df_filtrado = df[df['Title'].isin(titulos_similares)]
    return df_filtrado
def calcular_fator_avaliacao(titulo, EC, PU):
    filtered_df = bens_df[bens_df['TITULO'] == titulo]
    if filtered_df.empty:
        return None  # Unknown asset class; the caller must handle this
    bem_info = filtered_df.iloc[0]
    VU, VR = bem_info['VIDA_UTIL'], bem_info['VALOR_RESIDUAL']
    ec_pontuacao = {'Excelente': 10, 'Bom': 8, 'Regular': 5, 'Péssimo': 2}[EC]
    PU = float(PU)
    PVU = min(10 - ((PU - 1) * (10 / VU)), 10)
    PUB = min(10 - (((VU - PU) - 1) * (10 / VU)), 10)
    fator_avaliacao = max((4 * ec_pontuacao + 6 * PVU - 3 * PUB) / 100, VR)
    return fator_avaliacao
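# Worked example with assumed values, for illustration only: VIDA_UTIL = 10,
# VALOR_RESIDUAL = 0.10, EC = 'Bom' (score 8) and PU = 3 give
#   PVU = min(10 - (3 - 1) * (10 / 10), 10) = 8
#   PUB = min(10 - ((10 - 3) - 1) * (10 / 10), 10) = 4
#   fator = max((4*8 + 6*8 - 3*4) / 100, 0.10) = max(0.68, 0.10) = 0.68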
def select_nearest_items(df, query):
    # Filter by title similarity first to ensure relevance
    df = df.copy()
    df['Title_Similarity'] = df['Title'].apply(lambda x: fuzz.WRatio(query, x))
    df_filtered_by_similarity = df[df['Title_Similarity'] > 70]  # similarity threshold
    if df_filtered_by_similarity.empty:
        # No closely matching titles were found
        return pd.DataFrame()
    # Then keep only items priced up to the 75th percentile
    reasonable_price_df = df_filtered_by_similarity[
        df_filtered_by_similarity['Price'] <= df_filtered_by_similarity['Price'].quantile(0.75)
    ].copy()
    # Anchor on the most frequent price (lowest mode on ties), falling back to the median
    price_mode = reasonable_price_df['Price'].mode()
    target_price = price_mode.min() if not price_mode.empty else reasonable_price_df['Price'].median()
    reasonable_price_df['Distance'] = (reasonable_price_df['Price'] - target_price).abs()
    return reasonable_price_df.sort_values(['Distance', 'Title_Similarity'], ascending=[True, False]).head(5)
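# Design note: the 75th-percentile price cut, the modal/median anchor, the
# 70-point similarity threshold and the top-5 selection are tuning choices
# meant to keep the final average resistant to outlier listings; none of them
# is a fixed requirement.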
def search_with_fallback(query, df, limit=15):
    # Start with the full query and progressively drop trailing terms
    query_parts = query.split()
    for i in range(len(query_parts), 0, -1):
        simplified_query = " ".join(query_parts[:i])
        df_filtrado = filtrar_itens_similares(df, simplified_query, limit=limit)
        if not df_filtrado.empty:
            # Return as soon as any simplification yields results
            return df_filtrado
    # No simplification of the query produced results
    return pd.DataFrame()
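# Illustration (hypothetical query): for "cadeira giratória presidente" the
# fallback tries the full phrase, then "cadeira giratória", then "cadeira",
# stopping at the first simplification that returns matches.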
def integrated_app(query, titulo, EC, PU):
    df_mercadolibre = fetch_data_to_dataframe(query, 50, "mercadolibre")
    df_combined = pd.concat([df_mercadolibre, data_crawler], ignore_index=True)
    if df_combined.empty:
        return "Nenhum dado encontrado. Tente uma consulta diferente.", pd.DataFrame()
    df_refined = refinar_resultados(df_combined)
    df_similares = search_with_fallback(query, df_refined)
    if df_similares.empty:
        return "Nenhum item similar encontrado.", pd.DataFrame()
    df_nearest = select_nearest_items(df_similares, query)
    if df_nearest.empty:
        return "Nenhum resultado próximo encontrado.", pd.DataFrame()
    # Valuation factor and final valuation based on the nearest items
    fator_avaliacao = calcular_fator_avaliacao(titulo, EC, PU)
    if fator_avaliacao is None:
        return "Classificação contábil não encontrada.", df_nearest
    valor_medio = df_nearest['Price'].mean()
    valor_avaliacao = valor_medio * fator_avaliacao
    return (f"Valor Médio do Bem: R$ {valor_medio:.2f}, "
            f"Fator de Avaliação: {fator_avaliacao*100:.2f}%, "
            f"Valor de Avaliação: R$ {valor_avaliacao:.2f}"), df_nearest
iface = gr.Interface(
    fn=integrated_app,
    inputs=[
        gr.Textbox(label="Digite sua consulta"),
        gr.Dropdown(label="Classificação Contábil do Bem",
                    choices=bens_df['TITULO'].unique().tolist(),
                    value="MOBILIÁRIO EM GERAL"),
        gr.Radio(label="Estado de Conservação do Bem",
                 choices=['Excelente', 'Bom', 'Regular', 'Péssimo'], value="Excelente"),
        gr.Number(label="Período utilizado (anos)", value=1),
    ],
    outputs=[gr.Textbox(label="Cálculo"), gr.Dataframe(label="Resultados da Pesquisa")],
    theme=gr.themes.Monochrome(),
    title="<span style='color: gray; font-size: 48px;'>Avaliação de Bens Móveis</span>",
    description="""<p style="text-align: left;"><b><span style='color: gray; font-size: 40px;'>aval</span><span style='color: black; font-size: 40px;'>ia</span><span style='color: gray; font-size: 40px;'>.se</span></b></p>""")

iface.launch()