Spaces:
Sleeping
Sleeping
File size: 7,629 Bytes
c5a97b3 c038e47 c5a97b3 99aed09 53ae920 1a5aa7c b4cb87b 99aed09 2598bbd 7a6bd46 2598bbd 601b79b 2598bbd 7a6bd46 1efcf05 2c05d82 905b603 1efcf05 905b603 3992853 905b603 5ef4720 2237b4d 5d4f445 905b603 07df588 78633a0 5ef4720 78633a0 2237b4d c5a97b3 2fe5b5e d3db32a 601b79b d3db32a 601b79b 99aed09 601b79b aff5c22 f308877 f3ff845 6baa204 2237b4d 905b603 6baa204 f555ad2 6baa204 f555ad2 6baa204 f555ad2 5ef4720 6baa204 5ef4720 f555ad2 6baa204 2237b4d 5ef4720 2237b4d 2c05d82 2237b4d 7c67564 2c05d82 2237b4d 2c05d82 2237b4d c1badbd 7c67564 2fe5b5e 59e4db6 2237b4d 074db95 2237b4d 074db95 2598bbd c5a97b3 2c05d82 ae59a9a 59e4db6 e5b70df 2237b4d 78ac961 f2141ad 2c05d82 2237b4d 601b79b 2237b4d 601b79b 99aed09 59e4db6 3bd1e98 f308877 601b79b 4d527e1 601b79b c4f8c41 205d4a0 601b79b 3bd1e98 78ac961 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 |
import gradio as gr
import requests
import pandas as pd
from rapidfuzz import process, fuzz
bens_df = pd.read_excel('bens_tab.xlsx')
data_crawler = pd.read_csv('data_crawler.csv', index_col=False)
data_crawler = data_crawler[['Title', 'Price', 'Currency', 'Condition', 'Link', 'Marketplace']]
def fetch_data_to_dataframe(query, limit=50, source="mercadolibre"):
if source == "mercadolibre":
BASE_URL = "https://api.mercadolibre.com/sites/MLB/search"
params = {'q': query, 'limit': limit}
response = requests.get(BASE_URL, params=params)
if response.status_code == 200:
data = response.json()
items = data.get('results', [])
df = pd.DataFrame(items)[['title', 'price', 'currency_id', 'condition', 'permalink']]
df.columns = ['Title', 'Price', 'Currency', 'Condition', 'Link']
df['Marketplace'] = "Mercado Livre"
return df
return pd.DataFrame()
def refinar_resultados(df, include_word=[]):
df['Title'] = df['Title'].astype(str)
# Define a list of keywords to exclude, indicating multiples
exclude_keywords = ["kit", "conjunto", "pacote", "caixa", "unidades"]
# Add conditional exclusion for words not included in the query
exclude_patterns = [keyword for keyword in exclude_keywords if keyword not in include_word]
# Combine all exclude patterns into a single regex pattern
exclude_pattern = r'\b(' + '|'.join(exclude_patterns) + r')\b|\b(\d+)\s*(unidade|pacotes|caixas)\b'
# Perform the filtering in one operation
df_refinado = df[~df['Title'].str.contains(exclude_pattern, case=False, regex=True, na=False)]
return df_refinado
def get_best_match(query, choices, limit=50):
# Using RapidFuzz for improved performance and fuzzy matching
matches = process.extract(query, choices, scorer=fuzz.WRatio, limit=limit)
return [match[0] for match in matches if match[1] > 65]
def match_query_words_in_titles(query, title):
"""
Check if all words in the query have a close match within the title.
Returns True if all words match to a certain degree; False otherwise.
"""
query_words = query.lower().split()
match_threshold = 80 # Adjust this threshold as needed
for word in query_words:
# Find the best match for each word in the query within the title
match_score = fuzz.partial_ratio(word, title.lower())
if match_score < match_threshold:
return False # If any word doesn't match well enough, return False
return True # All words matched well enough
def filtrar_itens_similares(df, termo_pesquisa, limit=50):
# Apply the match function to each title, filtering for those that match the query words
matches = df['Title'].apply(lambda title: match_query_words_in_titles(termo_pesquisa, title))
df_filtrado = df[matches]
# Further refine the list to the top N matches based on overall similarity to the query
df_filtrado['Overall_Similarity'] = df_filtrado['Title'].apply(lambda title: fuzz.WRatio(termo_pesquisa, title))
df_filtrado = df_filtrado.sort_values('Overall_Similarity', ascending=False).head(limit)
return df_filtrado
def calcular_fator_avaliacao(titulo, EC, PU):
filtered_df = bens_df[bens_df['TITULO'] == titulo]
if filtered_df.empty:
return None # Or handle the error as needed
bem_info = filtered_df.iloc[0]
VU, VR = bem_info['VIDA_UTIL'], bem_info['VALOR_RESIDUAL']
ec_pontuacao = {'Excelente': 10, 'Bom': 8, 'Regular': 5, 'Péssimo': 2}[EC]
PU, PVU, PUB = float(PU), min(10 - ((PU - 1) * (10 / VU)), 10), min(10 - (((VU - PU) - 1) * (10 / VU)), 10)
fator_avaliacao = max((4 * ec_pontuacao + 6 * PVU - 3 * PUB) / 100, VR)
return fator_avaliacao
def select_nearest_items(df, query):
# Lower the title similarity threshold if necessary
df['Title_Similarity'] = df['Title'].apply(lambda x: fuzz.WRatio(query, x))
df_filtered = df[df['Title_Similarity'] > 65] # Adjusted threshold
# Calculate mode price in a more inclusive manner
mode_price = df_filtered['Price'].mode()
if mode_price.empty:
target_price = df_filtered['Price'].median()
else:
target_price = mode_price.min()
df_filtered['Distance'] = (df_filtered['Price'] - target_price).abs()
df_sorted = df_filtered.sort_values(['Distance', 'Title_Similarity'], ascending=[True, False])
# Ensure diversity in marketplaces
marketplaces_selected = set()
results = []
for _, row in df_sorted.iterrows():
if row['Marketplace'] not in marketplaces_selected and len(marketplaces_selected) < 5:
results.append(row)
marketplaces_selected.add(row['Marketplace'])
if len(results) >= 5:
break
return pd.DataFrame(results)
def search_with_fallback(query, df, limit=50):
query_parts = query.split()
include_conjunto = "conjunto" in query.lower()
for i in range(len(query_parts), 0, -1):
simplified_query = " ".join(query_parts[:i])
include_word = ["conjunto"] if include_conjunto else [] # Ensure include_word is a list
df_refinado = refinar_resultados(df, include_word=include_word)
df_filtrado = filtrar_itens_similares(df_refinado, simplified_query, limit=limit)
if not df_filtrado.empty:
return df_filtrado
return pd.DataFrame()
def integrated_app(query, titulo, EC, PU):
include_conjunto = "conjunto" in query.lower()
df_mercadolibre = fetch_data_to_dataframe(query, 50, "mercadolibre")
df_combined = pd.concat([df_mercadolibre, data_crawler], ignore_index=True)
if df_combined.empty:
return "Nenhum dado encontrado. Tente uma consulta diferente.", pd.DataFrame()
# Pass whether "conjunto" is part of the original query
include_word = ["conjunto"] if include_conjunto else []
df_refined = refinar_resultados(df_combined, include_word=include_word)
df_similares = search_with_fallback(query, df_refined)
if df_similares.empty:
return "Nenhum item similar encontrado.", pd.DataFrame()
df_nearest = select_nearest_items(df_similares, query)
if df_nearest.empty:
return "Nenhum resultado próximo encontrado.", pd.DataFrame()
fator_avaliacao = calcular_fator_avaliacao(titulo, EC, PU)
valor_avaliacao = df_nearest['Price'].mean() * fator_avaliacao
return f"Valor Médio do Bem: R$ {df_nearest['Price'].mean():.2f}, Fator de Avaliação: {fator_avaliacao*100:.2f}%, Valor de Avaliação: R$ {valor_avaliacao:.2f}", df_nearest
iface = gr.Interface(fn=integrated_app,
inputs=[gr.Textbox(label="Digite sua consulta"),
gr.Dropdown(label="Classificação Contábil do Bem", choices=bens_df['TITULO'].unique().tolist(), value="MOBILIÁRIO EM GERAL"),
gr.Radio(label="Estado de Conservação do Bem", choices=['Excelente', 'Bom', 'Regular', 'Péssimo'], value="Excelente"),
gr.Number(label="Período utilizado (anos)", value=1)],
outputs=[gr.Textbox(label="Cálculo"), gr.Dataframe(label="Resultados da Pesquisa")],
theme=gr.themes.Monochrome(),
title="<span style='color: gray; font-size: 48px;'>Avaliação de Bens Móveis</span>",
description="""<p style="text-align: left;"><b><span style='color: gray; font-size: 40px;'>aval</span><span style='color: black; font-size: 40px;'>ia</span><span style='color: gray; font-size: 40px;'>.se</b></p>""")
iface.launch()
|