import gradio as gr
import requests
import pandas as pd
from rapidfuzz import process, fuzz
from docx import Document
from datetime import datetime
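# Load the asset classification table and the pre-crawled marketplace listings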
bens_df = pd.read_excel('bens_tab.xlsx')
data_crawler = pd.read_csv('data_crawler.csv', index_col=False)
data_crawler = data_crawler[['Title', 'Price', 'Currency', 'Condition', 'Link', 'Marketplace']]
def get_report(query):
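    """Build the appraisal report skeleton as a Word document and save it as 'laudo.docx'."""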
doc = Document()
# Adding a title
doc.add_heading('RELATÓRIO DE AVALIAÇÃO DE VALOR DE MERCADO – EXP.', level=1)
# Adding some details
doc.add_paragraph('Solicitante:')
doc.add_paragraph('Interessado:')
doc.add_paragraph('Proprietário:')
doc.add_paragraph('Objetivo da Avaliação: Definir o valor de mercado dos bens abaixo discriminados para registro no Patrimônio do Município.')
# Adding section heading and content
doc.add_heading('1. Características do(s) Avaliando(s):', level=2)
doc.add_paragraph('1.1 Avaliando 01:')
doc.add_paragraph(f'Descrição: {query}')
# Add evaluation section
doc.add_heading('2. Avaliação:', level=2)
doc.add_paragraph('2.1 Avaliando 01')
# Add market research section
doc.add_heading('3. Pesquisa de Mercado:', level=2)
today_date = datetime.now().strftime('%d/%m/%Y')
doc.add_paragraph(f'Lajeado/RS, {today_date}')
doc.add_paragraph('Membros da Comissão:')
# Save the document
doc.save('laudo.docx')
def fetch_data_to_dataframe(query, limit=50, source="mercadolibre"):
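    """
    Query the Mercado Libre public search API (MLB site) and return the
    results as a DataFrame with standardized columns.
    """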
if source == "mercadolibre":
BASE_URL = "https://api.mercadolibre.com/sites/MLB/search"
params = {'q': query, 'limit': limit}
response = requests.get(BASE_URL, params=params)
        if response.status_code == 200:
            data = response.json()
            items = data.get('results', [])
            if items:  # Guard: an empty result list would make the column selection raise KeyError
                df = pd.DataFrame(items)[['title', 'price', 'currency_id', 'condition', 'permalink']]
                df.columns = ['Title', 'Price', 'Currency', 'Condition', 'Link']
                df['Marketplace'] = "Mercado Livre"
                return df
    # Fall back to an empty DataFrame on API errors or unknown sources
    return pd.DataFrame()
def refinar_resultados(df, include_word=None):
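    """
    Filter out listings whose titles suggest bundles or multiple units,
    unless the corresponding keyword was part of the user's query.
    """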
    if include_word is None:  # Avoid a mutable default argument
        include_word = []
    df['Title'] = df['Title'].astype(str)
# Define a list of keywords to exclude, indicating multiples
exclude_keywords = ["kit", "conjunto", "pacote", "caixa", "unidades", "Kits", " e "]
# Add conditional exclusion for words not included in the query
exclude_patterns = [keyword for keyword in exclude_keywords if keyword not in include_word]
# Combine all exclude patterns into a single regex pattern
exclude_pattern = r'\b(' + '|'.join(exclude_patterns) + r')\b|\b(\d+)\s*(unidade|pacotes|caixas)\b'
# Perform the filtering in one operation
df_refinado = df[~df['Title'].str.contains(exclude_pattern, case=False, regex=True, na=False)]
return df_refinado
def get_best_match(query, choices, limit=50):
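    """Return the choices whose WRatio similarity to the query exceeds 75."""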
# Using RapidFuzz for improved performance and fuzzy matching
matches = process.extract(query, choices, scorer=fuzz.WRatio, limit=limit)
return [match[0] for match in matches if match[1] > 75]
def match_query_words_in_titles(query, title):
"""
Check if all words in the query have a close match within the title.
Returns True if all words match to a certain degree; False otherwise.
"""
query_words = query.lower().split()
match_threshold = 75 # Adjust this threshold as needed
for word in query_words:
# Find the best match for each word in the query within the title
match_score = fuzz.partial_ratio(word, title.lower())
if match_score < match_threshold:
return False # If any word doesn't match well enough, return False
return True # All words matched well enough
def filtrar_itens_similares(df, termo_pesquisa, limit=50):
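    """
    Keep only titles in which every query word has a close match, then rank
    the survivors by overall WRatio similarity and return the top matches.
    """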
# Apply the match function to each title, filtering for those that match the query words
matches = df['Title'].apply(lambda title: match_query_words_in_titles(termo_pesquisa, title))
    df_filtrado = df[matches].copy()  # Copy to avoid SettingWithCopyWarning on the assignment below
    # Further refine the list to the top N matches based on overall similarity to the query
    df_filtrado['Overall_Similarity'] = df_filtrado['Title'].apply(lambda title: fuzz.WRatio(termo_pesquisa, title))
df_filtrado = df_filtrado.sort_values('Overall_Similarity', ascending=False).head(limit)
return df_filtrado
def calcular_fator_avaliacao(titulo, EC, PU):
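    """
    Compute the appraisal factor for an asset class from its condition (EC),
    years in use (PU), useful life (VIDA_UTIL) and residual value
    (VALOR_RESIDUAL) in bens_df. Returns None if the class is not found.
    """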
filtered_df = bens_df[bens_df['TITULO'] == titulo]
if filtered_df.empty:
return None # Or handle the error as needed
bem_info = filtered_df.iloc[0]
VU, VR = bem_info['VIDA_UTIL'], bem_info['VALOR_RESIDUAL']
ec_pontuacao = {'Excelente': 10, 'Bom': 8, 'Regular': 5, 'Péssimo': 2}[EC]
    PU = float(PU)
    PVU = min(10 - ((PU - 1) * (10 / VU)), 10)         # Score for remaining useful life
    PUB = min(10 - (((VU - PU) - 1) * (10 / VU)), 10)  # Score for elapsed life
    # Weighted combination of the three scores, floored at the residual value
    fator_avaliacao = max((4 * ec_pontuacao + 6 * PVU - 3 * PUB) / 100, VR)
return fator_avaliacao
def select_nearest_items(df, query):
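    """
    Pick up to five listings closest in price to the mode (or median when no
    mode exists), ranked by title similarity, with at most one listing per
    marketplace.
    """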
# Lower the title similarity threshold if necessary
    df = df.copy()  # Work on a copy so the caller's DataFrame is not mutated
    df['Title_Similarity'] = df['Title'].apply(lambda x: fuzz.WRatio(query, x))
    df_filtered = df[df['Title_Similarity'] > 75].copy()  # Adjusted threshold
# Calculate mode price in a more inclusive manner
mode_price = df_filtered['Price'].mode()
if mode_price.empty:
target_price = df_filtered['Price'].median()
else:
target_price = mode_price.min()
df_filtered['Distance'] = (df_filtered['Price'] - target_price).abs()
df_sorted = df_filtered.sort_values(['Distance', 'Title_Similarity'], ascending=[True, False])
# Ensure diversity in marketplaces
marketplaces_selected = set()
results = []
for _, row in df_sorted.iterrows():
if row['Marketplace'] not in marketplaces_selected and len(marketplaces_selected) < 5:
results.append(row)
marketplaces_selected.add(row['Marketplace'])
if len(results) >= 5:
break
return pd.DataFrame(results)
def search_with_fallback(query, df, limit=50):
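    """
    Search with progressively shorter prefixes of the query until the
    similarity filter returns a non-empty result.
    """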
query_parts = query.split()
include_conjunto = "conjunto" in query.lower()
    include_word = ["conjunto"] if include_conjunto else []  # Ensure include_word is a list
    df_refinado = refinar_resultados(df, include_word=include_word)  # Refine once; it does not depend on the loop
    for i in range(len(query_parts), 0, -1):
        simplified_query = " ".join(query_parts[:i])
        df_filtrado = filtrar_itens_similares(df_refinado, simplified_query, limit=limit)
if not df_filtrado.empty:
return df_filtrado
return pd.DataFrame()
def integrated_app(query, titulo, EC, PU, selected_rows):
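    """
    Main Gradio callback: combine live Mercado Libre results with the crawled
    dataset, filter and rank them, compute the appraisal value and generate
    the Word report. Returns a summary string, the result table and the
    report file path.
    """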
include_conjunto = "conjunto" in query.lower()
df_mercadolibre = fetch_data_to_dataframe(query, 50, "mercadolibre")
df_combined = pd.concat([df_mercadolibre, data_crawler], ignore_index=True)
if df_combined.empty:
return "Nenhum dado encontrado. Tente uma consulta diferente.", pd.DataFrame()
# Pass whether "conjunto" is part of the original query
include_word = ["conjunto"] if include_conjunto else []
df_refined = refinar_resultados(df_combined, include_word=include_word)
df_similares = search_with_fallback(query, df_refined)
if df_similares.empty:
return "Nenhum item similar encontrado.", pd.DataFrame()
df_nearest = select_nearest_items(df_similares, query)
if df_nearest.empty:
return "Nenhum resultado próximo encontrado.", pd.DataFrame()
    if selected_rows.strip():
        selected_indices = [int(idx) for idx in selected_rows.split(',') if idx.strip().isdigit()]
        # Keep only indices that actually exist in the result table
        selected_indices = [idx for idx in selected_indices if idx < len(df_nearest)]
        if selected_indices:
            df_nearest = df_nearest.iloc[selected_indices]
# After calculating df_nearest and before returning it
df_nearest.reset_index(drop=True, inplace=True)
df_nearest['ID'] = df_nearest.index
# Reorder DataFrame columns to make ID first
column_order = ['ID'] + [col for col in df_nearest.columns if col != 'ID']
df_nearest = df_nearest[column_order]
    fator_avaliacao = calcular_fator_avaliacao(titulo, EC, PU)
    if fator_avaliacao is None:
        return "Classificação contábil não encontrada na tabela de bens.", df_nearest, None
    valor_avaliacao = df_nearest['Price'].mean() * fator_avaliacao
get_report(query)
return f"Valor Médio do Bem: R$ {df_nearest['Price'].mean():.2f}, Fator de Avaliação: {fator_avaliacao*100:.2f}%, Valor de Avaliação: R$ {valor_avaliacao:.2f}", df_nearest, 'laudo.docx'
iface = gr.Interface(fn=integrated_app,
inputs=[gr.Textbox(label="Digite sua consulta"),
gr.Dropdown(label="Classificação Contábil do Bem", choices=bens_df['TITULO'].unique().tolist(), value="MOBILIÁRIO EM GERAL"),
gr.Radio(label="Estado de Conservação do Bem", choices=['Excelente', 'Bom', 'Regular', 'Péssimo'], value="Excelente"),
gr.Number(label="Período utilizado (anos)", value=1),
gr.Textbox(label="IDs das linhas selecionadas (separadas por vírgula)", placeholder="Exemplo: 0,2,5")], # Using Textbox for IDs
outputs=[gr.Textbox(label="Cálculo"), gr.Dataframe(label="Resultados da Pesquisa"), gr.File(label="Laudo de Avaliação")],
theme=gr.themes.Monochrome(),
title="<span style='color: gray; font-size: 48px;'>Avaliação de Bens Móveis</span>",
                     description="""<p style="text-align: left;"><b><span style='color: gray; font-size: 40px;'>aval</span><span style='color: black; font-size: 40px;'>ia</span><span style='color: gray; font-size: 40px;'>.se</span></b></p>""")
iface.launch()