File size: 6,562 Bytes
c5a97b3
 
 
c038e47
c5a97b3
99aed09
53ae920
1a5aa7c
b4cb87b
99aed09
2598bbd
7a6bd46
 
 
 
2598bbd
 
601b79b
 
 
 
 
2598bbd
7a6bd46
3992853
2237b4d
 
 
 
fb309c7
2237b4d
 
 
 
3992853
 
f691033
2237b4d
5d4f445
601b79b
07df588
f691033
2237b4d
 
 
 
c5a97b3
2fe5b5e
d3db32a
 
601b79b
d3db32a
 
601b79b
 
99aed09
601b79b
aff5c22
f308877
 
2237b4d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c1badbd
2fe5b5e
2237b4d
074db95
2237b4d
074db95
2598bbd
c5a97b3
074db95
2237b4d
 
78ac961
 
f2141ad
2237b4d
 
 
601b79b
2237b4d
 
601b79b
 
99aed09
3bd1e98
f308877
601b79b
4d527e1
601b79b
c4f8c41
205d4a0
601b79b
 
3bd1e98
78ac961
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import gradio as gr
import requests
import pandas as pd
from rapidfuzz import process, fuzz

# --- Static data sources loaded once at import time ---

# Asset-class reference table; calcular_fator_avaliacao reads the columns
# TITULO, VIDA_UTIL and VALOR_RESIDUAL from it.
bens_df = pd.read_excel('bens_tab.xlsx')

# Pre-crawled marketplace listings; concatenated with live API results in
# integrated_app, so it is trimmed to the same normalized column set.
data_crawler = pd.read_csv('data_crawler.csv', index_col=False)
data_crawler = data_crawler[['Title', 'Price', 'Currency', 'Condition', 'Link', 'Marketplace']]

def fetch_data_to_dataframe(query, limit=50, source="mercadolibre"):
    """Fetch marketplace search results into a normalized DataFrame.

    Args:
        query: Free-text search terms.
        limit: Maximum number of results to request.
        source: Only "mercadolibre" is implemented; any other value
            yields an empty DataFrame.

    Returns:
        DataFrame with columns Title, Price, Currency, Condition, Link,
        Marketplace — or an empty DataFrame on network errors, non-200
        responses, or an empty result set (callers treat empty as "no data").
    """
    if source != "mercadolibre":
        return pd.DataFrame()

    BASE_URL = "https://api.mercadolibre.com/sites/MLB/search"
    try:
        # Original call had no timeout and could hang the UI indefinitely.
        response = requests.get(BASE_URL, params={'q': query, 'limit': limit}, timeout=10)
    except requests.RequestException:
        # Degrade gracefully on connection errors/timeouts instead of crashing.
        return pd.DataFrame()

    if response.status_code != 200:
        return pd.DataFrame()

    items = response.json().get('results', [])
    if not items:
        # Guard: selecting columns on a DataFrame built from [] raises KeyError.
        return pd.DataFrame()

    df = pd.DataFrame(items)[['title', 'price', 'currency_id', 'condition', 'permalink']]
    df.columns = ['Title', 'Price', 'Currency', 'Condition', 'Link']
    df['Marketplace'] = "Mercado Livre"
    return df

def refinar_resultados(df):
    """Drop listings that are kits or multi-unit bundles.

    Their prices do not reflect a single unit, so they would skew the
    valuation average.

    Fixes vs. the original:
    - runs fillna('') BEFORE astype(str) — the original ran it after, by
      which point NaN had already become the literal string 'nan', so the
      fillna was a no-op and the code contradicted its own comment;
    - uses non-capturing groups in the regex, avoiding pandas' "match
      groups" UserWarning with str.contains;
    - works on a copy, so the caller's DataFrame is never mutated.

    Returns the filtered DataFrame (original index preserved).
    """
    df = df.copy()
    df['Title'] = df['Title'].fillna('').astype(str)

    # Remove bundled offers ("kit ...").
    refinado = df[~df['Title'].str.contains("kit", case=False, na=False)]

    # Remove "N unidades/pacotes/caixas" multi-unit listings.
    padrao_unidades = r'\b\d+\s*(?:unidade|unidades|pacote|pacotes|caixa|caixas)\b'
    refinado = refinado[~refinado['Title'].str.contains(padrao_unidades, case=False, regex=True)]
    return refinado

def get_best_match(query, choices, limit=15):
    """Return up to `limit` strings from `choices` that fuzzily match `query`.

    Uses RapidFuzz's WRatio scorer; only candidates scoring above 70 are kept.
    """
    scored = process.extract(query, choices, scorer=fuzz.WRatio, limit=limit)
    return [text for text, score, *_ in scored if score > 70]
    
def filtrar_itens_similares(df, termo_pesquisa, limit=15):
    """Keep only the rows whose Title fuzzily matches the search term.

    Delegates scoring to get_best_match and filters the DataFrame by
    membership in the matched titles.
    """
    titulos_aceitos = get_best_match(termo_pesquisa, df['Title'].tolist(), limit=limit)
    return df[df['Title'].isin(titulos_aceitos)]

def calcular_fator_avaliacao(titulo, EC, PU):
    """Compute the valuation factor for an asset class.

    Args:
        titulo: Accounting class name, looked up in the global `bens_df` table.
        EC: Conservation state — 'Excelente', 'Bom', 'Regular' or 'Péssimo'
            (any other value raises KeyError, as in the original).
        PU: Period of use in years (numeric).

    Returns:
        The valuation factor, floored at the class's residual value (VR),
        or None when `titulo` is not found in the table.
    """
    linhas = bens_df[bens_df['TITULO'] == titulo]
    if linhas.empty:
        return None

    registro = linhas.iloc[0]
    VU = registro['VIDA_UTIL']
    VR = registro['VALOR_RESIDUAL']
    pontuacao_ec = {'Excelente': 10, 'Bom': 8, 'Regular': 5, 'Péssimo': 2}[EC]

    # Score elapsed vs. remaining useful life on a 0-10 scale.
    PU = float(PU)
    pvu = min(10 - ((PU - 1) * (10 / VU)), 10)
    pub = min(10 - (((VU - PU) - 1) * (10 / VU)), 10)

    # Weighted combination, never below the residual-value floor.
    return max((4 * pontuacao_ec + 6 * pvu - 3 * pub) / 100, VR)

def select_nearest_items(df, query):
    """Pick up to 5 listings most relevant to `query` and nearest a target price.

    Pipeline: (1) keep titles scoring > 70 WRatio similarity against the
    query; (2) keep prices at or below the 75th percentile to drop outliers;
    (3) rank by absolute distance to a target price (smallest mode, falling
    back to the median when no mode exists), tie-broken by title similarity.

    Returns an empty DataFrame when nothing is similar enough.

    Fix vs. the original: operates on a copy throughout. The original added
    the Title_Similarity column to the CALLER's DataFrame and assigned
    Distance into a filtered slice (SettingWithCopyWarning, assignment not
    guaranteed to stick).
    """
    df = df.copy()
    df['Title_Similarity'] = df['Title'].apply(lambda t: fuzz.WRatio(query, t))

    similares = df[df['Title_Similarity'] > 70]  # similarity threshold
    if similares.empty:
        return pd.DataFrame()

    # Outlier trim: keep the cheaper three quarters of the matched listings.
    razoaveis = similares[similares['Price'] <= similares['Price'].quantile(0.75)].copy()
    if razoaveis.empty:
        return pd.DataFrame()

    modas = razoaveis['Price'].mode()
    target_price = modas.min() if not modas.empty else razoaveis['Price'].median()
    razoaveis['Distance'] = (razoaveis['Price'] - target_price).abs()

    return razoaveis.sort_values(['Distance', 'Title_Similarity'],
                                 ascending=[True, False]).head(5)

def search_with_fallback(query, df, limit=15):
    """Match against progressively shorter prefixes of the query.

    Starts with the full query and drops trailing terms one at a time,
    returning the first non-empty fuzzy-match result. Returns an empty
    DataFrame when even a single-word query finds nothing.
    """
    tokens = query.split()
    for corte in range(len(tokens), 0, -1):
        consulta_parcial = " ".join(tokens[:corte])
        encontrados = filtrar_itens_similares(df, consulta_parcial, limit=limit)
        if not encontrados.empty:
            return encontrados
    return pd.DataFrame()

def integrated_app(query, titulo, EC, PU):
    """End-to-end valuation pipeline behind the Gradio UI.

    Fetches live Mercado Libre results, merges them with the pre-crawled
    dataset, refines and fuzzy-matches against the query, then appraises
    the asset using the nearest-priced listings.

    Returns a (message, DataFrame) pair; the message either carries the
    valuation summary or explains why no result was produced.
    """
    df_mercadolibre = fetch_data_to_dataframe(query, 50, "mercadolibre")
    df_combined = pd.concat([df_mercadolibre, data_crawler], ignore_index=True)

    if df_combined.empty:
        return "Nenhum dado encontrado. Tente uma consulta diferente.", pd.DataFrame()

    df_refined = refinar_resultados(df_combined)
    df_similares = search_with_fallback(query, df_refined)
    if df_similares.empty:
        return "Nenhum item similar encontrado.", pd.DataFrame()

    df_nearest = select_nearest_items(df_similares, query)
    if df_nearest.empty:
        return "Nenhum resultado próximo encontrado.", pd.DataFrame()

    fator_avaliacao = calcular_fator_avaliacao(titulo, EC, PU)
    # Bug fix: calcular_fator_avaliacao returns None for an unknown titulo;
    # the original crashed here with TypeError (None used in arithmetic).
    if fator_avaliacao is None:
        return "Classificação contábil não encontrada.", df_nearest

    preco_medio = df_nearest['Price'].mean()
    valor_avaliacao = preco_medio * fator_avaliacao
    return (f"Valor Médio do Bem: R$ {preco_medio:.2f}, "
            f"Fator de Avaliação: {fator_avaliacao*100:.2f}%, "
            f"Valor de Avaliação: R$ {valor_avaliacao:.2f}"), df_nearest

# Gradio UI wiring: one free-text query plus the appraisal parameters in,
# the formatted valuation string and the matched-listings table out.
# Dropdown choices come straight from the TITULO column of bens_df.
iface = gr.Interface(fn=integrated_app,
                     inputs=[gr.Textbox(label="Digite sua consulta"),
                             gr.Dropdown(label="Classificação Contábil do Bem", choices=bens_df['TITULO'].unique().tolist(), value="MOBILIÁRIO EM GERAL"),
                             gr.Radio(label="Estado de Conservação do Bem", choices=['Excelente', 'Bom', 'Regular', 'Péssimo'], value="Excelente"),
                             gr.Number(label="Período utilizado (anos)", value=1)],
                     outputs=[gr.Textbox(label="Cálculo"), gr.Dataframe(label="Resultados da Pesquisa")],
                     theme=gr.themes.Monochrome(),
                     title="<span style='color: gray; font-size: 48px;'>Avaliação de Bens Móveis</span>",
                     description="""<p style="text-align: left;"><b><span style='color: gray; font-size: 40px;'>aval</span><span style='color: black; font-size: 40px;'>ia</span><span style='color: gray; font-size: 40px;'>.se</b></p>""")

# Blocks serving the web UI until interrupted; module-level side effect.
iface.launch()