File size: 7,577 Bytes
c5a97b3
 
 
c038e47
c5a97b3
99aed09
53ae920
1a5aa7c
b4cb87b
99aed09
2598bbd
7a6bd46
 
 
 
2598bbd
 
601b79b
 
 
 
 
2598bbd
7a6bd46
2c05d82
 
fb309c7
de13740
 
2237b4d
 
2c05d82
 
 
 
 
3992853
 
5ef4720
2237b4d
5d4f445
78633a0
07df588
78633a0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5ef4720
78633a0
 
 
 
 
 
 
 
2237b4d
c5a97b3
2fe5b5e
d3db32a
 
601b79b
d3db32a
 
601b79b
 
99aed09
601b79b
aff5c22
f308877
 
f3ff845
6baa204
2237b4d
78633a0
6baa204
 
 
 
 
 
 
f555ad2
6baa204
 
f555ad2
6baa204
 
 
f555ad2
 
5ef4720
6baa204
 
 
5ef4720
f555ad2
 
6baa204
 
2237b4d
5ef4720
2237b4d
2c05d82
 
2237b4d
 
2c05d82
 
 
2237b4d
 
2c05d82
2237b4d
c1badbd
2fe5b5e
2237b4d
5ef4720
074db95
5ef4720
2237b4d
074db95
2598bbd
c5a97b3
2c05d82
 
 
 
2237b4d
 
78ac961
 
f2141ad
2c05d82
2237b4d
 
601b79b
2237b4d
601b79b
 
99aed09
3bd1e98
f308877
601b79b
4d527e1
601b79b
c4f8c41
205d4a0
601b79b
 
3bd1e98
78ac961
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
import gradio as gr
import requests
import pandas as pd
from rapidfuzz import process, fuzz

# Asset reference table, loaded once at import time. Code further down reads
# its TITULO, VIDA_UTIL and VALOR_RESIDUAL columns.
bens_df = pd.read_excel('bens_tab.xlsx')

# Listings loaded from a local CSV (presumably produced by a separate crawler
# -- confirm). Only the six columns shared with the API results are kept so
# both sources can be concatenated.
data_crawler = pd.read_csv('data_crawler.csv', index_col=False)
data_crawler = data_crawler[['Title', 'Price', 'Currency', 'Condition', 'Link', 'Marketplace']]

def fetch_data_to_dataframe(query, limit=50, source="mercadolibre"):
    """Search a marketplace and return its listings as a DataFrame.

    Only ``source="mercadolibre"`` is implemented; it queries the Mercado
    Libre search endpoint for the ``MLB`` site.

    Args:
        query: Search text, sent as the ``q`` request parameter.
        limit: Maximum number of results requested from the API.
        source: Marketplace identifier.

    Returns:
        A DataFrame with the columns ``Title``, ``Price``, ``Currency``,
        ``Condition``, ``Link`` and ``Marketplace``. An empty DataFrame is
        returned when the source is unknown, the request fails, the response
        is not HTTP 200, the body is not valid JSON, or there are no results.
    """
    if source != "mercadolibre":
        return pd.DataFrame()

    base_url = "https://api.mercadolibre.com/sites/MLB/search"
    try:
        # A timeout keeps the UI from hanging forever on a stalled connection.
        response = requests.get(
            base_url, params={'q': query, 'limit': limit}, timeout=10
        )
        if response.status_code != 200:
            return pd.DataFrame()
        items = response.json().get('results', [])
    except (requests.RequestException, ValueError):
        # Network failure or malformed body: treated like "no data", the same
        # way a non-200 response is.
        return pd.DataFrame()

    if not items:
        # Selecting columns from an empty frame would raise KeyError.
        return pd.DataFrame()

    api_fields = ['title', 'price', 'currency_id', 'condition', 'permalink']
    # reindex() tolerates a field missing from the payload (filled with NaN)
    # instead of raising KeyError.
    df = pd.DataFrame(items).reindex(columns=api_fields)
    df.columns = ['Title', 'Price', 'Currency', 'Condition', 'Link']
    df['Marketplace'] = "Mercado Livre"
    return df

def refinar_resultados(df, exclude_word="conjunto", include_word=False):
    """Drop listings whose title suggests a bundle rather than a single item.

    A row is removed when its title (case-insensitive):
      * contains "kit";
      * contains " e " (two items joined by "and");
      * contains a quantity such as "10 unidades", "2 pacotes" or "3 caixas";
      * contains "conj" or ``exclude_word`` -- only when ``include_word`` is
        False.

    The set-related filters ("conj" and ``exclude_word``) are skipped when
    ``include_word`` is True, so a query that explicitly asks for a
    "conjunto" can still match such titles.

    Args:
        df: DataFrame with a ``Title`` column. It is not modified.
        exclude_word: Extra pattern to exclude; interpreted as a regular
            expression by ``str.contains``.
        include_word: True when the set-related filters must be skipped.

    Returns:
        A new DataFrame with the remaining rows and ``Title`` cast to ``str``.
    """
    # assign() builds a new frame, so the caller's DataFrame is left untouched.
    df_refinado = df.assign(Title=df['Title'].astype(str))
    titles = df_refinado['Title']

    # Non-capturing group: str.contains warns when the pattern has capture groups.
    padrao_unidades = r'\b\d+\s*(?:unidade|unidades|pacote|pacotes|caixa|caixas)\b'

    # Accumulate every exclusion into one mask so no filter overwrites another.
    unwanted = titles.str.contains("kit", case=False, na=False)
    unwanted |= titles.str.contains(" e ", case=False, na=False)
    unwanted |= titles.str.contains(padrao_unidades, case=False, regex=True, na=False)

    if not include_word:
        unwanted |= titles.str.contains("conj", case=False, na=False)
        unwanted |= titles.str.contains(exclude_word, case=False, na=False)

    return df_refinado[~unwanted]

def get_best_match(query, choices, limit=50):
    """Return the choices whose WRatio score against ``query`` is above 85.

    RapidFuzz is asked for at most ``limit`` candidates; the score cut-off is
    applied to that candidate list afterwards.
    """
    scored_candidates = process.extract(query, choices, scorer=fuzz.WRatio, limit=limit)

    accepted = []
    for candidate in scored_candidates:
        # Each candidate is a tuple: position 0 is the choice, position 1 its score.
        if candidate[1] > 85:
            accepted.append(candidate[0])
    return accepted
    
def match_query_words_in_titles(query, title):
    """
    Tell whether every word of the query is approximately present in the title.

    The query is lower-cased and split on whitespace; each word is scored
    against the lower-cased title with ``fuzz.partial_ratio``. Returns True
    only if no word scores below the threshold (an empty query therefore
    yields True); False otherwise.
    """
    match_threshold = 80  # Minimum partial_ratio score a word must reach

    return all(
        fuzz.partial_ratio(word, title.lower()) >= match_threshold
        for word in query.lower().split()
    )

def filtrar_itens_similares(df, termo_pesquisa, limit=50):
    """Keep the listings whose titles match the search term, best first.

    A row is kept when ``match_query_words_in_titles`` accepts its title.
    Surviving rows get an ``Overall_Similarity`` column (``fuzz.WRatio``
    between the search term and the title) and are sorted by it in
    descending order.

    Args:
        df: DataFrame with a ``Title`` column. It is not modified.
        termo_pesquisa: Search text to compare the titles against.
        limit: Maximum number of rows returned.

    Returns:
        A new DataFrame with at most ``limit`` rows and the extra
        ``Overall_Similarity`` column. It has no rows when nothing matches
        or when ``df`` is empty.
    """
    if df.empty:
        # A mask built from an empty frame is not boolean-typed, so it cannot
        # be used for row selection; return an explicit zero-row result.
        return pd.DataFrame(columns=[*df.columns, 'Overall_Similarity'])

    mask = df['Title'].apply(
        lambda title: match_query_words_in_titles(termo_pesquisa, title)
    ).astype(bool)

    # Work on a copy so the new column is not written into a slice of ``df``
    # (which triggers SettingWithCopyWarning).
    df_filtrado = df[mask].copy()
    df_filtrado['Overall_Similarity'] = df_filtrado['Title'].apply(
        lambda title: fuzz.WRatio(termo_pesquisa, title)
    )

    return df_filtrado.sort_values('Overall_Similarity', ascending=False).head(limit)

def calcular_fator_avaliacao(titulo, EC, PU):
    """Compute the valuation factor for an asset class.

    The first row of ``bens_df`` whose ``TITULO`` equals ``titulo`` supplies
    ``VIDA_UTIL`` (VU) and ``VALOR_RESIDUAL`` (VR). The factor is::

        max((4 * ec + 6 * PVU - 3 * PUB) / 100, VR)

    where ``ec`` is the conservation score, and ``PVU`` / ``PUB`` are scores
    capped at 10 that are derived from ``PU`` and ``VU``.

    Args:
        titulo: Asset class title to look up in ``bens_df``.
        EC: Conservation state; one of 'Excelente', 'Bom', 'Regular',
            'Péssimo'.
        PU: Period of use; anything ``float()`` accepts.

    Returns:
        The factor as a number, or None when ``titulo`` is not in ``bens_df``.

    Raises:
        KeyError: If ``EC`` is not one of the four known states.
    """
    filtered_df = bens_df[bens_df['TITULO'] == titulo]
    if filtered_df.empty:
        return None  # Unknown asset class; the caller decides how to report it

    bem_info = filtered_df.iloc[0]
    VU, VR = bem_info['VIDA_UTIL'], bem_info['VALOR_RESIDUAL']
    ec_pontuacao = {'Excelente': 10, 'Bom': 8, 'Regular': 5, 'Péssimo': 2}[EC]

    # Convert first: in a single tuple assignment the right-hand side is fully
    # evaluated before PU is rebound, so the formulas would see the raw value.
    PU = float(PU)
    PVU = min(10 - ((PU - 1) * (10 / VU)), 10)
    PUB = min(10 - (((VU - PU) - 1) * (10 / VU)), 10)

    # VR acts as a floor: the factor never drops below the residual value.
    return max((4 * ec_pontuacao + 6 * PVU - 3 * PUB) / 100, VR)

def select_nearest_items(df, query, *, min_similarity=85, max_items=5):
    """Pick the listings closest to the typical price, one per marketplace.

    Steps:
      1. Score every title against ``query`` with ``fuzz.WRatio`` and keep
         the rows scoring above ``min_similarity``.
      2. Take the smallest mode of ``Price`` as the target price, falling
         back to the median when no mode exists.
      3. Sort by distance to the target price (ascending), then by title
         similarity (descending).
      4. Keep the first row of each marketplace, up to ``max_items`` rows.

    Args:
        df: DataFrame with ``Title``, ``Price`` and ``Marketplace`` columns.
            It is not modified.
        query: Search text the titles are compared against.
        min_similarity: Exclusive lower bound for the title score.
        max_items: Maximum number of rows (and marketplaces) returned.

    Returns:
        A DataFrame with the selected rows plus ``Title_Similarity`` and
        ``Distance`` columns; an empty DataFrame when nothing qualifies.
    """
    if df.empty:
        return pd.DataFrame()

    # Copy so the helper columns are not added to the caller's frame.
    scored = df.copy()
    scored['Title_Similarity'] = scored['Title'].apply(lambda x: fuzz.WRatio(query, x))

    # Copy again: assigning a column to a boolean-indexed slice raises
    # SettingWithCopyWarning.
    df_filtered = scored[scored['Title_Similarity'] > min_similarity].copy()
    if df_filtered.empty:
        return pd.DataFrame()

    mode_price = df_filtered['Price'].mode()
    if mode_price.empty:
        target_price = df_filtered['Price'].median()
    else:
        # Several modes may exist; the lowest one is used.
        target_price = mode_price.min()

    df_filtered['Distance'] = (df_filtered['Price'] - target_price).abs()
    df_sorted = df_filtered.sort_values(
        ['Distance', 'Title_Similarity'], ascending=[True, False]
    )

    # After sorting, the first occurrence of each marketplace is its best row.
    return df_sorted.drop_duplicates(subset='Marketplace', keep='first').head(max_items)


def search_with_fallback(query, df, limit=50):
    """Search for similar listings, shortening the query until something matches.

    The full query is tried first; on failure the last word is dropped and
    the search repeated, down to a single word.

    Args:
        query: Search text; split on whitespace into words.
        df: DataFrame of listings with a ``Title`` column.
        limit: Maximum number of rows returned.

    Returns:
        The first non-empty result of ``filtrar_itens_similares``, or an
        empty DataFrame when no prefix of the query matches (or the query
        has no words).
    """
    query_parts = query.split()
    if not query_parts:
        return pd.DataFrame()

    # The refinement does not depend on how many words are used, so it is
    # computed once instead of on every iteration.
    include_conjunto = "conjunto" in query.lower()
    df_refinado = refinar_resultados(df, include_word=include_conjunto)

    for n_words in range(len(query_parts), 0, -1):
        simplified_query = " ".join(query_parts[:n_words])
        df_filtrado = filtrar_itens_similares(df_refinado, simplified_query, limit=limit)
        if not df_filtrado.empty:
            return df_filtrado

    return pd.DataFrame()

def integrated_app(query, titulo, EC, PU):
    """Run the whole valuation: fetch listings, select them, compute the value.

    Listings from the Mercado Libre API are combined with ``data_crawler``,
    narrowed down with ``search_with_fallback`` and ``select_nearest_items``,
    and the mean price of the selected rows is multiplied by the valuation
    factor from ``calcular_fator_avaliacao``.

    Args:
        query: Search text typed by the user.
        titulo: Asset class title, used to look up the valuation factor.
        EC: Conservation state of the asset.
        PU: Period of use.

    Returns:
        A ``(message, DataFrame)`` tuple. On success the message holds the
        mean price, the factor and the final value, and the DataFrame holds
        the selected listings. When a step yields nothing, the message
        explains it and the DataFrame is empty.
    """
    df_mercadolibre = fetch_data_to_dataframe(query, 50, "mercadolibre")
    df_combined = pd.concat([df_mercadolibre, data_crawler], ignore_index=True)

    if df_combined.empty:
        return "Nenhum dado encontrado. Tente uma consulta diferente.", pd.DataFrame()

    # search_with_fallback already refines the listings (including the
    # "conjunto" handling), so no separate refinement pass is needed here.
    df_similares = search_with_fallback(query, df_combined)
    if df_similares.empty:
        return "Nenhum item similar encontrado.", pd.DataFrame()

    df_nearest = select_nearest_items(df_similares, query)
    if df_nearest.empty:
        return "Nenhum resultado próximo encontrado.", pd.DataFrame()

    fator_avaliacao = calcular_fator_avaliacao(titulo, EC, PU)
    if fator_avaliacao is None:
        # Unknown asset class: report it instead of failing on None * float.
        return "Classificação contábil do bem não encontrada.", df_nearest

    valor_medio = df_nearest['Price'].mean()
    valor_avaliacao = valor_medio * fator_avaliacao
    return f"Valor Médio do Bem: R$ {valor_medio:.2f}, Fator de Avaliação: {fator_avaliacao*100:.2f}%, Valor de Avaliação: R$ {valor_avaliacao:.2f}", df_nearest

# Gradio UI definition. The four inputs are passed to integrated_app in order
# (query, titulo, EC, PU); its (message, DataFrame) return value fills the two
# outputs. The dropdown choices are the unique TITULO values of bens_df.
iface = gr.Interface(fn=integrated_app,
                     inputs=[gr.Textbox(label="Digite sua consulta"),
                             gr.Dropdown(label="Classificação Contábil do Bem", choices=bens_df['TITULO'].unique().tolist(), value="MOBILIÁRIO EM GERAL"),
                             gr.Radio(label="Estado de Conservação do Bem", choices=['Excelente', 'Bom', 'Regular', 'Péssimo'], value="Excelente"),
                             gr.Number(label="Período utilizado (anos)", value=1)],
                     outputs=[gr.Textbox(label="Cálculo"), gr.Dataframe(label="Resultados da Pesquisa")],
                     theme=gr.themes.Monochrome(),
                     title="<span style='color: gray; font-size: 48px;'>Avaliação de Bens Móveis</span>",
                     description="""<p style="text-align: left;"><b><span style='color: gray; font-size: 40px;'>aval</span><span style='color: black; font-size: 40px;'>ia</span><span style='color: gray; font-size: 40px;'>.se</b></p>""")

# Starts the web server as soon as this module is executed.
iface.launch()