import gradio as gr
import requests
import pandas as pd
from rapidfuzz import process, fuzz

bens_df = pd.read_excel('bens_tab.xlsx')

data_crawler = pd.read_csv('data_crawler.csv', index_col=False)
data_crawler = data_crawler[['Title', 'Price', 'Currency', 'Condition', 'Link', 'Marketplace']]

def fetch_data_to_dataframe(query, limit=50, source="mercadolibre"):
    if source == "mercadolibre":
        BASE_URL = "https://api.mercadolibre.com/sites/MLB/search"
        params = {'q': query, 'limit': limit}
        response = requests.get(BASE_URL, params=params)
        if response.status_code == 200:
            data = response.json()
            items = data.get('results', [])
            df = pd.DataFrame(items)[['title', 'price', 'currency_id', 'condition', 'permalink']]
            df.columns = ['Title', 'Price', 'Currency', 'Condition', 'Link']
            df['Marketplace'] = "Mercado Livre"
            return df
    return pd.DataFrame()

def refinar_resultados(df, include_words=[]):
    df['Title'] = df['Title'].astype(str)

    # Define a list of keywords to exclude, indicating multiples
    exclude_keywords = ["kit", "conjunto", "pacote", "caixa", "unidades"]

    # Add conditional exclusion for words not included in the query
    exclude_patterns = [keyword for keyword in exclude_keywords if keyword not in include_words]

    # Combine all exclude patterns into a single regex pattern
    exclude_pattern = r'\b(' + '|'.join(exclude_patterns) + r')\b|\b(\d+)\s*(unidade|pacotes|caixas)\b'

    # Perform the filtering in one operation
    df_refinado = df[~df['Title'].str.contains(exclude_pattern, case=False, regex=True, na=False)]

    return df_refinado


def get_best_match(query, choices, limit=50):
    # Using RapidFuzz for improved performance and fuzzy matching
    matches = process.extract(query, choices, scorer=fuzz.WRatio, limit=limit)
    return [match[0] for match in matches if match[1] > 65]
    
def match_query_words_in_titles(query, title):
    """
    Check if all words in the query have a close match within the title.
    Returns True if all words match to a certain degree; False otherwise.
    """
    query_words = query.lower().split()
    match_threshold = 80  # Adjust this threshold as needed

    for word in query_words:
        # Find the best match for each word in the query within the title
        match_score = fuzz.partial_ratio(word, title.lower())
        if match_score < match_threshold:
            return False  # If any word doesn't match well enough, return False

    return True  # All words matched well enough

def filtrar_itens_similares(df, termo_pesquisa, limit=50):
    # Apply the match function to each title, filtering for those that match the query words
    matches = df['Title'].apply(lambda title: match_query_words_in_titles(termo_pesquisa, title))
    df_filtrado = df[matches]

    # Further refine the list to the top N matches based on overall similarity to the query
    df_filtrado['Overall_Similarity'] = df_filtrado['Title'].apply(lambda title: fuzz.WRatio(termo_pesquisa, title))
    df_filtrado = df_filtrado.sort_values('Overall_Similarity', ascending=False).head(limit)

    return df_filtrado

def calcular_fator_avaliacao(titulo, EC, PU):
    filtered_df = bens_df[bens_df['TITULO'] == titulo]
    if filtered_df.empty:
        return None  # Or handle the error as needed

    bem_info = filtered_df.iloc[0]
    VU, VR = bem_info['VIDA_UTIL'], bem_info['VALOR_RESIDUAL']
    ec_pontuacao = {'Excelente': 10, 'Bom': 8, 'Regular': 5, 'Péssimo': 2}[EC]

    PU, PVU, PUB = float(PU), min(10 - ((PU - 1) * (10 / VU)), 10), min(10 - (((VU - PU) - 1) * (10 / VU)), 10)
    fator_avaliacao = max((4 * ec_pontuacao + 6 * PVU - 3 * PUB) / 100, VR)
    return fator_avaliacao

def select_nearest_items(df, query):
    # Lower the title similarity threshold if necessary
    df['Title_Similarity'] = df['Title'].apply(lambda x: fuzz.WRatio(query, x))
    df_filtered = df[df['Title_Similarity'] > 65]  # Adjusted threshold

    # Calculate mode price in a more inclusive manner
    mode_price = df_filtered['Price'].mode()
    if mode_price.empty:
        target_price = df_filtered['Price'].median()
    else:
        target_price = mode_price.min()

    df_filtered['Distance'] = (df_filtered['Price'] - target_price).abs()
    df_sorted = df_filtered.sort_values(['Distance', 'Title_Similarity'], ascending=[True, False])

    # Ensure diversity in marketplaces
    marketplaces_selected = set()
    results = []

    for _, row in df_sorted.iterrows():
        if row['Marketplace'] not in marketplaces_selected and len(marketplaces_selected) < 5:
            results.append(row)
            marketplaces_selected.add(row['Marketplace'])

        if len(results) >= 5:
            break

    return pd.DataFrame(results)


def search_with_fallback(query, df, limit=50):
    query_parts = query.split()
    include_conjunto = "conjunto" in query.lower()
    
    for i in range(len(query_parts), 0, -1):
        simplified_query = " ".join(query_parts[:i])
        df_refinado = refinar_resultados(df, include_word=include_conjunto)
        df_filtrado = filtrar_itens_similares(df_refinado, simplified_query, limit=limit)
        
        if not df_filtrado.empty:
            return df_filtrado

    return pd.DataFrame()

def integrated_app(query, titulo, EC, PU):
    df_mercadolibre = fetch_data_to_dataframe(query, 50, "mercadolibre")
    print(df_mercadolibre)
    df_combined = pd.concat([df_mercadolibre, data_crawler], ignore_index=True)
    print(df_combined)

    if df_combined.empty:
        return "Nenhum dado encontrado. Tente uma consulta diferente.", pd.DataFrame()

    # Pass whether "conjunto" is part of the original query
    include_conjunto = "conjunto" in query.lower()
    df_refined = refinar_resultados(df_combined, include_word=include_conjunto)
    
    df_similares = search_with_fallback(query, df_refined)

    if df_similares.empty:
        return "Nenhum item similar encontrado.", pd.DataFrame()

    df_nearest = select_nearest_items(df_similares, query)
    if df_nearest.empty:
        return "Nenhum resultado próximo encontrado.", pd.DataFrame()

    fator_avaliacao = calcular_fator_avaliacao(titulo, EC, PU)
    valor_avaliacao = df_nearest['Price'].mean() * fator_avaliacao
    return f"Valor Médio do Bem: R$ {df_nearest['Price'].mean():.2f}, Fator de Avaliação: {fator_avaliacao*100:.2f}%, Valor de Avaliação: R$ {valor_avaliacao:.2f}", df_nearest

iface = gr.Interface(fn=integrated_app,
                     inputs=[gr.Textbox(label="Digite sua consulta"),
                             gr.Dropdown(label="Classificação Contábil do Bem", choices=bens_df['TITULO'].unique().tolist(), value="MOBILIÁRIO EM GERAL"),
                             gr.Radio(label="Estado de Conservação do Bem", choices=['Excelente', 'Bom', 'Regular', 'Péssimo'], value="Excelente"),
                             gr.Number(label="Período utilizado (anos)", value=1)],
                     outputs=[gr.Textbox(label="Cálculo"), gr.Dataframe(label="Resultados da Pesquisa")],
                     theme=gr.themes.Monochrome(),
                     title="<span style='color: gray; font-size: 48px;'>Avaliação de Bens Móveis</span>",
                     description="""<p style="text-align: left;"><b><span style='color: gray; font-size: 40px;'>aval</span><span style='color: black; font-size: 40px;'>ia</span><span style='color: gray; font-size: 40px;'>.se</b></p>""")

iface.launch()