import os
import uuid
from datetime import datetime

import numpy as np
import pandas as pd
import streamlit as st
import huggingface_hub as hh

# Hugging Face repos the app reads from / writes to
OWNER = "Booking-com"
MAX_SUBMISSIONS = 100
REPO_ID = f"{OWNER}/streamlit-review-ranking-leaderboard"
RESULTS_REPO = f"{OWNER}/results"
GT_REPO = f"{OWNER}/accommodation-reviews-gt"
GROUPS_INFO_REPO = f"{OWNER}/rectour2024-groups"

TOKEN = os.environ.get("HF_TOKEN")
CACHE_PATH = os.getenv("HF_HOME", ".")
EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
TEMP_RESULTS_PATH = os.path.join(CACHE_PATH, "temp-results")
GT_PATH = os.path.join(CACHE_PATH, "gt")
GROUPS_INFO_PATH = os.path.join(CACHE_PATH, "groups-info")

REQUIRED_COLUMNS = ['accommodation_id', 'user_id'] + [f'review_{i}' for i in range(1, 11)]

API = hh.HfApi(token=TOKEN)


def restart_space():
    API.restart_space(repo_id=REPO_ID)


# download the GT - it shouldn't update too frequently
hh.snapshot_download(
    repo_id=GT_REPO, local_dir=GT_PATH, repo_type="dataset",
    tqdm_class=None, etag_timeout=30, token=TOKEN
)


def refresh_data():
    hh.snapshot_download(
        repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset",
        tqdm_class=None, etag_timeout=30, token=TOKEN
    )
    hh.snapshot_download(
        repo_id=GROUPS_INFO_REPO, local_dir=GROUPS_INFO_PATH, repo_type="dataset",
        tqdm_class=None, etag_timeout=30, token=TOKEN
    )


refresh_data()


def get_match_index(row):
    # 1-based rank at which the ground-truth review_id appears among the
    # predicted review_1..review_10 columns; infinity if it is missing.
    for i in range(1, 11):
        if row['review_id'] == row[f'review_{i}']:
            return i
    return np.inf


def calculate_metrics(df_pred):
    df_gt = pd.read_csv(os.path.join(GT_PATH, 'test_matches.csv'))
    if len(df_pred) != len(df_gt):
        raise Exception("Your predictions file should contain {} rows, but only {} rows were found in the file".format(
            len(df_gt), len(df_pred)
        ))
    df_merged = pd.merge(df_gt, df_pred, how='left', on=['accommodation_id', 'user_id']).fillna('')
    df_merged['match_index'] = df_merged.apply(get_match_index, axis=1)
    df_merged['mrr10'] = df_merged['match_index'].apply(lambda x: 1 / x)
    df_merged['precision10'] = df_merged['match_index'].apply(lambda x: 1 if x != np.inf else 0)
    return df_merged['mrr10'].mean(), df_merged['precision10'].mean()


def get_group_name_by_email(email):
    df = pd.read_csv(os.path.join(GROUPS_INFO_PATH, 'groups_data.csv'))
    df_email = df[df['email'] == email].reset_index(drop=True)
    if len(df_email) > 0:
        return df_email.iloc[0]['group_name']
    else:
        raise Exception("E-mail is not valid")


def validate_pred_file(df_pred):
    for col in REQUIRED_COLUMNS:
        if col not in df_pred.columns:
            raise Exception(f"Column {col} not in prediction file")


def get_revision(df_results, email):
    df_group_data = df_results[df_results['email'] == email]
    curr_revision = 0
    if len(df_group_data) > 0:
        curr_revision = df_group_data['revision'].max()
    if curr_revision >= MAX_SUBMISSIONS:
        raise Exception("We're sorry, but you have reached your maximal number of submissions")
    return curr_revision


def get_results_dataframe():
    dfs = []
    for f in os.listdir(EVAL_RESULTS_PATH):
        if f.endswith('.csv'):
            dfs.append(pd.read_csv(os.path.join(EVAL_RESULTS_PATH, f)))
    return pd.concat(dfs)


def upload_results(group_email, group_name, model_name, revision, mrr10, precision10):
    submission_date = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    if not os.path.exists(TEMP_RESULTS_PATH):
        os.mkdir(TEMP_RESULTS_PATH)
    df_temp_results = pd.DataFrame({'email': [group_email],
                                    'set_name': ["test set"],
                                    'group_name': [group_name],
                                    "model_name": [model_name],
                                    "submission_date": [submission_date],
                                    "revision": [revision],
                                    "MRR@10": [mrr10],
                                    "Precision@10": [precision10]})
    # write the single-row result to a uniquely named csv and push it to the results dataset
    temp_results_fn = str(uuid.uuid4()) + '.csv'
    temp_path = os.path.join(TEMP_RESULTS_PATH, temp_results_fn)
    df_temp_results.to_csv(temp_path, index=False)
    hh.upload_file(path_or_fileobj=temp_path, repo_id=RESULTS_REPO, token=TOKEN,
                   repo_type="dataset", path_in_repo=temp_results_fn)


def render():
    st.set_page_config(page_title="RecTour2024 - Booking.com Review Ranking Challenge Leaderboard", layout="wide")
    st.title("🏆 RecTour2024 Leaderboard")
    leaderboard_tab, submission_tab = st.tabs(["Leaderboard", "Submission"])

    # leaderboard area
    if leaderboard_tab.button("Refresh"):
        refresh_data()
    df_results = get_results_dataframe()
    leaderboard_tab.dataframe(df_results.drop(columns=['email'])
                              .sort_values(['set_name', 'MRR@10'], ascending=[True, False]))

    # submission area
    group_email = submission_tab.text_input(label="Group email", value="")
    model_name = submission_tab.text_input(label="Model name", value="")
    pred_file = submission_tab.file_uploader(label="Upload your prediction file",
                                             help="Upload a csv.zip file; in pandas this can be achieved "
                                                  "with df.to_csv(..., compression='zip')")
    if submission_tab.button("Upload"):
        if not pred_file:
            submission_tab.markdown("No file was submitted!")
        else:
            try:
                group_name = get_group_name_by_email(group_email)
                df_pred = pd.read_csv(pred_file, compression='zip')
                validate_pred_file(df_pred)
                mrr10, precision10 = calculate_metrics(df_pred)
                revision = get_revision(df_results=df_results, email=group_email) + 1  # generate next revision id
                upload_results(group_email=group_email, group_name=group_name, model_name=model_name,
                               revision=revision, mrr10=mrr10, precision10=precision10)
                submission_tab.markdown("## THANK YOU FOR YOUR SUBMISSION!")
                submission_tab.markdown("Here are your submission details:")
                submission_tab.markdown("**Group name:** " + group_name)
                submission_tab.markdown("**Model name:** " + model_name)
                submission_tab.markdown("**Revision:** " + str(revision) + f" (out of {MAX_SUBMISSIONS} allowed submissions)")
                submission_tab.write("### Submission results")
                submission_tab.markdown("**MRR@10:** {:.4f}".format(mrr10))
                submission_tab.markdown("**Precision@10:** {:.4f}".format(precision10))
            except Exception as e:
                submission_tab.markdown(str(e))


if __name__ == "__main__":
    render()
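

# Reference sketch (never called by the app): one way a participant might assemble
# the csv.zip prediction file this leaderboard expects. The column layout follows
# REQUIRED_COLUMNS above; `df_test`, `candidate_review_ids`, and the function name
# are hypothetical placeholders for each group's own data and ranking output.
def build_submission_example(df_test, candidate_review_ids, out_path='predictions.zip'):
    # candidate_review_ids: one list of 10 review ids per test row, best-ranked first (assumed shape)
    preds = pd.DataFrame({
        'accommodation_id': df_test['accommodation_id'],
        'user_id': df_test['user_id'],
    })
    for i in range(1, 11):
        preds[f'review_{i}'] = [ids[i - 1] for ids in candidate_review_ids]
    # zip-compressed csv, which the submission tab reads back with pd.read_csv(..., compression='zip')
    preds.to_csv(out_path, index=False, compression='zip')
    return preds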