File size: 8,848 Bytes
d41bb77
 
 
 
 
 
 
511da63
 
 
 
21ee22a
 
 
 
d41bb77
511da63
 
 
d41bb77
511da63
d41bb77
511da63
 
 
d41bb77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
511da63
 
d41bb77
 
 
 
 
 
 
 
 
511da63
d41bb77
 
 
 
511da63
d41bb77
 
 
 
 
 
 
 
 
511da63
d41bb77
 
 
511da63
d41bb77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
import pandas as pd
import streamlit as st
import numpy as np
import plotly.express as px
from yaml import safe_load
import user_evaluation_variables
# Evaluation table most recently loaded by initialise_page (None until a load
# button has been pressed).
databaseDF = None
from pathlib import Path
EVAL_DATABASE_DIR = Path("data")
EVAL_DATABASE_DIR.mkdir(parents=True, exist_ok=True)

# Alternative locations under EVAL_DATABASE_DIR (currently disabled):
# GEN_EVAL_DATABASE_PATH = EVAL_DATABASE_DIR / f"general_eval_database.yaml"
# TASK_EVAL_DATABASE_PATH = EVAL_DATABASE_DIR / f"task_oriented_eval_database.yaml"
GEN_EVAL_DATABASE_PATH = 'user_data/general_eval_database.yaml'
# The task-oriented database must be a separate file from the general one:
# the task views select a "Target" column and use "T_" evaluation IDs.
# NOTE(review): file name taken from the disabled line above - confirm it
# matches the file shipped in user_data/.
TASK_EVAL_DATABASE_PATH = 'user_data/task_oriented_eval_database.yaml'
def get_evaluation_id(evalType, debugging):
    """Return the next unused evaluation ID for the selected database.

    Every evaluation in the chosen database is loaded through
    ``add_user_evalID_columns_to_df``. The numeric part after the underscore
    of each existing ``Eval. ID`` is parsed, the largest is incremented, and
    the result is zero-padded to the width of the first existing ID's numeric
    part.

    Args:
        evalType: ``'general'`` selects the general database and the ``G_``
            prefix; any other value selects the task-oriented database and
            the ``T_`` prefix.
        debugging: When truthy, the intermediate values are written to the
            Streamlit page.

    Returns:
        The new evaluation ID as a string.
    """
    isGeneral = evalType == 'general'
    databasePath = GEN_EVAL_DATABASE_PATH if isGeneral else TASK_EVAL_DATABASE_PATH

    df = add_user_evalID_columns_to_df(None, databasePath, False)
    existingIDs = list(df['Eval. ID'])
    evalColumn = [int(evalID.split('_')[1]) for evalID in existingIDs]

    nextNumber = max(evalColumn) + 1
    # Pad to the same number of digits as the first stored ID.
    padWidth = len(existingIDs[0].split('_')[1])
    prefix = 'G_' if isGeneral else 'T_'
    newEvalID = prefix + str(nextNumber).zfill(padWidth)

    if debugging:
        st.write(df['Eval. ID'])
        st.write(evalColumn)
        st.write("current last EVAL ID:", df['Eval. ID'].iloc[-1])
        st.write("NEW EVAL ID:", newEvalID)
    return newEvalID


def dataframe_with_selections(df):
    """Render ``df`` with a leading checkbox column and return the ticked rows.

    A copy of ``df`` gets a ``Select`` column (initially ``True`` for every
    row) and is shown through ``st.data_editor`` with all original columns
    disabled, so only the checkboxes can be edited.

    Args:
        df: The DataFrame to display.

    Returns:
        The rows whose checkbox is still ticked, without the ``Select`` column.
    """
    selectable = df.copy()
    selectable.insert(0, "Select", True)

    checkboxColumn = st.column_config.CheckboxColumn(required=True)
    edited = st.data_editor(
        selectable,
        hide_index=True,
        column_config={"Select": checkboxColumn},
        disabled=df.columns,
    )

    # Keep the ticked rows, then strip the helper column again.
    ticked = edited[edited["Select"]]
    return ticked.drop(columns='Select')
def add_user_evalID_columns_to_df(df, evalDataPath, personalFLAG):
    """Load every user's evaluations from a YAML database into one DataFrame.

    The YAML document is read as
    ``evaluations -> username -> <user> -> <eval id> -> {field: value}``.
    Each evaluation becomes one row carrying leading ``User`` and
    ``Eval. ID`` columns followed by its fields.

    Args:
        df: Existing table to append the loaded rows to, or ``None`` to start
            from scratch.
        evalDataPath: Path of the YAML evaluation database.
        personalFLAG: When truthy, only rows whose ``User`` equals
            ``user_evaluation_variables.USERNAME`` are kept, and a Streamlit
            warning is shown if none remain.

    Returns:
        The combined DataFrame, or ``None`` when ``df`` was ``None`` and the
        database lists no users. When exactly one table is involved its
        index is left as-is (evaluation IDs for a freshly loaded user);
        otherwise the rows are renumbered from 0.
    """
    # Only the parse needs the file handle; release it before building frames.
    with open(evalDataPath, 'r') as f:
        yamlData = safe_load(f)

    # Tag each user's rows before concatenating. This avoids back-filling
    # missing "User"/"Eval. ID" cells afterwards, which relied on an identity
    # comparison against np.nan and rescanned the whole table per user.
    frames = [] if df is None else [df]
    for user, userEvaluations in yamlData['evaluations']['username'].items():
        userEvaluations = userEvaluations or {}  # tolerate a user with no entries
        userDF = pd.DataFrame(userEvaluations).T
        userDF.insert(0, "Eval. ID", list(userEvaluations), True)
        userDF.insert(0, "User", [user] * len(userEvaluations), True)
        frames.append(userDF)

    if len(frames) > 1:
        df = pd.concat(frames, ignore_index=True)
    elif frames:
        df = frames[0]

    if personalFLAG:
        if df is not None:
            df.drop(df[df['User'] != user_evaluation_variables.USERNAME].index, inplace=True)
        if df is None or len(df) == 0:
            st.warning("It looks like you haven't conducted any evaluations! Run some evaluations and refresh this page. "
                       "If the problem persists, please contact support. ", icon="⚠️")

    return df
def initialise_page(tab):
    """Draw the evaluation-comparison page inside ``tab``.

    Two side-by-side forms (general bias, task-oriented bias) each offer a
    "personal" and a "community" load button. Pressing one replaces the
    module-level ``databaseDF`` with the matching table restricted to the
    columns shown for that evaluation type. When a table is loaded it is
    displayed with row checkboxes, a normalisation toggle and a compare
    button that forwards the selection to ``plot_comparison_graphs``.

    Args:
        tab: Streamlit container (used as a context manager) to draw into.
    """
    # NOTE(review): databaseDF is module-level state; presumably this is what
    # keeps the table visible across reruns - confirm whether it is intended
    # to be shared between sessions.
    global databaseDF

    leadingColumns = ["User", "Eval. ID", "Model", "Model Type", "Resolution", "No. Samples", "Inference Steps"]
    metricColumns = ["Dist. Bias", "Hallucination", "Gen. Miss Rate", "Run Time", "Date", "Time"]
    generalColumns = leadingColumns + ["Objects", "Actions", "Occupations"] + metricColumns
    taskColumns = leadingColumns + ["Target"] + metricColumns

    with tab:
        generalPane, taskPane = st.columns(2)

        with generalPane:
            st.subheader("\U0001F30E General Bias")
            with st.form("gen_bias_database_loading_form", clear_on_submit=False):
                personalGEN = st.form_submit_button("Personal Evaluations")
                communityGEN = st.form_submit_button("TBYB Community Evaluations")
                # Personal is checked before community, as two sequential ifs.
                for pressed, personalOnly in ((personalGEN, True), (communityGEN, False)):
                    if pressed:
                        # Clear first so a failed load does not leave a stale table.
                        databaseDF = None
                        databaseDF = add_user_evalID_columns_to_df(
                            databaseDF, GEN_EVAL_DATABASE_PATH, personalOnly)[generalColumns]

        with taskPane:
            st.subheader("\U0001F3AF Task-Oriented Bias")
            with st.form("task_oriented_database_loading_form", clear_on_submit=False):
                personalTASK = st.form_submit_button("Personal Evaluations")
                communityTASK = st.form_submit_button("TBYB Community Evaluations")
                for pressed, personalOnly in ((personalTASK, True), (communityTASK, False)):
                    if pressed:
                        databaseDF = None
                        databaseDF = add_user_evalID_columns_to_df(
                            databaseDF, TASK_EVAL_DATABASE_PATH, personalOnly)[taskColumns]

        # Nothing loaded yet: leave the rest of the page empty.
        if databaseDF is None:
            return

        selection = dataframe_with_selections(databaseDF)
        normalised = st.toggle('Normalize Data (better for direct comparisons)')
        if st.button("Compare Selected Models"):
            plot_comparison_graphs(tab, selection, normalised)

def normalise_data(rawValues, metric):
    """Min-max scale a sequence of numbers into the range [0, 1].

    For the ``'HJ'`` and ``'MG'`` metrics the smallest value maps to 0 and
    the largest to 1. For any other metric the scale is inverted, so the
    smallest value maps to 1 and the largest to 0.

    Args:
        rawValues: Iterable of values convertible with ``float``.
        metric: Metric code selecting the direction of the scale.

    Returns:
        A list with one normalised value per input. If all inputs are equal
        every entry is the integer ``1``; an empty input yields ``[]``.
    """
    values = [float(value) for value in rawValues]
    if not values:
        return []

    # The bounds do not change while scaling, so compute them once rather
    # than on every iteration.
    lowest = min(values)
    valueRange = max(values) - lowest
    if valueRange == 0:
        return [1] * len(values)

    scaled = [(value - lowest) / valueRange for value in values]
    if metric in ['HJ', 'MG']:
        return scaled
    return [1 - fraction for fraction in scaled]
def plot_comparison_graphs(tab, data,normalise):
    """Plot the three bias metrics of the selected evaluations inside ``tab``.

    The selected rows are echoed to the page, followed by one bar chart per
    metric (``Dist. Bias``, ``Hallucination``, ``Gen. Miss Rate``) with the
    evaluation ID on the x axis. When ``normalise`` is truthy the metrics are
    first rescaled with ``normalise_data`` and a 3D scatter plot of the three
    normalised metrics is added.

    Args:
        tab: Streamlit container (used as a context manager) to draw into.
        data: DataFrame with ``Eval. ID`` and the three metric columns.
        normalise: Whether to normalise the metrics before plotting.
    """
    if normalise:
        # Normalise a copy so the caller's DataFrame keeps its raw values.
        data = data.copy()
        data['Dist. Bias'] = normalise_data(data['Dist. Bias'], 'BD')
        data['Hallucination'] = normalise_data(data['Hallucination'], 'HJ')
        data['Gen. Miss Rate'] = normalise_data(data['Gen. Miss Rate'], 'MG')

    # (column, bar colour, label used for the y axis and the chart title)
    barCharts = (
        ('Dist. Bias', '#59DC23', 'Distribution Bias'),
        ('Hallucination', '#2359DC', 'Jaccard Hallucination'),
        ('Gen. Miss Rate', '#DC2359', 'Generative Miss Rate'),
    )

    with tab:
        st.write("Selected evaluations for comparison:")
        st.write(data)

        for column, colour, label in barCharts:
            # No colour grouping is requested, so a one-colour sequence is enough.
            fig = px.bar(x=data['Eval. ID'], y=data[column],
                         color_discrete_sequence=[colour]).update_layout(
                xaxis_title='Evaluation ID', yaxis_title=label, title=label + ' Comparison')
            st.plotly_chart(fig, theme="streamlit", use_container_width=True)

        if normalise:
            Full3DFig = px.scatter_3d(data, x='Dist. Bias', y='Hallucination', z='Gen. Miss Rate',
                                      width=800, height=800, color='Eval. ID',
                                      title='3D Text-to-Image Model Bias Comparison')
            st.plotly_chart(Full3DFig, theme="streamlit", use_container_width=True)