Spaces:
Running
Running
Updated for V2.0
Browse files- model_comparison.py +26 -44
model_comparison.py
CHANGED
@@ -9,35 +9,37 @@ from huggingface_hub import snapshot_download
|
|
9 |
from profanity_check import predict
|
10 |
|
11 |
databaseDF = None
|
|
|
12 |
EVAL_DATABASE_DIR = Path("data")
|
13 |
EVAL_DATABASE_DIR.mkdir(parents=True, exist_ok=True)
|
14 |
|
15 |
GEN_EVAL_DATABASE_PATH = 'user_data/data/general_eval_database.yaml'
|
16 |
TASK_EVAL_DATABASE_PATH = 'user_data/data/task_oriented_eval_database.yaml'
|
17 |
-
def get_evaluation_id(evalType, debugging):
|
18 |
-
global GEN_EVAL_DATABASE_PATH
|
19 |
-
global TASK_EVAL_DATABASE_PATH
|
20 |
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
DFPath = TASK_EVAL_DATABASE_PATH
|
25 |
|
26 |
-
|
27 |
-
|
|
|
|
|
28 |
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
|
|
|
|
|
|
34 |
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
|
42 |
def check_profanity(df):
|
43 |
cleanedDF = df
|
@@ -64,33 +66,24 @@ def dataframe_with_selections(df):
|
|
64 |
# Filter the dataframe using the temporary column, then drop the column
|
65 |
selected_rows = edited_df[edited_df.Select]
|
66 |
return selected_rows.drop('Select', axis=1)
|
67 |
-
def add_user_evalID_columns_to_df(df, evalDataPath
|
68 |
with open(evalDataPath, 'r') as f:
|
69 |
yamlData = safe_load(f)
|
70 |
for user in yamlData['evaluations']['username']:
|
71 |
if df is None:
|
72 |
df = pd.DataFrame(yamlData['evaluations']['username'][user]).T
|
73 |
df.insert(0, "Eval. ID", list(yamlData['evaluations']['username'][user].keys()), True)
|
74 |
-
df.insert(0, "User", [user for i in range(len(yamlData['evaluations']['username'][user]))],
|
75 |
-
True)
|
76 |
else:
|
77 |
df = pd.concat([df, pd.DataFrame(yamlData['evaluations']['username'][user]).T],
|
78 |
ignore_index=True)
|
79 |
evalIDIterator = 0
|
80 |
for index, row in df.iterrows():
|
81 |
-
if row['User'] is np.nan:
|
82 |
-
df.loc[index, 'User'] = user
|
83 |
if row['Eval. ID'] is np.nan:
|
84 |
df.loc[index, 'Eval. ID'] = list(yamlData['evaluations']['username'][user].keys())[
|
85 |
evalIDIterator]
|
86 |
evalIDIterator += 1
|
87 |
-
if personalFLAG:
|
88 |
-
df.drop(df[df['User'] != user_evaluation_variables.USERNAME].index, inplace=True)
|
89 |
-
if len(df) == 0:
|
90 |
-
st.warning("It looks like you haven't conducted any evaluations! Run some evaluations and refresh this page."
|
91 |
-
"If the problem persists, please contact support. ", icon="⚠️")
|
92 |
-
|
93 |
return df
|
|
|
94 |
def initialise_page(tab):
|
95 |
global databaseDF
|
96 |
global GEN_EVAL_DATABASE_PATH
|
@@ -100,30 +93,19 @@ def initialise_page(tab):
|
|
100 |
with c1:
|
101 |
st.subheader("\U0001F30E General Bias")
|
102 |
with st.form("gen_bias_database_loading_form", clear_on_submit=False):
|
103 |
-
personalGEN = st.form_submit_button("Personal Evaluations")
|
104 |
communityGEN = st.form_submit_button("TBYB Community Evaluations")
|
105 |
-
if personalGEN:
|
106 |
-
databaseDF = None
|
107 |
-
databaseDF = add_user_evalID_columns_to_df(databaseDF, GEN_EVAL_DATABASE_PATH,True)[["User", "Eval. ID", "Model", "Model Type", "Resolution", "No. Samples", "Inference Steps",
|
108 |
-
"Objects", "Actions", "Occupations", "Dist. Bias", "Hallucination", "Gen. Miss Rate",
|
109 |
-
"Run Time", "Date", "Time"]]
|
110 |
if communityGEN:
|
111 |
databaseDF = None
|
112 |
-
databaseDF = add_user_evalID_columns_to_df(databaseDF, GEN_EVAL_DATABASE_PATH
|
113 |
"Objects", "Actions", "Occupations", "Dist. Bias", "Hallucination", "Gen. Miss Rate",
|
114 |
"Run Time", "Date", "Time"]]
|
115 |
with c2:
|
116 |
st.subheader("\U0001F3AF Task-Oriented Bias")
|
117 |
with st.form("task_oriented_database_loading_form", clear_on_submit=False):
|
118 |
-
personalTASK = st.form_submit_button("Personal Evaluations")
|
119 |
communityTASK = st.form_submit_button("TBYB Community Evaluations")
|
120 |
-
if personalTASK:
|
121 |
-
databaseDF = None
|
122 |
-
databaseDF = add_user_evalID_columns_to_df(databaseDF, TASK_EVAL_DATABASE_PATH, True)[["User", "Eval. ID", "Model", "Model Type", "Resolution", "No. Samples", "Inference Steps",
|
123 |
-
"Target", "Dist. Bias", "Hallucination", "Gen. Miss Rate", "Run Time", "Date", "Time"]]
|
124 |
if communityTASK:
|
125 |
databaseDF = None
|
126 |
-
databaseDF = add_user_evalID_columns_to_df(databaseDF, TASK_EVAL_DATABASE_PATH
|
127 |
"Target", "Dist. Bias", "Hallucination", "Gen. Miss Rate", "Run Time", "Date", "Time"]]
|
128 |
if databaseDF is not None:
|
129 |
selection = dataframe_with_selections(databaseDF)
|
|
|
9 |
from profanity_check import predict
|
10 |
|
11 |
databaseDF = None
|
12 |
+
|
13 |
EVAL_DATABASE_DIR = Path("data")
|
14 |
EVAL_DATABASE_DIR.mkdir(parents=True, exist_ok=True)
|
15 |
|
16 |
GEN_EVAL_DATABASE_PATH = 'user_data/data/general_eval_database.yaml'
|
17 |
TASK_EVAL_DATABASE_PATH = 'user_data/data/task_oriented_eval_database.yaml'
|
|
|
|
|
|
|
18 |
|
19 |
+
# def get_evaluation_id(evalType, debugging):
|
20 |
+
# global GEN_EVAL_DATABASE_PATH
|
21 |
+
# global TASK_EVAL_DATABASE_PATH
|
|
|
22 |
|
23 |
+
# if evalType == 'general':
|
24 |
+
# DFPath = GEN_EVAL_DATABASE_PATH
|
25 |
+
# else:
|
26 |
+
# DFPath = TASK_EVAL_DATABASE_PATH
|
27 |
|
28 |
+
# df = add_user_evalID_columns_to_df(None, DFPath, False)
|
29 |
+
# evalColumn = [int(x.split('_')[1]) for x in list(df['Eval. ID'])]
|
30 |
+
|
31 |
+
# newEvalID = max(evalColumn) + 1
|
32 |
+
# if evalType == 'general':
|
33 |
+
# newEvalID = 'G_'+str(newEvalID).zfill(len(list(df['Eval. ID'])[0].split('_')[1]))
|
34 |
+
# else:
|
35 |
+
# newEvalID = 'T_' + str(newEvalID).zfill(len(list(df['Eval. ID'])[0].split('_')[1]))
|
36 |
|
37 |
+
# if debugging:
|
38 |
+
# st.write(df['Eval. ID'])
|
39 |
+
# st.write(evalColumn)
|
40 |
+
# st.write("current last EVAL ID:", df['Eval. ID'].iloc[-1])
|
41 |
+
# st.write("NEW EVAL ID:", newEvalID)
|
42 |
+
# return newEvalID
|
43 |
|
44 |
def check_profanity(df):
|
45 |
cleanedDF = df
|
|
|
66 |
# Filter the dataframe using the temporary column, then drop the column
|
67 |
selected_rows = edited_df[edited_df.Select]
|
68 |
return selected_rows.drop('Select', axis=1)
|
69 |
+
def add_user_evalID_columns_to_df(df, evalDataPath):
|
70 |
with open(evalDataPath, 'r') as f:
|
71 |
yamlData = safe_load(f)
|
72 |
for user in yamlData['evaluations']['username']:
|
73 |
if df is None:
|
74 |
df = pd.DataFrame(yamlData['evaluations']['username'][user]).T
|
75 |
df.insert(0, "Eval. ID", list(yamlData['evaluations']['username'][user].keys()), True)
|
|
|
|
|
76 |
else:
|
77 |
df = pd.concat([df, pd.DataFrame(yamlData['evaluations']['username'][user]).T],
|
78 |
ignore_index=True)
|
79 |
evalIDIterator = 0
|
80 |
for index, row in df.iterrows():
|
|
|
|
|
81 |
if row['Eval. ID'] is np.nan:
|
82 |
df.loc[index, 'Eval. ID'] = list(yamlData['evaluations']['username'][user].keys())[
|
83 |
evalIDIterator]
|
84 |
evalIDIterator += 1
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
return df
|
86 |
+
|
87 |
def initialise_page(tab):
|
88 |
global databaseDF
|
89 |
global GEN_EVAL_DATABASE_PATH
|
|
|
93 |
with c1:
|
94 |
st.subheader("\U0001F30E General Bias")
|
95 |
with st.form("gen_bias_database_loading_form", clear_on_submit=False):
|
|
|
96 |
communityGEN = st.form_submit_button("TBYB Community Evaluations")
|
|
|
|
|
|
|
|
|
|
|
97 |
if communityGEN:
|
98 |
databaseDF = None
|
99 |
+
databaseDF = add_user_evalID_columns_to_df(databaseDF, GEN_EVAL_DATABASE_PATH)[["Eval. ID", "Model", "Model Type", "Resolution", "No. Samples", "Inference Steps",
|
100 |
"Objects", "Actions", "Occupations", "Dist. Bias", "Hallucination", "Gen. Miss Rate",
|
101 |
"Run Time", "Date", "Time"]]
|
102 |
with c2:
|
103 |
st.subheader("\U0001F3AF Task-Oriented Bias")
|
104 |
with st.form("task_oriented_database_loading_form", clear_on_submit=False):
|
|
|
105 |
communityTASK = st.form_submit_button("TBYB Community Evaluations")
|
|
|
|
|
|
|
|
|
106 |
if communityTASK:
|
107 |
databaseDF = None
|
108 |
+
databaseDF = add_user_evalID_columns_to_df(databaseDF, TASK_EVAL_DATABASE_PATH)[["Eval. ID", "Model", "Model Type", "Resolution", "No. Samples", "Inference Steps",
|
109 |
"Target", "Dist. Bias", "Hallucination", "Gen. Miss Rate", "Run Time", "Date", "Time"]]
|
110 |
if databaseDF is not None:
|
111 |
selection = dataframe_with_selections(databaseDF)
|