Corey Morris committed on
Commit
843a5ef
·
1 Parent(s): 03ade34

Refactoring. Moved ResultDataProcessor class to a separate file to make it easier to use with experimentation in a jupyter notebook

Browse files
Files changed (2) hide show
  1. app.py +4 -72
  2. result_data_processor.py +68 -0
app.py CHANGED
@@ -1,73 +1,7 @@
1
  import streamlit as st
2
  import pandas as pd
3
- import os
4
- import fnmatch
5
- import json
6
  import plotly.express as px
7
-
8
- class ResultDataProcessor:
9
- def __init__(self):
10
- self.data = self.process_data()
11
-
12
- def process_data(self):
13
- dataframes = []
14
-
15
- def find_files(directory, pattern):
16
- for root, dirs, files in os.walk(directory):
17
- for basename in files:
18
- if fnmatch.fnmatch(basename, pattern):
19
- filename = os.path.join(root, basename)
20
- yield filename
21
-
22
- for filename in find_files('results', 'results*.json'):
23
- model_name = filename.split('/')[2]
24
- with open(filename) as f:
25
- data = json.load(f)
26
- df = pd.DataFrame(data['results']).T
27
-
28
-
29
- # data cleanup
30
- df = df.rename(columns={'acc': model_name})
31
- # Replace 'hendrycksTest-' with a more descriptive column name
32
- df.index = df.index.str.replace('hendrycksTest-', 'MMLU_', regex=True)
33
- df.index = df.index.str.replace('harness\|', '', regex=True)
34
- # remove |5 from the index
35
- df.index = df.index.str.replace('\|5', '', regex=True)
36
-
37
-
38
- dataframes.append(df[[model_name]])
39
-
40
- data = pd.concat(dataframes, axis=1)
41
-
42
- data = data.transpose()
43
- data['Model Name'] = data.index
44
- cols = data.columns.tolist()
45
- cols = cols[-1:] + cols[:-1]
46
- data = data[cols]
47
-
48
- # remove the Model Name column
49
- data = data.drop(['Model Name'], axis=1)
50
-
51
- # remove the all column
52
- data = data.drop(['all'], axis=1)
53
-
54
- # remove the truthfulqa:mc|0 column
55
- data = data.drop(['truthfulqa:mc|0'], axis=1)
56
-
57
- # create a new column that averages the results from each of the columns with a name that start with MMLU
58
- data['MMLU_average'] = data.filter(regex='MMLU').mean(axis=1)
59
-
60
- # move the MMLU_average column to the third column in the dataframe
61
- cols = data.columns.tolist()
62
- cols = cols[:2] + cols[-1:] + cols[2:-1]
63
- data = data[cols]
64
-
65
- return data
66
-
67
- # filter data based on the index
68
- def get_data(self, selected_models):
69
- filtered_data = self.data[self.data.index.isin(selected_models)]
70
- return filtered_data
71
 
72
  data_provider = ResultDataProcessor()
73
 
@@ -131,10 +65,6 @@ def create_plot(df, arc_column, moral_column, models=None):
131
 
132
  return fig
133
 
134
-
135
-
136
- st.header('Overall benchmark comparison')
137
-
138
  st.header('Custom scatter plots')
139
  selected_x_column = st.selectbox('Select x-axis', filtered_data.columns.tolist(), index=0)
140
  selected_y_column = st.selectbox('Select y-axis', filtered_data.columns.tolist(), index=1)
@@ -145,6 +75,8 @@ if selected_x_column != selected_y_column: # Avoid creating a plot with the s
145
  else:
146
  st.write("Please select different columns for the x and y axes.")
147
 
 
 
148
  fig = create_plot(filtered_data, 'arc:challenge|25', 'hellaswag|10')
149
  st.plotly_chart(fig)
150
 
@@ -159,7 +91,7 @@ top_50 = filtered_data.nlargest(50, 'MMLU_average')
159
  fig = create_plot(top_50, 'arc:challenge|25', 'MMLU_average')
160
  st.plotly_chart(fig)
161
 
162
- st.header('Moral Scenarios')
163
 
164
  fig = create_plot(filtered_data, 'arc:challenge|25', 'MMLU_moral_scenarios')
165
  st.plotly_chart(fig)
 
1
  import streamlit as st
2
  import pandas as pd
 
 
 
3
  import plotly.express as px
4
+ from result_data_processor import ResultDataProcessor
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  data_provider = ResultDataProcessor()
7
 
 
65
 
66
  return fig
67
 
 
 
 
 
68
  st.header('Custom scatter plots')
69
  selected_x_column = st.selectbox('Select x-axis', filtered_data.columns.tolist(), index=0)
70
  selected_y_column = st.selectbox('Select y-axis', filtered_data.columns.tolist(), index=1)
 
75
  else:
76
  st.write("Please select different columns for the x and y axes.")
77
 
78
+ st.header('Overall evaluation comparisons')
79
+
80
  fig = create_plot(filtered_data, 'arc:challenge|25', 'hellaswag|10')
81
  st.plotly_chart(fig)
82
 
 
91
  fig = create_plot(top_50, 'arc:challenge|25', 'MMLU_average')
92
  st.plotly_chart(fig)
93
 
94
+ st.header('Moral Reasoning')
95
 
96
  fig = create_plot(filtered_data, 'arc:challenge|25', 'MMLU_moral_scenarios')
97
  st.plotly_chart(fig)
result_data_processor.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import os
3
+ import fnmatch
4
+ import json
5
+
6
class ResultDataProcessor:
    """Collect per-model evaluation result files into a single DataFrame.

    The directory tree is walked for JSON files whose basename matches
    ``pattern``. Each file must contain a top-level ``'results'`` entry
    (read as a mapping of task name -> metrics; the ``'acc'`` metric is the
    one kept). The combined table is stored in ``self.data`` with one row
    per model and one column per task, plus an ``'MMLU_average'`` column.

    Args:
        directory: Root directory to search. Defaults to ``'results'``,
            resolved against the current working directory.
        pattern: ``fnmatch`` pattern applied to file basenames.
    """

    def __init__(self, directory='results', pattern='results*.json'):
        self.directory = directory
        self.pattern = pattern
        self.data = self.process_data()

    @staticmethod
    def _find_files(directory, pattern):
        """Yield the path of every file under *directory* matching *pattern*."""
        for root, _dirs, files in os.walk(directory):
            for basename in files:
                if fnmatch.fnmatch(basename, pattern):
                    yield os.path.join(root, basename)

    def _model_name(self, filename):
        """Return the second path component of *filename* below the root.

        For ``<directory>/<org>/<model>/<file>`` this is ``<model>``. Using a
        path relative to ``self.directory`` keeps the lookup independent of
        the root's depth and of the platform's path separator.
        """
        relative = os.path.relpath(filename, self.directory)
        return relative.split(os.sep)[1]

    def _load_model_scores(self, filename):
        """Read one result file into a single-column frame (task -> acc)."""
        model_name = self._model_name(filename)
        with open(filename, encoding='utf-8') as f:
            raw = json.load(f)

        # Rows become tasks and columns become metrics after the transpose.
        df = pd.DataFrame(raw['results']).T
        df = df.rename(columns={'acc': model_name})

        # Task-name cleanup: plain substring replacements, no regex needed.
        df.index = df.index.str.replace('hendrycksTest-', 'MMLU_', regex=False)
        df.index = df.index.str.replace('harness|', '', regex=False)
        df.index = df.index.str.replace('|5', '', regex=False)

        return df[[model_name]]

    def process_data(self):
        """Build the models x tasks table from all matching result files.

        Returns:
            DataFrame indexed by model name. The ``'all'`` and
            ``'truthfulqa:mc|0'`` columns are removed when present, and
            ``'MMLU_average'`` (row mean of every column whose name contains
            ``'MMLU'``) is inserted after the first two task columns.

        Raises:
            ValueError: If no matching result file is found.
        """
        dataframes = [
            self._load_model_scores(filename)
            for filename in self._find_files(self.directory, self.pattern)
        ]
        if not dataframes:
            raise ValueError(
                f"No result files matching {self.pattern!r} "
                f"found under {self.directory!r}"
            )

        # One column per model -> transpose so each model is a row.
        data = pd.concat(dataframes, axis=1).transpose()

        # Drop the aggregate and truthfulqa columns; tolerate their absence.
        data = data.drop(columns=['all', 'truthfulqa:mc|0'], errors='ignore')

        # Capture the task columns before adding the average so the reorder
        # below cannot duplicate it when there are fewer than two tasks.
        task_cols = data.columns.tolist()
        data['MMLU_average'] = data.filter(regex='MMLU').mean(axis=1)

        # Place MMLU_average as the third column.
        data = data[task_cols[:2] + ['MMLU_average'] + task_cols[2:]]

        return data

    def get_data(self, selected_models):
        """Return the rows of ``self.data`` whose index is in *selected_models*."""
        filtered_data = self.data[self.data.index.isin(selected_models)]
        return filtered_data