felix committed on
Commit
0b04027
1 Parent(s): ee1c446

add side by side compare

Browse files
Files changed (1) hide show
  1. app.py +148 -72
app.py CHANGED
@@ -20,40 +20,6 @@ def format_dir_date(data_dir):
20
  # Formatting the parsed date
21
  return parsed_date.strftime("%b %d, %Y %H:%M")
22
 
23
- col1, col2 = st.columns(2)
24
-
25
- with col1:
26
- data_dir = st.selectbox(
27
- 'Select different data generation date',
28
- directories,
29
- format_func=format_dir_date,
30
- index=len(directories)-1,
31
- )
32
-
33
- captions_map = {
34
- "hg_average_to_agentbench_compare.png": "HF to AgentBench compare",
35
- "hg_average_to_opencompass_compare.png": "HF to OpenCompass compare",
36
- "hg_average_to_mt_bench_compare.png": "HF to MT-Bench compare",
37
- "hg_average_to_mosaic_compare.png": "HF to MosaicML compare",
38
- "hg_average_to_alpacaeval_compare.png": "HF to AlpacaEval compare"
39
- }
40
- with col2:
41
- st.write("<div style=\"text-align: center\" >Generated on: <b>" + format_dir_date(data_dir) + "</b></div>", unsafe_allow_html=True)
42
-
43
-
44
- data_path = './data/' + data_dir
45
-
46
- imgs = glob.glob(os.path.join(data_path, '*.png'))
47
-
48
- # Grouping images by the keyword contained in their filename
49
- hf_llm_diagrams = [img for img in imgs if 'hf_llm_diagram' in os.path.basename(img)]
50
- bigcode_diagrams = [img for img in imgs if 'bigcode' in os.path.basename(img)]
51
- mt_bench_diagrams = [img for img in imgs if 'mt_bench_leaderboard' in os.path.basename(img)]
52
- opencompass_diagrams = [img for img in imgs if 'opencompass_leaderboard' in os.path.basename(img)]
53
-
54
- # Getting the remaining images
55
- remaining_imgs = list(set(imgs) - set(hf_llm_diagrams) - set(bigcode_diagrams) - set(mt_bench_diagrams) - set(opencompass_diagrams))
56
-
57
  def print_model_list(file_name, st, split_into_two=False):
58
  file_path = file_name[:-4] + '.json'
59
  # Read the list from the JSON file
@@ -96,66 +62,176 @@ def print_model_list(file_name, st, split_into_two=False):
96
  final_html += "</ul>"
97
  st.write(final_html, unsafe_allow_html=True)
98
 
 
99
 
100
- st.subheader("HuggingFace Open LLM leaderboard by Model Size", divider=True)
101
- cols = st.columns(2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
 
103
- cols[0].image(hf_llm_diagrams[0], caption="Main chart using all the models", use_column_width="auto")
 
 
 
 
 
 
104
 
105
- print_model_list(hf_llm_diagrams[0],st, True)
106
- st.write("<nbsp/>", unsafe_allow_html=True)
107
 
108
- cols = st.columns(2)
109
 
110
- cols[0].image(hf_llm_diagrams[1],caption="Other or commercially permissive licenses only", use_column_width="auto")
111
- print_model_list(hf_llm_diagrams[1],cols[0])
112
 
113
- cols[1].image(hf_llm_diagrams[2],caption="Commercially permissive license only", use_column_width="auto")
114
- print_model_list(hf_llm_diagrams[2],cols[1])
115
 
116
- st.write("<nbsp/>", unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
 
118
- cols = st.columns(2)
119
- cols[0].image(hf_llm_diagrams[3],caption="TruthfulQA at 10% for HuggingFace Open LLM leaderboard by Model Size", use_column_width="auto")
120
- print_model_list(hf_llm_diagrams[3],cols[0],False)
121
 
122
- cols[1].image(hf_llm_diagrams[4],caption="ARC at 50% and MMLU at 50% for HuggingFace Open LLM leaderboard by Model Size", use_column_width="auto")
123
- print_model_list(hf_llm_diagrams[4],cols[1],False)
 
124
 
 
 
125
 
126
 
127
- st.subheader("Big Code Models Leaderboard", divider=True)
128
- cols = st.columns(2)
129
- cols[0].image(bigcode_diagrams[0], use_column_width="auto")
130
 
131
 
132
- print_model_list(bigcode_diagrams[0],st,True)
133
 
134
- st.subheader("MT-Bench Models Leaderboard", divider=True)
135
- cols = st.columns(2)
136
- cols[0].image(mt_bench_diagrams[0], use_column_width="auto")
137
 
138
- print_model_list(mt_bench_diagrams[0],st,True)
139
 
140
- st.subheader("OpenCompass Models Leaderboard", divider=True)
141
- cols = st.columns(2)
142
- cols[0].image(opencompass_diagrams[0], use_column_width="auto")
143
- print_model_list(opencompass_diagrams[0],st,True)
144
 
145
- st.subheader("HuggingFace and Other Leaderboards: A Comparative Model Evaluation", divider=True)
146
- st.caption("Only models evaluated on both leaderboards are included.")
147
 
148
- cols = st.columns(2)
149
 
150
- for i, img in enumerate(remaining_imgs):
151
- # Extract the filename from the full image path
152
- filename = os.path.basename(img)
153
 
154
- # Get the caption from the captions_map dictionary
155
- caption = captions_map.get(filename, "") # If no caption is found, it will default to an empty string
156
 
157
- # Display the image with the caption
158
- cols[i % 2].image(img, caption=caption, width=None)
159
 
160
  st.write(
161
  """
 
20
  # Formatting the parsed date
21
  return parsed_date.strftime("%b %d, %Y %H:%M")
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  def print_model_list(file_name, st, split_into_two=False):
24
  file_path = file_name[:-4] + '.json'
25
  # Read the list from the JSON file
 
62
  final_html += "</ul>"
63
  st.write(final_html, unsafe_allow_html=True)
64
 
65
+ col1, col2 = st.columns(2)
66
 
67
+ with col1:
68
+ data_dir = st.selectbox(
69
+ 'Select different data generation date',
70
+ directories,
71
+ format_func=format_dir_date,
72
+ index=len(directories)-1,
73
+ )
74
+ with col2:
75
+ compare_mode = st.checkbox('Enable compare to different date')
76
+ if compare_mode:
77
+ compare_data_dir = st.selectbox(
78
+ 'Select date for comparison',
79
+ directories,
80
+ format_func=format_dir_date,
81
+ index=len(directories)-1,
82
+ )
83
 
84
+ captions_map = {
85
+ "hg_average_to_agentbench_compare.png": "HF to AgentBench compare",
86
+ "hg_average_to_opencompass_compare.png": "HF to OpenCompass compare",
87
+ "hg_average_to_mt_bench_compare.png": "HF to MT-Bench compare",
88
+ "hg_average_to_mosaic_compare.png": "HF to MosaicML compare",
89
+ "hg_average_to_alpacaeval_compare.png": "HF to AlpacaEval compare"
90
+ }
91
 
92
+ with col1:
93
+ st.write("<div style=\"text-align: center\" >Generated on: <b>" + format_dir_date(data_dir) + "</b></div>", unsafe_allow_html=True)
94
 
 
95
 
96
+ data_path = './data/' + data_dir
 
97
 
98
+ # Adjust the data path loading logic
99
+ if compare_mode:
100
 
101
+ # Side by side compare:
102
+ compare_data_path = './data/' + compare_data_dir
103
+
104
+ # Load images from both directories
105
+ imgs = glob.glob(os.path.join(data_path, '*.png'))
106
+ compare_imgs = glob.glob(os.path.join(compare_data_path, '*.png'))
107
+
108
+ # Extracting images whose filenames contain specific keywords from both sets
109
+ def extract_images(keyword, img_list):
110
+ return [img for img in img_list if keyword in os.path.basename(img)]
111
+
112
+ hf_llm_diagrams = extract_images('hf_llm_diagram', imgs)
113
+ bigcode_diagrams = extract_images('bigcode', imgs)
114
+ mt_bench_diagrams = extract_images('mt_bench_leaderboard', imgs)
115
+ opencompass_diagrams = extract_images('opencompass_leaderboard', imgs)
116
+
117
+ compare_hf_llm_diagrams = extract_images('hf_llm_diagram', compare_imgs)
118
+ compare_bigcode_diagrams = extract_images('bigcode', compare_imgs)
119
+ compare_mt_bench_diagrams = extract_images('mt_bench_leaderboard', compare_imgs)
120
+ compare_opencompass_diagrams = extract_images('opencompass_leaderboard', compare_imgs)
121
+
122
+ # Display each category side by side
123
+ def display_side_by_side(diagrams1, diagrams2, title):
124
+ st.subheader(title, divider=True)
125
+ for d1, d2 in zip(diagrams1, diagrams2):
126
+ cols = st.columns(2)
127
+ cols[0].image(d1, use_column_width="auto")
128
+ cols[1].image(d2, use_column_width="auto")
129
+
130
+ # Displaying HuggingFace LLM Leaderboard
131
+ display_side_by_side(hf_llm_diagrams, compare_hf_llm_diagrams, "HuggingFace Open LLM leaderboard by Model Size")
132
+
133
+ # Displaying Big Code Models Leaderboard
134
+ display_side_by_side(bigcode_diagrams, compare_bigcode_diagrams, "Big Code Models Leaderboard")
135
+
136
+ # Displaying MT-Bench Models Leaderboard
137
+ display_side_by_side(mt_bench_diagrams, compare_mt_bench_diagrams, "MT-Bench Models Leaderboard")
138
+
139
+ # Displaying OpenCompass Models Leaderboard
140
+ display_side_by_side(opencompass_diagrams, compare_opencompass_diagrams, "OpenCompass Models Leaderboard")
141
+
142
+ # Extracting remaining images from both sets
143
+ remaining_imgs = list(set(imgs) - set(hf_llm_diagrams) - set(bigcode_diagrams) - set(mt_bench_diagrams) - set(opencompass_diagrams))
144
+ compare_remaining_imgs = list(set(compare_imgs) - set(compare_hf_llm_diagrams) - set(compare_bigcode_diagrams) - set(compare_mt_bench_diagrams) - set(compare_opencompass_diagrams))
145
+
146
+ st.subheader("HuggingFace and Other Leaderboards: A Comparative Model Evaluation", divider=True)
147
+ st.caption("Only models evaluated on both leaderboards are included.")
148
+
149
+ # Display remaining images side by side
150
+ for img, compare_img in zip(remaining_imgs, compare_remaining_imgs):
151
+ cols = st.columns(2)
152
+
153
+ # Extract the filename and caption for the first image
154
+ filename = os.path.basename(img)
155
+ caption = captions_map.get(filename, "")
156
+
157
+ # Extract the filename and caption for the comparison image
158
+ compare_filename = os.path.basename(compare_img)
159
+ compare_caption = captions_map.get(compare_filename, "")
160
+
161
+ # Display the images with captions
162
+ cols[0].image(img, caption=caption, width=None)
163
+ cols[1].image(compare_img, caption=compare_caption, width=None)
164
+
165
+ else:
166
+ imgs = glob.glob(os.path.join(data_path, '*.png'))
167
+
168
+ # Grouping images by the keyword contained in their filename
169
+ hf_llm_diagrams = [img for img in imgs if 'hf_llm_diagram' in os.path.basename(img)]
170
+ bigcode_diagrams = [img for img in imgs if 'bigcode' in os.path.basename(img)]
171
+ mt_bench_diagrams = [img for img in imgs if 'mt_bench_leaderboard' in os.path.basename(img)]
172
+ opencompass_diagrams = [img for img in imgs if 'opencompass_leaderboard' in os.path.basename(img)]
173
+
174
+ # Getting the remaining images
175
+ remaining_imgs = list(set(imgs) - set(hf_llm_diagrams) - set(bigcode_diagrams) - set(mt_bench_diagrams) - set(opencompass_diagrams))
176
+
177
+ st.subheader("HuggingFace Open LLM leaderboard by Model Size", divider=True)
178
+ cols = st.columns(2)
179
+
180
+ cols[0].image(hf_llm_diagrams[0], caption="Main chart using all the models", use_column_width="auto")
181
+
182
+ print_model_list(hf_llm_diagrams[0],st, True)
183
+ st.write("<nbsp/>", unsafe_allow_html=True)
184
+
185
+ cols = st.columns(2)
186
+
187
+ cols[0].image(hf_llm_diagrams[1],caption="Other or commercially permissive licenses only", use_column_width="auto")
188
+ print_model_list(hf_llm_diagrams[1],cols[0])
189
+
190
+ cols[1].image(hf_llm_diagrams[2],caption="Commercially permissive license only", use_column_width="auto")
191
+ print_model_list(hf_llm_diagrams[2],cols[1])
192
 
193
+ st.write("<nbsp/>", unsafe_allow_html=True)
 
 
194
 
195
+ cols = st.columns(2)
196
+ cols[0].image(hf_llm_diagrams[3],caption="TruthfulQA at 10% for HuggingFace Open LLM leaderboard by Model Size", use_column_width="auto")
197
+ print_model_list(hf_llm_diagrams[3],cols[0],False)
198
 
199
+ cols[1].image(hf_llm_diagrams[4],caption="ARC at 50% and MMLU at 50% for HuggingFace Open LLM leaderboard by Model Size", use_column_width="auto")
200
+ print_model_list(hf_llm_diagrams[4],cols[1],False)
201
 
202
 
203
+ st.subheader("Big Code Models Leaderboard", divider=True)
204
+ cols = st.columns(2)
205
+ cols[0].image(bigcode_diagrams[0], use_column_width="auto")
206
 
207
 
208
+ print_model_list(bigcode_diagrams[0],st,True)
209
 
210
+ st.subheader("MT-Bench Models Leaderboard", divider=True)
211
+ cols = st.columns(2)
212
+ cols[0].image(mt_bench_diagrams[0], use_column_width="auto")
213
 
214
+ print_model_list(mt_bench_diagrams[0],st,True)
215
 
216
+ st.subheader("OpenCompass Models Leaderboard", divider=True)
217
+ cols = st.columns(2)
218
+ cols[0].image(opencompass_diagrams[0], use_column_width="auto")
219
+ print_model_list(opencompass_diagrams[0],st,True)
220
 
221
+ st.subheader("HuggingFace and Other Leaderboards: A Comparative Model Evaluation", divider=True)
222
+ st.caption("Only models evaluated on both leaderboards are included.")
223
 
224
+ cols = st.columns(2)
225
 
226
+ for i, img in enumerate(remaining_imgs):
227
+ # Extract the filename from the full image path
228
+ filename = os.path.basename(img)
229
 
230
+ # Get the caption from the captions_map dictionary
231
+ caption = captions_map.get(filename, "") # If no caption is found, it will default to an empty string
232
 
233
+ # Display the image with the caption
234
+ cols[i % 2].image(img, caption=caption, width=None)
235
 
236
  st.write(
237
  """