felix committed
Commit: 5d6c941
Parent(s): 0f541ca
add arena
app.py
CHANGED
@@ -111,12 +111,14 @@ if compare_mode:
 
     hf_llm_diagrams = extract_images('hf_llm_diagram', imgs)
     bigcode_diagrams = extract_images('bigcode', imgs)
-    mt_bench_diagrams = extract_images('
+    mt_bench_diagrams = extract_images('lmsys_leaderboard_mt_bench', imgs)
+    arena_diagrams = extract_images('lmsys_leaderboard_arena', imgs)
     opencompass_diagrams = extract_images('opencompass_leaderboard', imgs)
 
     compare_hf_llm_diagrams = extract_images('hf_llm_diagram', compare_imgs)
     compare_bigcode_diagrams = extract_images('bigcode', compare_imgs)
-    compare_mt_bench_diagrams = extract_images('
+    compare_mt_bench_diagrams = extract_images('lmsys_leaderboard_mt_bench', compare_imgs)
+    compare_arena_diagrams = extract_images('lmsys_leaderboard_arena', compare_imgs)
     compare_opencompass_diagrams = extract_images('opencompass_leaderboard', compare_imgs)
 
     # Display each category side by side
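Note: extract_images is defined outside the hunks shown here. Judging from the equivalent list comprehensions in the else: branch further down, it most likely filters image paths by a filename substring. A minimal sketch under that assumption (not the repo's actual definition):

import os

def extract_images(pattern, imgs):
    # Assumed behavior: keep paths whose filename contains `pattern`,
    # mirroring the list comprehensions in the non-compare branch.
    return [img for img in imgs if pattern in os.path.basename(img)]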
@@ -136,6 +138,9 @@ if compare_mode:
     # Displaying MT-Bench Models Leaderboard
     display_side_by_side(mt_bench_diagrams, compare_mt_bench_diagrams, "MT-Bench Models Leaderboard")
 
+    # Displaying Arena Models Leaderboard
+    display_side_by_side(arena_diagrams, compare_arena_diagrams, "LMSYS Arena Elo Models Leaderboard")
+
     # Displaying OpenCompass Models Leaderboard
     display_side_by_side(opencompass_diagrams, compare_opencompass_diagrams, "OpenCompass Models Leaderboard")
 
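display_side_by_side is likewise not shown in this diff. From its call sites (current snapshot images, comparison snapshot images, a section title), a plausible sketch is a titled two-column layout; everything below is an assumption, not the actual implementation:

import streamlit as st

def display_side_by_side(diagrams, compare_diagrams, title):
    # Assumed behavior: one subheader, then the current and comparison
    # snapshots rendered next to each other in two columns.
    st.subheader(title, divider=True)
    cols = st.columns(2)
    if diagrams:
        cols[0].image(diagrams[0], use_column_width="auto")
    if compare_diagrams:
        cols[1].image(compare_diagrams[0], use_column_width="auto")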
@@ -168,11 +173,12 @@ else:
     # Extracting images that start with "hf_llm_diagram"
     hf_llm_diagrams = [img for img in imgs if 'hf_llm_diagram' in os.path.basename(img)]
     bigcode_diagrams = [img for img in imgs if 'bigcode' in os.path.basename(img)]
-    mt_bench_diagrams = [img for img in imgs if '
+    mt_bench_diagrams = [img for img in imgs if 'lmsys_leaderboard_mt_bench' in os.path.basename(img)]
+    arena_diagrams = [img for img in imgs if 'lmsys_leaderboard_arena' in os.path.basename(img)]
     opencompass_diagrams = [img for img in imgs if 'opencompass_leaderboard' in os.path.basename(img)]
 
     # Getting the remaining images
-    remaining_imgs = list(set(imgs) - set(hf_llm_diagrams) - set(bigcode_diagrams) - set(mt_bench_diagrams) - set(opencompass_diagrams))
+    remaining_imgs = list(set(imgs) - set(hf_llm_diagrams) - set(bigcode_diagrams) - set(mt_bench_diagrams) - set(arena_diagrams) - set(opencompass_diagrams))
 
     st.subheader("HuggingFace Open LLM leaderboard by Model Size", divider=True)
     cols = st.columns(2)
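One behavior of the remaining_imgs line worth keeping in mind: routing the lists through set() collapses duplicate paths and discards the original ordering. A small illustration with made-up filenames:

imgs = ['hf_llm_diagram.png', 'bigcode.png', 'misc.png', 'misc.png']
categorized = {'hf_llm_diagram.png', 'bigcode.png'}
remaining = list(set(imgs) - categorized)
# remaining == ['misc.png']; duplicates collapsed, order not guaranteed

If ordering mattered, a comprehension such as [img for img in imgs if img not in categorized] would preserve it.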
@@ -213,6 +219,12 @@ else:
 
     print_model_list(mt_bench_diagrams[0],st,True)
 
+    st.subheader("LMSYS Arena Elo Models Leaderboard", divider=True)
+    cols = st.columns(2)
+    cols[0].image(arena_diagrams[0], use_column_width="auto")
+
+    print_model_list(arena_diagrams[0],st,True)
+
     st.subheader("OpenCompass Models Leaderboard", divider=True)
     cols = st.columns(2)
     cols[0].image(opencompass_diagrams[0], use_column_width="auto")
@@ -238,7 +250,7 @@ st.write(
     <p>Leaderboards tracked:</p>
     <ul>
     <li><a href="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard">Hugging Face Open LLM</a></li>
-    <li><a href="https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard">MT-Bench</a> GPT4 judged evaluation of models</li>
+    <li><a href="https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard">MT-Bench and Arena Elo</a> MT-Bench is a GPT4-judged evaluation of models; Arena Elo is users ranking outputs between models.</li>
     <li><a href="https://tatsu-lab.github.io/alpaca_eval/">AlpacaEval</a> GPT4 judged evaluation of models</li>
     <li><a href="https://www.mosaicml.com/llm-evaluation">MosaicML</a> Balanced set of static benchmarks</li>
     <li><a href="https://opencompass.org.cn/leaderboard-llm">OpenCompass</a> Balanced set of static benchmarks</li>