felix
committed on
Commit
•
0110cd9
1
Parent(s):
2d98e01
updates
Browse files
app.py
CHANGED
@@ -48,6 +48,8 @@ imgs = glob.glob(os.path.join(data_path, '*.png'))
|
|
48 |
# Extracting images that start with "hf_llm_diagram"
|
49 |
hf_llm_diagrams = [img for img in imgs if 'hf_llm_diagram' in os.path.basename(img)]
|
50 |
bigcode_diagrams = [img for img in imgs if 'bigcode' in os.path.basename(img)]
|
|
|
|
|
51 |
|
52 |
# Getting the remaining images
|
53 |
remaining_imgs = list(set(imgs) - set(hf_llm_diagrams) - set(bigcode_diagrams))
|
@@ -129,7 +131,16 @@ cols[0].image(bigcode_diagrams[0], use_column_width="auto")
|
|
129 |
|
130 |
print_model_list(bigcode_diagrams[0],st,True)
|
131 |
|
|
|
|
|
|
|
|
|
|
|
132 |
|
|
|
|
|
|
|
|
|
133 |
|
134 |
st.subheader("HuggingFace and Other Leaderboards: A Comparative Model Evaluation", divider=True)
|
135 |
st.caption("Only models evaluated on both leaderboards are included.")
|
|
|
48 |
# Extracting images that start with "hf_llm_diagram"
|
49 |
hf_llm_diagrams = [img for img in imgs if 'hf_llm_diagram' in os.path.basename(img)]
|
50 |
bigcode_diagrams = [img for img in imgs if 'bigcode' in os.path.basename(img)]
|
51 |
+
mt_bench_diagrams = [img for img in imgs if 'mt_bench_leaderboard' in os.path.basename(img)]
|
52 |
+
opencompass_diagrams = [img for img in imgs if 'opencompass_leaderboard' in os.path.basename(img)]
|
53 |
|
54 |
# Getting the remaining images
|
55 |
remaining_imgs = list(set(imgs) - set(hf_llm_diagrams) - set(bigcode_diagrams))
|
|
|
131 |
|
132 |
print_model_list(bigcode_diagrams[0],st,True)
|
133 |
|
134 |
+
st.subheader("MT-Bench Models Leaderboard", divider=True)
|
135 |
+
cols = st.columns(2)
|
136 |
+
cols[0].image(mt_bench_diagrams[0], use_column_width="auto")
|
137 |
+
|
138 |
+
print_model_list(mt_bench_diagrams[0],st,True)
|
139 |
|
140 |
+
st.subheader("OpenCompass Models Leaderboard", divider=True)
|
141 |
+
cols = st.columns(2)
|
142 |
+
cols[0].image(opencompass_diagrams[0], use_column_width="auto")
|
143 |
+
print_model_list(opencompass_diagrams[0],st,True)
|
144 |
|
145 |
st.subheader("HuggingFace and Other Leaderboards: A Comparative Model Evaluation", divider=True)
|
146 |
st.caption("Only models evaluated on both leaderboards are included.")
|
data/20230907_1306/mt_bench_leaderboard.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
["WizardLM/WizardLM-70B-V1.0", "lmsys/vicuna-33b-v1.3", "WizardLM/WizardLM-13B-V1.2", "meta-llama/Llama-2-7b-chat-hf", "THUDM/chatglm2-6b", "lmsys/fastchat-t5-3b-v1.0", "BlinkDL/rwkv-4-raven"]
|
data/20230907_1306/mt_bench_leaderboard.png
ADDED