benchbench

Running

App Files Community

Yotam Perlitz committed on Jul 9

Commit

ecb1e20

•

1 Parent(s): 3574021

Add application file

Browse files

Files changed (1) hide show

app.py +60 -0

app.py ADDED Viewed

	@@ -0,0 +1,60 @@

+import streamlit as st
+import pandas as pd
+st.title("‎‎‎  ‎‎ ‎ ‎ ‎ ‎ ‎ ‎🏋️‍♂️ benchbench-Leaderboard 🏋️‍♂️")
+# df = pd.read_csv("BAT_w_arena_10_random.csv")
+# df = (
+#     (
+#         df.rename(
+#             columns={
+#                 "z_score": "Z_Score",
+#                 "benchmark": "Benchmark",
+#             }
+#         ).drop(
+#             columns=[
+#                 "Unnamed: 0",
+#                 "z_test_pass",
+#             ]
+#         )
+#     )
+#     .sort_values("Z_Score", ascending=False)
+#     .query(
+#         'Benchmark!="Aggregate" and Benchmark!="MAGI" and Benchmark!="Alpaca(v2, len adj)" and Benchmark!="GPT4All"'
+#     )
+# )
+# df.replace(
+#     {
+#         "Arena Elo": "LMSys Arena",
+#         "Hugging-6": "HF OpenLLM",
+#         "Alpaca(v2)": "Alpaca v2",
+#         "Alpaca(v1)": "Alpaca v1",
+#         "EQ-Bench(v2)": "EQ-Bench v2",
+#     },
+#     inplace=True,
+# )
+# col1, col2, col3 = st.columns(3)
+# with col1:
+#     st.header("‎ ‎ ‎ ‎ ‎ ‎ ‎ ‎ Agree")
+#     st.dataframe(df.query("Z_Score>=0"), hide_index=True)
+# with col2:
+#     st.header("‎ ‎‎  ‎ Disagree")
+#     st.dataframe(df.query("Z_Score<0").sort_values("Z_Score"), hide_index=True)
+# with col3:
+#     st.header("‎ ‎‎  ‎ Configs")
+#     # st.selectbox(label="Reference Benchmarks", options=["LMSys Arena"])
+#     options = st.multiselect(
+#         "Reference Benchmarks",
+#         ["LMSys Arena", "Open Compass", "Yellow", "Red", "Blue"],
+#         ["LMSys Arena", "Open Compass"],
+#     )
+#     st.selectbox(label="# models compared", options=[20])
+#     st.selectbox(label="Model Select Strategy", options=["Random"])
+#     st.write("‎‎‎‎‎‎‎")
+#     st.button("Upload a new benchmark")