Yotam Perlitz committed on
Commit
ecb1e20
•
1 Parent(s): 3574021

Add application file

Browse files
Files changed (1) hide show
  1. app.py +60 -0
app.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+
4
+ st.title("\u200e\u200e\u200e \u200e\u200e \u200e \u200e \u200e \u200e \u200e \u200e🏋️‍♂️ benchbench-Leaderboard 🏋️‍♂️")
5
+
6
+ # df = pd.read_csv("BAT_w_arena_10_random.csv")
7
+ # df = (
8
+ # (
9
+ # df.rename(
10
+ # columns={
11
+ # "z_score": "Z_Score",
12
+ # "benchmark": "Benchmark",
13
+ # }
14
+ # ).drop(
15
+ # columns=[
16
+ # "Unnamed: 0",
17
+ # "z_test_pass",
18
+ # ]
19
+ # )
20
+ # )
21
+ # .sort_values("Z_Score", ascending=False)
22
+ # .query(
23
+ # 'Benchmark!="Aggregate" and Benchmark!="MAGI" and Benchmark!="Alpaca(v2, len adj)" and Benchmark!="GPT4All"'
24
+ # )
25
+ # )
26
+
27
+
28
+ # df.replace(
29
+ # {
30
+ # "Arena Elo": "LMSys Arena",
31
+ # "Hugging-6": "HF OpenLLM",
32
+ # "Alpaca(v2)": "Alpaca v2",
33
+ # "Alpaca(v1)": "Alpaca v1",
34
+ # "EQ-Bench(v2)": "EQ-Bench v2",
35
+ # },
36
+ # inplace=True,
37
+ # )
38
+
39
+ # col1, col2, col3 = st.columns(3)
40
+
41
+ # with col1:
42
+ #     st.header("\u200e \u200e \u200e \u200e \u200e \u200e \u200e \u200e Agree")
43
+ # st.dataframe(df.query("Z_Score>=0"), hide_index=True)
44
+
45
+ # with col2:
46
+ #     st.header("\u200e \u200e\u200e \u200e Disagree")
47
+ # st.dataframe(df.query("Z_Score<0").sort_values("Z_Score"), hide_index=True)
48
+
49
+ # with col3:
50
+ #     st.header("\u200e \u200e\u200e \u200e Configs")
51
+ # # st.selectbox(label="Reference Benchmarks", options=["LMSys Arena"])
52
+ # options = st.multiselect(
53
+ # "Reference Benchmarks",
54
+ # ["LMSys Arena", "Open Compass", "Yellow", "Red", "Blue"],
55
+ # ["LMSys Arena", "Open Compass"],
56
+ # )
57
+ # st.selectbox(label="# models compared", options=[20])
58
+ # st.selectbox(label="Model Select Strategy", options=["Random"])
59
+ #     st.write("\u200e\u200e\u200e\u200e\u200e\u200e\u200e")
60
+ # st.button("Upload a new benchmark")