category
#27
by
lisabdunlap
- opened
This view is limited to 50 files because it contains too many changes.Β
See the raw diff here.
- README.md +4 -3
- app.py +424 -38
- arena_hard_auto_leaderboard_v0.1.csv +0 -61
- elo_results_20240327.pkl +0 -3
- elo_results_20240410.pkl β elo_results_20240329.pkl +2 -2
- elo_results_20240403.pkl +0 -3
- elo_results_20240409.pkl +0 -3
- elo_results_20240411.pkl +0 -3
- elo_results_20240413.pkl +0 -3
- elo_results_20240418.pkl +0 -3
- elo_results_20240419.pkl +0 -3
- elo_results_20240422.pkl +0 -3
- elo_results_20240426.pkl +0 -3
- elo_results_20240501.pkl +0 -3
- elo_results_20240508.pkl +0 -3
- elo_results_20240515.pkl +0 -3
- elo_results_20240516.pkl +0 -3
- elo_results_20240519.pkl +0 -3
- elo_results_20240520.pkl +0 -3
- elo_results_20240527.pkl +0 -3
- elo_results_20240602.pkl +0 -3
- elo_results_20240606.pkl +0 -3
- elo_results_20240611.pkl +0 -3
- elo_results_20240617.pkl +0 -3
- elo_results_20240621.pkl +0 -3
- elo_results_20240623.pkl +0 -3
- elo_results_20240626.pkl +0 -3
- elo_results_20240629.pkl +0 -3
- elo_results_20240706.pkl +0 -3
- elo_results_20240708.pkl +0 -3
- elo_results_20240716.pkl +0 -3
- elo_results_20240722.pkl +0 -3
- elo_results_20240725.pkl +0 -3
- elo_results_20240730.pkl +0 -3
- elo_results_20240731.pkl +0 -3
- elo_results_20240801.pkl +0 -3
- elo_results_20240805.pkl +0 -3
- elo_results_20240806.pkl +0 -3
- elo_results_20240813.pkl +0 -3
- elo_results_20240822.pkl +0 -3
- elo_results_20240823.pkl +0 -3
- elo_results_20240827.pkl +0 -3
- elo_results_20240828.pkl +0 -3
- elo_results_20240904.pkl +0 -3
- elo_results_20240915.pkl +0 -3
- elo_results_20240917.pkl +0 -3
- elo_results_20240927.pkl +0 -3
- elo_results_20241007.pkl +0 -3
- elo_results_20241015.pkl +0 -3
- elo_results_20241023.pkl +0 -3
README.md
CHANGED
@@ -1,14 +1,15 @@
|
|
1 |
---
|
2 |
-
title: Chatbot Arena Leaderboard
|
3 |
emoji: ππ€
|
4 |
colorFrom: indigo
|
5 |
colorTo: green
|
6 |
sdk: gradio
|
|
|
|
|
7 |
pinned: false
|
8 |
license: apache-2.0
|
9 |
tags:
|
10 |
-
- leaderboard
|
11 |
-
sdk_version: 4.44.1
|
12 |
---
|
13 |
|
14 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title: LMSys Chatbot Arena Leaderboard
|
3 |
emoji: ππ€
|
4 |
colorFrom: indigo
|
5 |
colorTo: green
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 3.50.2
|
8 |
+
app_file: app.py
|
9 |
pinned: false
|
10 |
license: apache-2.0
|
11 |
tags:
|
12 |
+
- leaderboard
|
|
|
13 |
---
|
14 |
|
15 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
CHANGED
@@ -1,61 +1,451 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
import argparse
|
5 |
import glob
|
6 |
-
import
|
|
|
7 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
|
10 |
def load_demo(url_params, request: gr.Request):
|
11 |
logger.info(f"load_demo. ip: {request.client.host}. params: {url_params}")
|
12 |
return basic_component_values + leader_component_values
|
13 |
|
14 |
-
def build_demo(elo_results_file, leaderboard_table_file):
|
15 |
-
from fastchat.serve.gradio_web_server import block_css
|
16 |
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
)
|
38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
with gr.Blocks(
|
40 |
title="Chatbot Arena Leaderboard",
|
41 |
-
theme=
|
42 |
css=block_css,
|
43 |
) as demo:
|
44 |
leader_components = build_leaderboard_tab(
|
45 |
-
elo_results_file, leaderboard_table_file,
|
46 |
)
|
47 |
return demo
|
48 |
|
|
|
49 |
if __name__ == "__main__":
|
50 |
parser = argparse.ArgumentParser()
|
51 |
parser.add_argument("--share", action="store_true")
|
52 |
-
parser.add_argument("--host", default="0.0.0.0")
|
53 |
-
parser.add_argument("--port", type=int, default=7860)
|
54 |
args = parser.parse_args()
|
55 |
|
56 |
-
logger = build_logger("monitor", "monitor.log")
|
57 |
-
logger.info(f"args: {args}")
|
58 |
-
|
59 |
elo_result_files = glob.glob("elo_results_*.pkl")
|
60 |
elo_result_files.sort(key=lambda x: int(x[12:-4]))
|
61 |
elo_result_file = elo_result_files[-1]
|
@@ -63,10 +453,6 @@ if __name__ == "__main__":
|
|
63 |
leaderboard_table_files = glob.glob("leaderboard_table_*.csv")
|
64 |
leaderboard_table_files.sort(key=lambda x: int(x[18:-4]))
|
65 |
leaderboard_table_file = leaderboard_table_files[-1]
|
66 |
-
|
67 |
-
arena_hard_files = glob.glob("arena_hard_auto_leaderboard_*.csv")
|
68 |
-
arena_hard_files.sort(key=lambda x: float(x[29:32]))
|
69 |
-
arena_hard_file = arena_hard_files[-1]
|
70 |
|
71 |
demo = build_demo(elo_result_file, leaderboard_table_file)
|
72 |
-
demo.launch(share=args.share
|
|
|
1 |
+
"""A gradio app that renders a static leaderboard. This is used for Hugging Face Space."""
|
2 |
+
import ast
|
|
|
3 |
import argparse
|
4 |
import glob
|
5 |
+
import pickle
|
6 |
+
|
7 |
import gradio as gr
|
8 |
+
import numpy as np
|
9 |
+
import pandas as pd
|
10 |
+
|
11 |
+
|
12 |
+
# notebook_url = "https://colab.research.google.com/drive/1RAWb22-PFNI-X1gPVzc927SGUdfr6nsR?usp=sharing"
|
13 |
+
notebook_url = "https://colab.research.google.com/drive/1KdwokPjirkTmpO_P1WByFNFiqxWQquwH#scrollTo=o_CpbkGEbhrK"
|
14 |
+
|
15 |
+
|
16 |
+
basic_component_values = [None] * 6
|
17 |
+
leader_component_values = [None] * 5
|
18 |
+
|
19 |
+
|
20 |
+
def make_default_md(arena_df, elo_results):
|
21 |
+
total_votes = sum(arena_df["num_battles"]) // 2
|
22 |
+
total_models = len(arena_df)
|
23 |
+
|
24 |
+
leaderboard_md = f"""
|
25 |
+
# π LMSYS Chatbot Arena Leaderboard
|
26 |
+
| [Vote](https://chat.lmsys.org) | [Blog](https://lmsys.org/blog/2023-05-03-arena/) | [GitHub](https://github.com/lm-sys/FastChat) | [Paper](https://arxiv.org/abs/2306.05685) | [Dataset](https://github.com/lm-sys/FastChat/blob/main/docs/dataset_release.md) | [Twitter](https://twitter.com/lmsysorg) | [Discord](https://discord.gg/HSWAKCrnFx) |
|
27 |
+
|
28 |
+
LMSYS [Chatbot Arena](https://lmsys.org/blog/2023-05-03-arena/) is a crowdsourced open platform for LLM evals.
|
29 |
+
We've collected over **500,000** human preference votes to rank LLMs with the Elo ranking system.
|
30 |
+
"""
|
31 |
+
return leaderboard_md
|
32 |
+
|
33 |
+
|
34 |
+
def make_arena_leaderboard_md(arena_df):
|
35 |
+
total_votes = sum(arena_df["num_battles"]) // 2
|
36 |
+
total_models = len(arena_df)
|
37 |
+
|
38 |
+
leaderboard_md = f"""
|
39 |
+
Total #models: **{total_models}**. Total #votes: **{total_votes}**. Last updated: March 29, 2024.
|
40 |
+
|
41 |
+
Contribute your vote π³οΈ at [chat.lmsys.org](https://chat.lmsys.org)! Find more analysis in the [notebook]({notebook_url}).
|
42 |
+
"""
|
43 |
+
return leaderboard_md
|
44 |
+
|
45 |
+
|
46 |
+
def make_full_leaderboard_md(elo_results):
|
47 |
+
leaderboard_md = f"""
|
48 |
+
Three benchmarks are displayed: **Arena Elo**, **MT-Bench** and **MMLU**.
|
49 |
+
- [Chatbot Arena](https://chat.lmsys.org/?arena) - a crowdsourced, randomized battle platform. We use 500K+ user votes to compute Elo ratings.
|
50 |
+
- [MT-Bench](https://arxiv.org/abs/2306.05685): a set of challenging multi-turn questions. We use GPT-4 to grade the model responses.
|
51 |
+
- [MMLU](https://arxiv.org/abs/2009.03300) (5-shot): a test to measure a model's multitask accuracy on 57 tasks.
|
52 |
+
|
53 |
+
π» Code: The MT-bench scores (single-answer grading on a scale of 10) are computed by [fastchat.llm_judge](https://github.com/lm-sys/FastChat/tree/main/fastchat/llm_judge).
|
54 |
+
The MMLU scores are mostly computed by [InstructEval](https://github.com/declare-lab/instruct-eval).
|
55 |
+
Higher values are better for all benchmarks. Empty cells mean not available.
|
56 |
+
"""
|
57 |
+
return leaderboard_md
|
58 |
+
|
59 |
+
|
60 |
+
def make_leaderboard_md_live(elo_results):
|
61 |
+
leaderboard_md = f"""
|
62 |
+
# Leaderboard
|
63 |
+
Last updated: {elo_results["last_updated_datetime"]}
|
64 |
+
{elo_results["leaderboard_table"]}
|
65 |
+
"""
|
66 |
+
return leaderboard_md
|
67 |
+
|
68 |
+
|
69 |
+
def update_elo_components(max_num_files, elo_results_file):
|
70 |
+
log_files = get_log_files(max_num_files)
|
71 |
+
|
72 |
+
# Leaderboard
|
73 |
+
if elo_results_file is None: # Do live update
|
74 |
+
battles = clean_battle_data(log_files)
|
75 |
+
elo_results = report_elo_analysis_results(battles)
|
76 |
+
|
77 |
+
leader_component_values[0] = make_leaderboard_md_live(elo_results)
|
78 |
+
leader_component_values[1] = elo_results["win_fraction_heatmap"]
|
79 |
+
leader_component_values[2] = elo_results["battle_count_heatmap"]
|
80 |
+
leader_component_values[3] = elo_results["bootstrap_elo_rating"]
|
81 |
+
leader_component_values[4] = elo_results["average_win_rate_bar"]
|
82 |
+
|
83 |
+
# Basic stats
|
84 |
+
basic_stats = report_basic_stats(log_files)
|
85 |
+
md0 = f"Last updated: {basic_stats['last_updated_datetime']}"
|
86 |
+
|
87 |
+
md1 = "### Action Histogram\n"
|
88 |
+
md1 += basic_stats["action_hist_md"] + "\n"
|
89 |
+
|
90 |
+
md2 = "### Anony. Vote Histogram\n"
|
91 |
+
md2 += basic_stats["anony_vote_hist_md"] + "\n"
|
92 |
+
|
93 |
+
md3 = "### Model Call Histogram\n"
|
94 |
+
md3 += basic_stats["model_hist_md"] + "\n"
|
95 |
+
|
96 |
+
md4 = "### Model Call (Last 24 Hours)\n"
|
97 |
+
md4 += basic_stats["num_chats_last_24_hours"] + "\n"
|
98 |
+
|
99 |
+
basic_component_values[0] = md0
|
100 |
+
basic_component_values[1] = basic_stats["chat_dates_bar"]
|
101 |
+
basic_component_values[2] = md1
|
102 |
+
basic_component_values[3] = md2
|
103 |
+
basic_component_values[4] = md3
|
104 |
+
basic_component_values[5] = md4
|
105 |
+
|
106 |
+
|
107 |
+
def update_worker(max_num_files, interval, elo_results_file):
|
108 |
+
while True:
|
109 |
+
tic = time.time()
|
110 |
+
update_elo_components(max_num_files, elo_results_file)
|
111 |
+
durtaion = time.time() - tic
|
112 |
+
print(f"update duration: {durtaion:.2f} s")
|
113 |
+
time.sleep(max(interval - durtaion, 0))
|
114 |
|
115 |
|
116 |
def load_demo(url_params, request: gr.Request):
|
117 |
logger.info(f"load_demo. ip: {request.client.host}. params: {url_params}")
|
118 |
return basic_component_values + leader_component_values
|
119 |
|
|
|
|
|
120 |
|
121 |
+
def model_hyperlink(model_name, link):
|
122 |
+
return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
|
123 |
+
|
124 |
+
|
125 |
+
def load_leaderboard_table_csv(filename, add_hyperlink=True):
|
126 |
+
lines = open(filename).readlines()
|
127 |
+
heads = [v.strip() for v in lines[0].split(",")]
|
128 |
+
rows = []
|
129 |
+
for i in range(1, len(lines)):
|
130 |
+
row = [v.strip() for v in lines[i].split(",")]
|
131 |
+
for j in range(len(heads)):
|
132 |
+
item = {}
|
133 |
+
for h, v in zip(heads, row):
|
134 |
+
if h == "Arena Elo rating":
|
135 |
+
if v != "-":
|
136 |
+
v = int(ast.literal_eval(v))
|
137 |
+
else:
|
138 |
+
v = np.nan
|
139 |
+
elif h == "MMLU":
|
140 |
+
if v != "-":
|
141 |
+
v = round(ast.literal_eval(v) * 100, 1)
|
142 |
+
else:
|
143 |
+
v = np.nan
|
144 |
+
elif h == "MT-bench (win rate %)":
|
145 |
+
if v != "-":
|
146 |
+
v = round(ast.literal_eval(v[:-1]), 1)
|
147 |
+
else:
|
148 |
+
v = np.nan
|
149 |
+
elif h == "MT-bench (score)":
|
150 |
+
if v != "-":
|
151 |
+
v = round(ast.literal_eval(v), 2)
|
152 |
+
else:
|
153 |
+
v = np.nan
|
154 |
+
item[h] = v
|
155 |
+
if add_hyperlink:
|
156 |
+
item["Model"] = model_hyperlink(item["Model"], item["Link"])
|
157 |
+
rows.append(item)
|
158 |
+
|
159 |
+
return rows
|
160 |
+
|
161 |
+
|
162 |
+
def build_basic_stats_tab():
|
163 |
+
empty = "Loading ..."
|
164 |
+
basic_component_values[:] = [empty, None, empty, empty, empty, empty]
|
165 |
+
|
166 |
+
md0 = gr.Markdown(empty)
|
167 |
+
gr.Markdown("#### Figure 1: Number of model calls and votes")
|
168 |
+
plot_1 = gr.Plot(show_label=False)
|
169 |
+
with gr.Row():
|
170 |
+
with gr.Column():
|
171 |
+
md1 = gr.Markdown(empty)
|
172 |
+
with gr.Column():
|
173 |
+
md2 = gr.Markdown(empty)
|
174 |
+
with gr.Row():
|
175 |
+
with gr.Column():
|
176 |
+
md3 = gr.Markdown(empty)
|
177 |
+
with gr.Column():
|
178 |
+
md4 = gr.Markdown(empty)
|
179 |
+
return [md0, plot_1, md1, md2, md3, md4]
|
180 |
+
|
181 |
+
def get_full_table(arena_df, model_table_df):
|
182 |
+
values = []
|
183 |
+
for i in range(len(model_table_df)):
|
184 |
+
row = []
|
185 |
+
model_key = model_table_df.iloc[i]["key"]
|
186 |
+
model_name = model_table_df.iloc[i]["Model"]
|
187 |
+
# model display name
|
188 |
+
row.append(model_name)
|
189 |
+
if model_key in arena_df.index:
|
190 |
+
idx = arena_df.index.get_loc(model_key)
|
191 |
+
row.append(round(arena_df.iloc[idx]["rating"]))
|
192 |
+
else:
|
193 |
+
row.append(np.nan)
|
194 |
+
row.append(model_table_df.iloc[i]["MT-bench (score)"])
|
195 |
+
row.append(model_table_df.iloc[i]["MMLU"])
|
196 |
+
# Organization
|
197 |
+
row.append(model_table_df.iloc[i]["Organization"])
|
198 |
+
# license
|
199 |
+
row.append(model_table_df.iloc[i]["License"])
|
200 |
+
|
201 |
+
values.append(row)
|
202 |
+
values.sort(key=lambda x: -x[1] if not np.isnan(x[1]) else 1e9)
|
203 |
+
return values
|
204 |
+
|
205 |
+
|
206 |
+
def get_arena_table(arena_df, model_table_df):
|
207 |
+
# sort by rating
|
208 |
+
arena_df = arena_df.sort_values(by=["rating"], ascending=False)
|
209 |
+
values = []
|
210 |
+
for i in range(len(arena_df)):
|
211 |
+
row = []
|
212 |
+
model_key = arena_df.index[i]
|
213 |
+
model_name = model_table_df[model_table_df["key"] == model_key]["Model"].values[
|
214 |
+
0
|
215 |
+
]
|
216 |
+
|
217 |
+
# rank
|
218 |
+
ranking = arena_df.iloc[i].get("final_ranking") or i+1
|
219 |
+
row.append(ranking)
|
220 |
+
# model display name
|
221 |
+
row.append(model_name)
|
222 |
+
# elo rating
|
223 |
+
row.append(round(arena_df.iloc[i]["rating"]))
|
224 |
+
upper_diff = round(
|
225 |
+
arena_df.iloc[i]["rating_q975"] - arena_df.iloc[i]["rating"]
|
226 |
+
)
|
227 |
+
lower_diff = round(
|
228 |
+
arena_df.iloc[i]["rating"] - arena_df.iloc[i]["rating_q025"]
|
229 |
+
)
|
230 |
+
row.append(f"+{upper_diff}/-{lower_diff}")
|
231 |
+
# num battles
|
232 |
+
row.append(round(arena_df.iloc[i]["num_battles"]))
|
233 |
+
# Organization
|
234 |
+
row.append(
|
235 |
+
model_table_df[model_table_df["key"] == model_key]["Organization"].values[0]
|
236 |
+
)
|
237 |
+
# license
|
238 |
+
row.append(
|
239 |
+
model_table_df[model_table_df["key"] == model_key]["License"].values[0]
|
240 |
+
)
|
241 |
+
|
242 |
+
cutoff_date = model_table_df[model_table_df["key"] == model_key]["Knowledge cutoff date"].values[0]
|
243 |
+
if cutoff_date == "-":
|
244 |
+
row.append("Unknown")
|
245 |
+
else:
|
246 |
+
row.append(cutoff_date)
|
247 |
+
values.append(row)
|
248 |
+
return values
|
249 |
+
|
250 |
+
def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=False):
|
251 |
+
if elo_results_file is None: # Do live update
|
252 |
+
default_md = "Loading ..."
|
253 |
+
p1 = p2 = p3 = p4 = None
|
254 |
+
else:
|
255 |
+
with open(elo_results_file, "rb") as fin:
|
256 |
+
elo_results = pickle.load(fin)
|
257 |
+
if "full" in elo_results:
|
258 |
+
elo_results = elo_results["full"]
|
259 |
+
|
260 |
+
p1 = elo_results["win_fraction_heatmap"]
|
261 |
+
p2 = elo_results["battle_count_heatmap"]
|
262 |
+
p3 = elo_results["bootstrap_elo_rating"]
|
263 |
+
p4 = elo_results["average_win_rate_bar"]
|
264 |
+
arena_df = elo_results["leaderboard_table_df"]
|
265 |
+
default_md = make_default_md(arena_df, elo_results)
|
266 |
+
|
267 |
+
md_1 = gr.Markdown(default_md, elem_id="leaderboard_markdown")
|
268 |
+
if leaderboard_table_file:
|
269 |
+
data = load_leaderboard_table_csv(leaderboard_table_file)
|
270 |
+
model_table_df = pd.DataFrame(data)
|
271 |
+
|
272 |
+
with gr.Tabs() as tabs:
|
273 |
+
# arena table
|
274 |
+
arena_table_vals = get_arena_table(arena_df, model_table_df)
|
275 |
+
with gr.Tab("Arena Elo", id=0):
|
276 |
+
md = make_arena_leaderboard_md(arena_df)
|
277 |
+
gr.Markdown(md, elem_id="leaderboard_markdown")
|
278 |
+
gr.Dataframe(
|
279 |
+
headers=[
|
280 |
+
"Rank",
|
281 |
+
"π€ Model",
|
282 |
+
"β Arena Elo",
|
283 |
+
"π 95% CI",
|
284 |
+
"π³οΈ Votes",
|
285 |
+
"Organization",
|
286 |
+
"License",
|
287 |
+
"Knowledge Cutoff",
|
288 |
+
],
|
289 |
+
datatype=[
|
290 |
+
"str",
|
291 |
+
"markdown",
|
292 |
+
"number",
|
293 |
+
"str",
|
294 |
+
"number",
|
295 |
+
"str",
|
296 |
+
"str",
|
297 |
+
"str",
|
298 |
+
],
|
299 |
+
value=arena_table_vals,
|
300 |
+
elem_id="arena_leaderboard_dataframe",
|
301 |
+
height=700,
|
302 |
+
column_widths=[50, 200, 120, 100, 100, 150, 150, 100],
|
303 |
+
wrap=True,
|
304 |
+
)
|
305 |
+
with gr.Tab("Full Leaderboard", id=1):
|
306 |
+
md = make_full_leaderboard_md(elo_results)
|
307 |
+
gr.Markdown(md, elem_id="leaderboard_markdown")
|
308 |
+
full_table_vals = get_full_table(arena_df, model_table_df)
|
309 |
+
gr.Dataframe(
|
310 |
+
headers=[
|
311 |
+
"π€ Model",
|
312 |
+
"β Arena Elo",
|
313 |
+
"π MT-bench",
|
314 |
+
"π MMLU",
|
315 |
+
"Organization",
|
316 |
+
"License",
|
317 |
+
],
|
318 |
+
datatype=["markdown", "number", "number", "number", "str", "str"],
|
319 |
+
value=full_table_vals,
|
320 |
+
elem_id="full_leaderboard_dataframe",
|
321 |
+
column_widths=[200, 100, 100, 100, 150, 150],
|
322 |
+
height=700,
|
323 |
+
wrap=True,
|
324 |
+
)
|
325 |
+
if not show_plot:
|
326 |
+
gr.Markdown(
|
327 |
+
""" ## Visit our [HF space](https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard) for more analysis!
|
328 |
+
If you want to see more models, please help us [add them](https://github.com/lm-sys/FastChat/blob/main/docs/arena.md#how-to-add-a-new-model).
|
329 |
+
""",
|
330 |
+
elem_id="leaderboard_markdown",
|
331 |
+
)
|
332 |
+
else:
|
333 |
+
pass
|
334 |
+
|
335 |
+
gr.Markdown(
|
336 |
+
f"""Note: we take the 95% confidence interval into account when determining a model's ranking.
|
337 |
+
A model is ranked higher only if its lower bound of model score is higher than the upper bound of the other model's score.
|
338 |
+
See Figure 3 below for visualization of the confidence intervals.
|
339 |
+
""",
|
340 |
+
elem_id="leaderboard_markdown"
|
341 |
)
|
342 |
|
343 |
+
leader_component_values[:] = [default_md, p1, p2, p3, p4]
|
344 |
+
|
345 |
+
if show_plot:
|
346 |
+
gr.Markdown(
|
347 |
+
f"""## More Statistics for Chatbot Arena\n
|
348 |
+
Below are figures for more statistics. The code for generating them is also included in this [notebook]({notebook_url}).
|
349 |
+
You can find more discussions in this blog [post](https://lmsys.org/blog/2023-12-07-leaderboard/).
|
350 |
+
""",
|
351 |
+
elem_id="leaderboard_markdown"
|
352 |
+
)
|
353 |
+
with gr.Row():
|
354 |
+
with gr.Column():
|
355 |
+
gr.Markdown(
|
356 |
+
"#### Figure 1: Fraction of Model A Wins for All Non-tied A vs. B Battles"
|
357 |
+
)
|
358 |
+
plot_1 = gr.Plot(p1, show_label=False)
|
359 |
+
with gr.Column():
|
360 |
+
gr.Markdown(
|
361 |
+
"#### Figure 2: Battle Count for Each Combination of Models (without Ties)"
|
362 |
+
)
|
363 |
+
plot_2 = gr.Plot(p2, show_label=False)
|
364 |
+
with gr.Row():
|
365 |
+
with gr.Column():
|
366 |
+
gr.Markdown(
|
367 |
+
"#### Figure 3: Confidence Intervals on Model Strength (via Bootstrapping)"
|
368 |
+
)
|
369 |
+
plot_3 = gr.Plot(p3, show_label=False)
|
370 |
+
with gr.Column():
|
371 |
+
gr.Markdown(
|
372 |
+
"#### Figure 4: Average Win Rate Against All Other Models (Assuming Uniform Sampling and No Ties)"
|
373 |
+
)
|
374 |
+
plot_4 = gr.Plot(p4, show_label=False)
|
375 |
+
|
376 |
+
gr.Markdown(acknowledgment_md)
|
377 |
+
|
378 |
+
if show_plot:
|
379 |
+
return [md_1, plot_1, plot_2, plot_3, plot_4]
|
380 |
+
return [md_1]
|
381 |
+
|
382 |
+
block_css = """
|
383 |
+
#notice_markdown {
|
384 |
+
font-size: 104%
|
385 |
+
}
|
386 |
+
#notice_markdown th {
|
387 |
+
display: none;
|
388 |
+
}
|
389 |
+
#notice_markdown td {
|
390 |
+
padding-top: 6px;
|
391 |
+
padding-bottom: 6px;
|
392 |
+
}
|
393 |
+
#leaderboard_markdown {
|
394 |
+
font-size: 104%
|
395 |
+
}
|
396 |
+
#leaderboard_markdown td {
|
397 |
+
padding-top: 6px;
|
398 |
+
padding-bottom: 6px;
|
399 |
+
}
|
400 |
+
#leaderboard_dataframe td {
|
401 |
+
line-height: 0.1em;
|
402 |
+
}
|
403 |
+
footer {
|
404 |
+
display:none !important
|
405 |
+
}
|
406 |
+
.sponsor-image-about img {
|
407 |
+
margin: 0 20px;
|
408 |
+
margin-top: 20px;
|
409 |
+
height: 40px;
|
410 |
+
max-height: 100%;
|
411 |
+
width: auto;
|
412 |
+
float: left;
|
413 |
+
}
|
414 |
+
"""
|
415 |
+
|
416 |
+
acknowledgment_md = """
|
417 |
+
### Acknowledgment
|
418 |
+
We thank [Kaggle](https://www.kaggle.com/), [MBZUAI](https://mbzuai.ac.ae/), [a16z](https://www.a16z.com/), [Together AI](https://www.together.ai/), [Anyscale](https://www.anyscale.com/), [HuggingFace](https://huggingface.co/) for their generous [sponsorship](https://lmsys.org/donations/).
|
419 |
+
|
420 |
+
<div class="sponsor-image-about">
|
421 |
+
<img src="https://storage.googleapis.com/public-arena-asset/kaggle.png" alt="Kaggle">
|
422 |
+
<img src="https://storage.googleapis.com/public-arena-asset/mbzuai.jpeg" alt="MBZUAI">
|
423 |
+
<img src="https://storage.googleapis.com/public-arena-asset/a16z.jpeg" alt="a16z">
|
424 |
+
<img src="https://storage.googleapis.com/public-arena-asset/together.png" alt="Together AI">
|
425 |
+
<img src="https://storage.googleapis.com/public-arena-asset/anyscale.png" alt="AnyScale">
|
426 |
+
<img src="https://storage.googleapis.com/public-arena-asset/huggingface.png" alt="HuggingFace">
|
427 |
+
</div>
|
428 |
+
"""
|
429 |
+
|
430 |
+
def build_demo(elo_results_file, leaderboard_table_file):
|
431 |
+
text_size = gr.themes.sizes.text_lg
|
432 |
+
|
433 |
with gr.Blocks(
|
434 |
title="Chatbot Arena Leaderboard",
|
435 |
+
theme=gr.themes.Base(text_size=text_size),
|
436 |
css=block_css,
|
437 |
) as demo:
|
438 |
leader_components = build_leaderboard_tab(
|
439 |
+
elo_results_file, leaderboard_table_file, show_plot=True
|
440 |
)
|
441 |
return demo
|
442 |
|
443 |
+
|
444 |
if __name__ == "__main__":
|
445 |
parser = argparse.ArgumentParser()
|
446 |
parser.add_argument("--share", action="store_true")
|
|
|
|
|
447 |
args = parser.parse_args()
|
448 |
|
|
|
|
|
|
|
449 |
elo_result_files = glob.glob("elo_results_*.pkl")
|
450 |
elo_result_files.sort(key=lambda x: int(x[12:-4]))
|
451 |
elo_result_file = elo_result_files[-1]
|
|
|
453 |
leaderboard_table_files = glob.glob("leaderboard_table_*.csv")
|
454 |
leaderboard_table_files.sort(key=lambda x: int(x[18:-4]))
|
455 |
leaderboard_table_file = leaderboard_table_files[-1]
|
|
|
|
|
|
|
|
|
456 |
|
457 |
demo = build_demo(elo_result_file, leaderboard_table_file)
|
458 |
+
demo.launch(share=args.share)
|
arena_hard_auto_leaderboard_v0.1.csv
DELETED
@@ -1,61 +0,0 @@
|
|
1 |
-
model,score,rating_q025,rating_q975,CI,avg_tokens,date
|
2 |
-
gpt-4-turbo-2024-04-09,82.63,80.75,84.6,"(1.9, 2.0)",662.0,2024-07-31
|
3 |
-
claude-3-5-sonnet-20240620,79.35,77.25,80.62,"(2.1, 1.3)",567.0,2024-07-31
|
4 |
-
gpt-4o-2024-05-13,79.21,77.42,80.71,"(1.8, 1.5)",696.0,2024-07-31
|
5 |
-
gpt-4-0125-preview,77.96,75.94,79.9,"(2.0, 1.9)",619.0,2024-07-31
|
6 |
-
athene-70b-0725,76.83,74.84,78.74,"(2.0, 1.9)",683.0,2024-07-31
|
7 |
-
gpt-4o-mini-2024-07-18,74.94,72.66,77.07,"(2.3, 2.1)",668.0,2024-07-31
|
8 |
-
gemini-1.5-pro-api-0514,71.96,69.62,74.62,"(2.3, 2.7)",676.0,2024-07-31
|
9 |
-
yi-large-preview,71.48,69.02,73.37,"(2.5, 1.9)",720.0,2024-07-31
|
10 |
-
mistral-large-2407,70.42,68.11,72.43,"(2.3, 2.0)",623.0,2024-07-31
|
11 |
-
llama-3.1-405b-instruct,64.09,61.43,66.55,"(2.7, 2.5)",633.0,2024-07-31
|
12 |
-
glm-4-0520,63.84,61.28,66.19,"(2.6, 2.3)",636.0,2024-07-31
|
13 |
-
yi-large,63.7,61.76,65.86,"(1.9, 2.2)",626.0,2024-07-31
|
14 |
-
deepseek-coder-v2,62.3,59.82,64.72,"(2.5, 2.4)",578.0,2024-07-31
|
15 |
-
claude-3-opus-20240229,60.36,57.56,62.34,"(2.8, 2.0)",541.0,2024-07-31
|
16 |
-
gemma-2-27b-it,57.51,55.11,60.12,"(2.4, 2.6)",577.0,2024-07-31
|
17 |
-
llama-3.1-70b-instruct,55.73,52.85,58.2,"(2.9, 2.5)",628.0,2024-07-31
|
18 |
-
glm-4-0116,55.72,53.83,58.16,"(1.9, 2.4)",622.0,2024-07-31
|
19 |
-
gemini-1.5-pro-api-0409-preview,53.37,51.13,56.66,"(2.2, 3.3)",478.0,2024-07-31
|
20 |
-
glm-4-air,50.88,48.62,53.21,"(2.3, 2.3)",619.0,2024-07-31
|
21 |
-
gpt-4-0314,50.0,50.0,50.0,"(0.0, 0.0)",423.0,2024-07-31
|
22 |
-
gemini-1.5-flash-api-0514,49.61,47.46,52.17,"(2.1, 2.6)",642.0,2024-07-31
|
23 |
-
qwen2-72b-instruct,46.86,44.57,49.29,"(2.3, 2.4)",515.0,2024-07-31
|
24 |
-
claude-3-sonnet-20240229,46.8,44.12,49.04,"(2.7, 2.2)",552.0,2024-07-31
|
25 |
-
llama-3-70b-instruct,46.57,43.84,49.18,"(2.7, 2.6)",591.0,2024-07-31
|
26 |
-
claude-3-haiku-20240307,41.47,39.57,44.02,"(1.9, 2.6)",505.0,2024-07-31
|
27 |
-
gpt-4-0613,37.9,35.6,40.36,"(2.3, 2.5)",354.0,2024-07-31
|
28 |
-
mistral-large-2402,37.71,34.81,39.77,"(2.9, 2.1)",400.0,2024-07-31
|
29 |
-
mixtral-8x22b-instruct-v0.1,36.36,34.21,38.55,"(2.1, 2.2)",430.0,2024-07-31
|
30 |
-
qwen1.5-72b-chat,36.12,33.88,38.15,"(2.2, 2.0)",474.0,2024-07-31
|
31 |
-
phi-3-medium-4k-instruct,33.37,31.26,35.14,"(2.1, 1.8)",517.0,2024-07-31
|
32 |
-
command-r-plus,33.07,30.85,35.12,"(2.2, 2.0)",541.0,2024-07-31
|
33 |
-
mistral-medium,31.9,29.66,34.31,"(2.2, 2.4)",485.0,2024-07-31
|
34 |
-
phi-3-small-8k-instruct,29.77,27.94,31.97,"(1.8, 2.2)",568.0,2024-07-31
|
35 |
-
mistral-next,27.37,25.4,29.09,"(2.0, 1.7)",297.0,2024-07-31
|
36 |
-
gpt-3.5-turbo-0613,24.82,22.54,26.29,"(2.3, 1.5)",401.0,2024-07-31
|
37 |
-
dbrx-instruct-preview,24.63,22.33,26.83,"(2.3, 2.2)",415.0,2024-07-31
|
38 |
-
claude-2.0,23.99,21.71,25.65,"(2.3, 1.7)",295.0,2024-07-31
|
39 |
-
mixtral-8x7b-instruct-v0.1,23.4,21.38,25.41,"(2.0, 2.0)",457.0,2024-07-31
|
40 |
-
gpt-3.5-turbo-0125,23.34,21.67,25.27,"(1.7, 1.9)",329.0,2024-07-31
|
41 |
-
yi-34b-chat,23.15,20.75,24.7,"(2.4, 1.6)",611.0,2024-07-31
|
42 |
-
starling-lm-7b-beta,23.01,20.81,24.66,"(2.2, 1.6)",530.0,2024-07-31
|
43 |
-
claude-2.1,22.77,20.65,25.43,"(2.1, 2.7)",290.0,2024-07-31
|
44 |
-
llama-3.1-8b-instruct,21.34,19.71,23.09,"(1.6, 1.8)",861.0,2024-07-31
|
45 |
-
snorkel-mistral-pairrm-dpo,20.73,19.04,22.05,"(1.7, 1.3)",564.0,2024-07-31
|
46 |
-
llama-3-8b-instruct,20.56,18.82,22.61,"(1.7, 2.1)",585.0,2024-07-31
|
47 |
-
gpt-3.5-turbo-1106,18.87,17.06,20.58,"(1.8, 1.7)",285.0,2024-07-31
|
48 |
-
gpt-3.5-turbo-0314,18.05,16.57,20.06,"(1.5, 2.0)",334.0,2024-07-31
|
49 |
-
gemini-pro,17.8,15.96,19.32,"(1.8, 1.5)",322.0,2024-07-31
|
50 |
-
snowflake-arctic-instruct,17.61,16.12,19.27,"(1.5, 1.7)",365.0,2024-07-31
|
51 |
-
command-r,17.02,15.73,18.51,"(1.3, 1.5)",432.0,2024-07-31
|
52 |
-
phi-3-mini-128k-instruct,15.43,13.94,17.02,"(1.5, 1.6)",609.0,2024-07-31
|
53 |
-
tulu-2-dpo-70b,14.99,13.05,16.82,"(1.9, 1.8)",550.0,2024-07-31
|
54 |
-
starling-lm-7b-alpha,12.8,11.23,14.5,"(1.6, 1.7)",483.0,2024-07-31
|
55 |
-
mistral-7b-instruct,12.57,11.05,14.11,"(1.5, 1.5)",541.0,2024-07-31
|
56 |
-
gemma-1.1-7b-it,12.09,10.61,13.43,"(1.5, 1.3)",341.0,2024-07-31
|
57 |
-
llama-2-70b-chat,11.55,10.02,13.01,"(1.5, 1.5)",595.0,2024-07-31
|
58 |
-
vicuna-33b,8.63,7.59,9.84,"(1.0, 1.2)",451.0,2024-07-31
|
59 |
-
gemma-7b-it,7.47,6.5,8.6,"(1.0, 1.1)",378.0,2024-07-31
|
60 |
-
gemma-1.1-2b-it,3.37,2.74,4.14,"(0.6, 0.8)",316.0,2024-07-31
|
61 |
-
gemma-2b-it,3.0,2.33,3.67,"(0.7, 0.7)",369.0,2024-07-31
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
elo_results_20240327.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:bab4e9fa00e9d7c8244723993174af2c4f35ffc8487cc3059504b72658f06f43
|
3 |
-
size 457743
|
|
|
|
|
|
|
|
elo_results_20240410.pkl β elo_results_20240329.pkl
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7f4c037f68c9ddbf27b70b1cb333ca37bf70ff9a3cddad7a93cd62bca709cd77
|
3 |
+
size 115776
|
elo_results_20240403.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:ce8cebf41da8c06eee0f37156e01be83cc43182e0f00444311b4ad97a83154be
|
3 |
-
size 690286
|
|
|
|
|
|
|
|
elo_results_20240409.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:e6774f780b63f569666e9a85b12eddceef3af75e1d1799ff7c6e0529102950c3
|
3 |
-
size 119947
|
|
|
|
|
|
|
|
elo_results_20240411.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:fada8d86ddb6dae319c5bda602d921859cc4280fdd53388eff446d80c3ab8192
|
3 |
-
size 1183214
|
|
|
|
|
|
|
|
elo_results_20240413.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:3ad8ebb2a8602a7c72382fc65521fbe7b06bb36dcf6b6cc582c6b89b1d7b1a87
|
3 |
-
size 1064654
|
|
|
|
|
|
|
|
elo_results_20240418.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:1b05163db100df9ef62c7efb3332891d3321c6094e787af3b4ef4a9afe2becdb
|
3 |
-
size 1130887
|
|
|
|
|
|
|
|
elo_results_20240419.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:05239b0b4406f270fdc845632981024dec639b8351dcb1a2308def3bbcea2e68
|
3 |
-
size 1130756
|
|
|
|
|
|
|
|
elo_results_20240422.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:436c6bc88b6c03b672a3a87ddb3b101ec1c5ff03d47d64196986b5d6ca7909cd
|
3 |
-
size 1254718
|
|
|
|
|
|
|
|
elo_results_20240426.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:a39b33094ac93d4a1e0bc57bfbb17368515ce5a7e4504d3d1e310a14cd056943
|
3 |
-
size 1275849
|
|
|
|
|
|
|
|
elo_results_20240501.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:35f506d5d34555cbc055abc901623fae3aa7b429057cf3039cb1b460fdc8f41c
|
3 |
-
size 1159628
|
|
|
|
|
|
|
|
elo_results_20240508.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:79acc98f859a5f597338eff0ae98025abfec80087d60336c0d735e7dd3595eb2
|
3 |
-
size 1188396
|
|
|
|
|
|
|
|
elo_results_20240515.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:692e9b280f1587b85f28aaeceee52928a92f6a98ee81e3d63a2d789c82eb9abc
|
3 |
-
size 1596977
|
|
|
|
|
|
|
|
elo_results_20240516.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:e3ff3e278c66aa28aece3e52369d128ca6707673e0e811304294c5d8a85aaf81
|
3 |
-
size 1476125
|
|
|
|
|
|
|
|
elo_results_20240519.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:70c1136517de9396d72b2e14faee88382e503d0b80e56a5131e220173f6b472b
|
3 |
-
size 1604729
|
|
|
|
|
|
|
|
elo_results_20240520.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:82d5306e5b88813cea3490a0cf2d02952219c52257789d6077caeee986996567
|
3 |
-
size 1628933
|
|
|
|
|
|
|
|
elo_results_20240527.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:ca33433b15176a4b7c4f13584b2129ebd60f059524648c4a4a986aad4a84bc1e
|
3 |
-
size 1666346
|
|
|
|
|
|
|
|
elo_results_20240602.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:1e884bc9a41a3aa9916e29ca34e5ac2e52e6f8d7e314e380facd06f9ae855145
|
3 |
-
size 2278603
|
|
|
|
|
|
|
|
elo_results_20240606.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:6073ca1b10e9ac34c2b67b73636dcd2303b4a9291c9a440ad3813c33ef5fa170
|
3 |
-
size 2295194
|
|
|
|
|
|
|
|
elo_results_20240611.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:8b0f14fede0abeee60825682ffab3b07b50af5d9924de3c8114ddac469b34779
|
3 |
-
size 2310921
|
|
|
|
|
|
|
|
elo_results_20240617.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:a6e53f4d339bbc3420b349e89a315a1c6d2fa3c9847b206aeb02e1e5170aea73
|
3 |
-
size 2491948
|
|
|
|
|
|
|
|
elo_results_20240621.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:b098f9f69009376d87ef4d317c81c648d0558f7c912f693e285de5d3115e309b
|
3 |
-
size 2526260
|
|
|
|
|
|
|
|
elo_results_20240623.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:1469aac4504cd49bdbf1566093c5be912fd6ffe27f62213fd4961eefc92b4e30
|
3 |
-
size 2544361
|
|
|
|
|
|
|
|
elo_results_20240626.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:f6b7c6dc746e8dfb7fed966d9c027468dccbd31da5866af6fff0083478ef52ff
|
3 |
-
size 2429037
|
|
|
|
|
|
|
|
elo_results_20240629.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:acdaa9f0e89e01a5d1ee914d750727bf9877cde8bf2e195439bc7625b80f197f
|
3 |
-
size 2679801
|
|
|
|
|
|
|
|
elo_results_20240706.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:dbd774b3b25712428c96cf54b5cb8c4d912e8b8215edbcee3dee0974fd898c8e
|
3 |
-
size 2702290
|
|
|
|
|
|
|
|
elo_results_20240708.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:5d72cd3bf16c9af40910a2faf9f1403df2788596390e0f13452835748968679d
|
3 |
-
size 3154928
|
|
|
|
|
|
|
|
elo_results_20240716.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:dec0c95bbaf8a2ed382b1676eba0b22ad89fa8815d28bc22c94cc6ea00205e5d
|
3 |
-
size 3029902
|
|
|
|
|
|
|
|
elo_results_20240722.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:c01b5a786ffd8bf64b72d39ca3fc7dee5483852c6a515f515532c8096a6e16d4
|
3 |
-
size 3054518
|
|
|
|
|
|
|
|
elo_results_20240725.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:1bfde449e424f12c9316f0c64062fc1ef9926e4a924bbaeafa455fcde0decb6f
|
3 |
-
size 3073542
|
|
|
|
|
|
|
|
elo_results_20240730.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:8db59f5f0852f26600f85c3188547e5d97b4641906c979bcc178cd5be7a7554c
|
3 |
-
size 3137995
|
|
|
|
|
|
|
|
elo_results_20240731.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:9aaff49a611fef8ff7bec058827d7de3426973c15bc538ac6bb39e764cc14b34
|
3 |
-
size 3157482
|
|
|
|
|
|
|
|
elo_results_20240801.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:cdaa16f762adc54cbe05ab03dff88587491193852846b749980294b7f1ea2bec
|
3 |
-
size 3182414
|
|
|
|
|
|
|
|
elo_results_20240805.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:e638bfe870e4409c4d768d57287cc8f6310caf85191d8020ba89f7f59ee9f6d8
|
3 |
-
size 3202114
|
|
|
|
|
|
|
|
elo_results_20240806.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:a4f31f36636a280589bb1039f9d1f405a989df6a8f74d1af30555d891b23a416
|
3 |
-
size 3261205
|
|
|
|
|
|
|
|
elo_results_20240813.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:b4c5dfd4247b704f07e61ae27dc0642a3d3cfa9a6872cc3dc03d1888a594de9f
|
3 |
-
size 2943734
|
|
|
|
|
|
|
|
elo_results_20240822.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:c733894de1252232a63ce3632abe52504fb6bcf43e17bb49fea2b5ad8d76116f
|
3 |
-
size 3004697
|
|
|
|
|
|
|
|
elo_results_20240823.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:ec103aa6bf5d0f02f8bd2c69c8ccfc8f1be1b44c7dc004d967c8d5ce470975b5
|
3 |
-
size 3039588
|
|
|
|
|
|
|
|
elo_results_20240827.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:fbcf63bc492b9e2018fdd2c82924f375f12db20dd577f2c139a8ff82a2d08159
|
3 |
-
size 3093445
|
|
|
|
|
|
|
|
elo_results_20240828.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:b7da13b5f061a7a5a112e5ca45ff707d6cf6259c8a01b40ea5b77bbd5bd3d5b0
|
3 |
-
size 3819732
|
|
|
|
|
|
|
|
elo_results_20240904.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:cf6117b1e28bb982e965d20b927685dce98750a82c255b868588b5b2318aaee9
|
3 |
-
size 3486555
|
|
|
|
|
|
|
|
elo_results_20240915.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:42512fe148c81eeab05961ed64fe446dc7f6ed3703f976fc4c8c2a6a3a3e6bef
|
3 |
-
size 3726145
|
|
|
|
|
|
|
|
elo_results_20240917.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:d15ed36a6e429a9213e9230fbd28de05e7788f758c4282660dd77a4689f98590
|
3 |
-
size 3768775
|
|
|
|
|
|
|
|
elo_results_20240927.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:7c96f9a95fbbf44b8cb46129cc8ea09eef1bb6e43b8ec12d1c8837091d23ee69
|
3 |
-
size 3860967
|
|
|
|
|
|
|
|
elo_results_20241007.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:3891408ad17e52226989783562eb75f784dfb1d0a30dd9051e943b48cd0b117c
|
3 |
-
size 3919717
|
|
|
|
|
|
|
|
elo_results_20241015.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:6daea1d6b24298f32f23c903872aa6c852453de29bf82aa0d2e7aaf5337af715
|
3 |
-
size 4058396
|
|
|
|
|
|
|
|
elo_results_20241023.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:2e592a6807b4affbc2e203b385ff491743116b4854c9bcd3c3bbf037fba6092f
|
3 |
-
size 4374135
|
|
|
|
|
|
|
|