ksort committed on
Commit
ffc77cb
•
1 Parent(s): 8e65357

Update Leaderboard

Browse files
app.py CHANGED
@@ -21,7 +21,8 @@ def build_combine_demo(models, elo_results_file, leaderboard_table_file):
21
  with gr.Tab("Image Generation", id=0):
22
  with gr.Tabs() as tabs_ig:
23
  with gr.Tab("Generation Leaderboard", id=0):
24
- build_leaderboard_tab(elo_results_file['t2i_generation'], leaderboard_table_file['t2i_generation'])
 
25
 
26
  with gr.Tab("Generation Arena (battle)", id=1):
27
  build_side_by_side_ui_anony(models)
 
21
  with gr.Tab("Image Generation", id=0):
22
  with gr.Tabs() as tabs_ig:
23
  with gr.Tab("Generation Leaderboard", id=0):
24
+ # build_leaderboard_tab(elo_results_file['t2i_generation'], leaderboard_table_file['t2i_generation'])
25
+ build_leaderboard_tab()
26
 
27
  with gr.Tab("Generation Arena (battle)", id=1):
28
  build_side_by_side_ui_anony(models)
arena_elo/LICENSE DELETED
@@ -1,21 +0,0 @@
1
- MIT License
2
-
3
- Copyright (c) 2024 WildVision-Bench
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy
6
- of this software and associated documentation files (the "Software"), to deal
7
- in the Software without restriction, including without limitation the rights
8
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- copies of the Software, and to permit persons to whom the Software is
10
- furnished to do so, subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
arena_elo/README.md DELETED
@@ -1,46 +0,0 @@
1
- ## Computing the Elo Ratings
2
-
3
-
4
- ```bash
5
- apt-get -y install pkg-config
6
- pip install -r requirements.txt
7
- ```
8
-
9
-
10
- ### to update the leaderboard
11
-
12
- ```bash
13
- export LOGDIR="/path/to/your/logdir"
14
- bash update_elo_rating.sh
15
- ```
16
-
17
- ### to inspect the leaderboard status
18
- ```bash
19
- python -m elo_rating.inspect_elo_rating_pkl
20
- ```
21
-
22
- ### to inspect the collected data status and cost
23
- ```bash
24
- export LOGDIR="/path/to/your/logdir"
25
- python -m elo_rating.inspect_cost
26
- ```
27
-
28
- ### to upload the battle data to hugging face🤗
29
- ```bash
30
- export HUGGINGFACE_TOKEN="your_huggingface_token"
31
- bash get_latest_data.sh
32
- python -m elo_rating.upload_battle_data --repo_id "WildVision/wildvision-bench" --log_dir "./vision-arena-logs/"
33
- ```
34
-
35
- ### to upload the chat data to hugging face🤗
36
- ```bash
37
- export HUGGINGFACE_TOKEN="your_huggingface_token"
38
- bash get_latest_data.sh
39
- python -m elo_rating.upload_chat_data --repo_id "WildVision/wildvision-bench" --log_dir "./vision-arena-logs/"
40
- ```
41
-
42
-
43
- ### to get the collected data
44
- ```bash
45
- python -m
46
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
arena_elo/edition_model_info.json DELETED
@@ -1,47 +0,0 @@
1
- {
2
- "CycleDiffusion": {
3
- "Link": "https://github.com/ChenWu98/cycle-diffusion",
4
- "License": "X11",
5
- "Organization": "Carnegie Mellon University"
6
- },
7
- "PNP": {
8
- "Link": "https://github.com/MichalGeyer/plug-and-play",
9
- "License": "-",
10
- "Organization": "Weizmann Institute of Science"
11
- },
12
- "InstructPix2Pix": {
13
- "Link": "https://www.timothybrooks.com/instruct-pix2pix",
14
- "License": "Copyright 2023 Timothy Brooks, Aleksander Holynski, Alexei A. Efros",
15
- "Organization": "University of California, Berkeley"
16
- },
17
- "Pix2PixZero": {
18
- "Link": "https://pix2pixzero.github.io",
19
- "License": "MIT License",
20
- "Organization": "Carnegie Mellon University, Adobe Research"
21
- },
22
- "MagicBrush": {
23
- "Link": "https://osu-nlp-group.github.io/MagicBrush",
24
- "License": "CC-BY-4.0",
25
- "Organization": "The Ohio State University, University of Waterloo"
26
- },
27
- "Prompt2prompt": {
28
- "Link": "https://prompt-to-prompt.github.io",
29
- "License": "Apache-2.0",
30
- "Organization": "Google, Tel Aviv University"
31
- },
32
- "SDEdit": {
33
- "Link": "https://sde-image-editing.github.io",
34
- "License": "MIT License",
35
- "Organization": "Stanford University"
36
- },
37
- "CosXLEdit": {
38
- "Link": "https://huggingface.co/spaces/multimodalart/cosxl",
39
- "License": "cosxl-nc-community",
40
- "Organization": "Stability AI"
41
- },
42
- "InfEdit": {
43
- "Link": "https://huggingface.co/spaces/sled-umich/InfEdit",
44
- "License": "Apache-2.0",
45
- "Organization": "University of Michigan, University of California, Berkeley"
46
- }
47
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
arena_elo/elo_rating/__init__.py DELETED
File without changes
arena_elo/elo_rating/basic_stats.py DELETED
@@ -1,227 +0,0 @@
1
- import argparse
2
- import code
3
- import datetime
4
- import json
5
- import os
6
- from pytz import timezone
7
- import time
8
-
9
- import pandas as pd # pandas>=2.0.3
10
- import plotly.express as px
11
- import plotly.graph_objects as go
12
- from tqdm import tqdm
13
-
14
- NUM_SERVERS = 1
15
- LOG_ROOT_DIR = os.getenv("LOGDIR", None)
16
- if LOG_ROOT_DIR is None:
17
- raise ValueError("LOGDIR environment variable not set, please set it by `export LOGDIR=...`")
18
-
19
- def get_log_files(max_num_files=None):
20
- log_root = os.path.expanduser(LOG_ROOT_DIR)
21
- filenames = []
22
- if NUM_SERVERS == 1:
23
- for filename in os.listdir(log_root):
24
- if filename.endswith("-conv.json"):
25
- filepath = f"{log_root}/{filename}"
26
- name_tstamp_tuple = (filepath, os.path.getmtime(filepath))
27
- filenames.append(name_tstamp_tuple)
28
- else:
29
- for i in range(NUM_SERVERS):
30
- for filename in os.listdir(f"{log_root}/server{i}"):
31
- if filename.endswith("-conv.json"):
32
- filepath = f"{log_root}/server{i}/{filename}"
33
- name_tstamp_tuple = (filepath, os.path.getmtime(filepath))
34
- filenames.append(name_tstamp_tuple)
35
- # sort by tstamp
36
- filenames = sorted(filenames, key=lambda x: x[1])
37
- filenames = [x[0] for x in filenames]
38
-
39
- max_num_files = max_num_files or len(filenames)
40
- filenames = filenames[-max_num_files:]
41
- return filenames
42
-
43
-
44
- def load_log_files(filename):
45
- data = []
46
- for retry in range(5):
47
- try:
48
- lines = open(filename).readlines()
49
- break
50
- except FileNotFoundError:
51
- time.sleep(2)
52
-
53
- for l in lines:
54
- row = json.loads(l)
55
- data.append(
56
- dict(
57
- type=row["type"],
58
- tstamp=row["tstamp"],
59
- model=row.get("model", ""),
60
- models=row.get("models", ["", ""]),
61
- )
62
- )
63
- return data
64
-
65
-
66
- def load_log_files_parallel(log_files, num_threads=16):
67
- data_all = []
68
- from multiprocessing import Pool
69
-
70
- with Pool(num_threads) as p:
71
- ret_all = list(tqdm(p.imap(load_log_files, log_files), total=len(log_files)))
72
- for ret in ret_all:
73
- data_all.extend(ret)
74
- return data_all
75
-
76
-
77
- def get_anony_vote_df(df):
78
- anony_vote_df = df[
79
- df["type"].isin(["leftvote", "rightvote", "tievote", "bothbad_vote"])
80
- ]
81
- anony_vote_df = anony_vote_df[anony_vote_df["models"].apply(lambda x: x[0] == "")]
82
- return anony_vote_df
83
-
84
-
85
- def merge_counts(series, on, names):
86
- ret = pd.merge(series[0], series[1], on=on)
87
- for i in range(2, len(series)):
88
- ret = pd.merge(ret, series[i], on=on)
89
- ret = ret.reset_index()
90
- old_names = list(ret.columns)[-len(series) :]
91
- rename = {old_name: new_name for old_name, new_name in zip(old_names, names)}
92
- ret = ret.rename(columns=rename)
93
- return ret
94
-
95
-
96
- def report_basic_stats(log_files):
97
- df_all = load_log_files_parallel(log_files)
98
- df_all = pd.DataFrame(df_all)
99
- now_t = df_all["tstamp"].max()
100
- df_1_hour = df_all[df_all["tstamp"] > (now_t - 3600)]
101
- df_1_day = df_all[df_all["tstamp"] > (now_t - 3600 * 24)]
102
- anony_vote_df_all = get_anony_vote_df(df_all)
103
-
104
- # Chat trends
105
- chat_dates = [
106
- datetime.datetime.fromtimestamp(x, tz=timezone("US/Pacific")).strftime(
107
- "%Y-%m-%d"
108
- )
109
- for x in df_all[df_all["type"] == "chat"]["tstamp"]
110
- ]
111
- chat_dates_counts = pd.value_counts(chat_dates)
112
- vote_dates = [
113
- datetime.datetime.fromtimestamp(x, tz=timezone("US/Pacific")).strftime(
114
- "%Y-%m-%d"
115
- )
116
- for x in anony_vote_df_all["tstamp"]
117
- ]
118
- vote_dates_counts = pd.value_counts(vote_dates)
119
- chat_dates_bar = go.Figure(
120
- data=[
121
- go.Bar(
122
- name="Anony. Vote",
123
- x=vote_dates_counts.index,
124
- y=vote_dates_counts,
125
- text=[f"{val:.0f}" for val in vote_dates_counts],
126
- textposition="auto",
127
- ),
128
- go.Bar(
129
- name="Chat",
130
- x=chat_dates_counts.index,
131
- y=chat_dates_counts,
132
- text=[f"{val:.0f}" for val in chat_dates_counts],
133
- textposition="auto",
134
- ),
135
- ]
136
- )
137
- chat_dates_bar.update_layout(
138
- barmode="stack",
139
- xaxis_title="Dates",
140
- yaxis_title="Count",
141
- height=300,
142
- width=1200,
143
- )
144
-
145
- # Model call counts
146
- model_hist_all = df_all[df_all["type"] == "chat"]["model"].value_counts()
147
- model_hist_1_day = df_1_day[df_1_day["type"] == "chat"]["model"].value_counts()
148
- model_hist_1_hour = df_1_hour[df_1_hour["type"] == "chat"]["model"].value_counts()
149
- model_hist = merge_counts(
150
- [model_hist_all, model_hist_1_day, model_hist_1_hour],
151
- on="model",
152
- names=["All", "Last Day", "Last Hour"],
153
- )
154
- model_hist_md = model_hist.to_markdown(index=False, tablefmt="github")
155
-
156
- # Action counts
157
- action_hist_all = df_all["type"].value_counts()
158
- action_hist_1_day = df_1_day["type"].value_counts()
159
- action_hist_1_hour = df_1_hour["type"].value_counts()
160
- action_hist = merge_counts(
161
- [action_hist_all, action_hist_1_day, action_hist_1_hour],
162
- on="type",
163
- names=["All", "Last Day", "Last Hour"],
164
- )
165
- action_hist_md = action_hist.to_markdown(index=False, tablefmt="github")
166
-
167
- # Anony vote counts
168
- anony_vote_hist_all = anony_vote_df_all["type"].value_counts()
169
- anony_vote_df_1_day = get_anony_vote_df(df_1_day)
170
- anony_vote_hist_1_day = anony_vote_df_1_day["type"].value_counts()
171
- # anony_vote_df_1_hour = get_anony_vote_df(df_1_hour)
172
- # anony_vote_hist_1_hour = anony_vote_df_1_hour["type"].value_counts()
173
- anony_vote_hist = merge_counts(
174
- [anony_vote_hist_all, anony_vote_hist_1_day],
175
- on="type",
176
- names=["All", "Last Day"],
177
- )
178
- anony_vote_hist_md = anony_vote_hist.to_markdown(index=False, tablefmt="github")
179
-
180
- # Last 24 hours
181
- chat_1_day = df_1_day[df_1_day["type"] == "chat"]
182
- num_chats_last_24_hours = []
183
- base = df_1_day["tstamp"].min()
184
- for i in range(24, 0, -1):
185
- left = base + (i - 1) * 3600
186
- right = base + i * 3600
187
- num = ((chat_1_day["tstamp"] >= left) & (chat_1_day["tstamp"] < right)).sum()
188
- num_chats_last_24_hours.append(num)
189
- times = [
190
- datetime.datetime.fromtimestamp(
191
- base + i * 3600, tz=timezone("US/Pacific")
192
- ).strftime("%Y-%m-%d %H:%M:%S %Z")
193
- for i in range(24, 0, -1)
194
- ]
195
- last_24_hours_df = pd.DataFrame({"time": times, "value": num_chats_last_24_hours})
196
- last_24_hours_md = last_24_hours_df.to_markdown(index=False, tablefmt="github")
197
-
198
- # Last update datetime
199
- last_updated_tstamp = now_t
200
- last_updated_datetime = datetime.datetime.fromtimestamp(
201
- last_updated_tstamp, tz=timezone("US/Pacific")
202
- ).strftime("%Y-%m-%d %H:%M:%S %Z")
203
-
204
- # code.interact(local=locals())
205
-
206
- return {
207
- "chat_dates_bar": chat_dates_bar,
208
- "model_hist_md": model_hist_md,
209
- "action_hist_md": action_hist_md,
210
- "anony_vote_hist_md": anony_vote_hist_md,
211
- "num_chats_last_24_hours": last_24_hours_md,
212
- "last_updated_datetime": last_updated_datetime,
213
- }
214
-
215
-
216
- if __name__ == "__main__":
217
- parser = argparse.ArgumentParser()
218
- parser.add_argument("--max-num-files", type=int)
219
- args = parser.parse_args()
220
-
221
- log_files = get_log_files(args.max_num_files)
222
- basic_stats = report_basic_stats(log_files)
223
-
224
- print(basic_stats["action_hist_md"] + "\n")
225
- print(basic_stats["model_hist_md"] + "\n")
226
- print(basic_stats["anony_vote_hist_md"] + "\n")
227
- print(basic_stats["num_chats_last_24_hours"] + "\n")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
arena_elo/elo_rating/clean_battle_data.py DELETED
@@ -1,432 +0,0 @@
1
- """
2
- Clean chatbot arena battle log.
3
-
4
- Usage:
5
- python3 clean_battle_data.py --mode conv_release
6
- """
7
- import argparse
8
- import datetime
9
- import json
10
- import os
11
- import sys
12
- from pytz import timezone
13
- import time
14
- import PIL
15
- from PIL import ImageFile
16
- ImageFile.LOAD_TRUNCATED_IMAGES = True
17
-
18
- from tqdm import tqdm
19
-
20
- from .basic_stats import get_log_files, NUM_SERVERS, LOG_ROOT_DIR
21
- from .utils import detect_language, get_time_stamp_from_date
22
-
23
- VOTES = ["tievote", "leftvote", "rightvote", "bothbad_vote"]
24
- IDENTITY_WORDS = [
25
- "vicuna",
26
- "lmsys",
27
- "koala",
28
- "uc berkeley",
29
- "open assistant",
30
- "laion",
31
- "chatglm",
32
- "chatgpt",
33
- "gpt-4",
34
- "openai",
35
- "anthropic",
36
- "claude",
37
- "bard",
38
- "palm",
39
- "lamda",
40
- "google",
41
- "llama",
42
- "qianwan",
43
- "alibaba",
44
- "mistral",
45
- "zhipu",
46
- "KEG lab",
47
- "01.AI",
48
- "AI2",
49
- "Tülu",
50
- "Tulu",
51
- "NETWORK ERROR DUE TO HIGH TRAFFIC. PLEASE REGENERATE OR REFRESH THIS PAGE.",
52
- "$MODERATION$ YOUR INPUT VIOLATES OUR CONTENT MODERATION GUIDELINES.",
53
- "API REQUEST ERROR. Please increase the number of max tokens.",
54
- "**API REQUEST ERROR** Reason: The response was blocked.",
55
- "**API REQUEST ERROR**",
56
- ]
57
-
58
- for i in range(len(IDENTITY_WORDS)):
59
- IDENTITY_WORDS[i] = IDENTITY_WORDS[i].lower()
60
-
61
-
62
- def remove_html(raw):
63
- if raw.startswith("<h3>"):
64
- return raw[raw.find(": ") + 2 : -len("</h3>\n")]
65
- if raw.startswith("### Model A: ") or raw.startswith("### Model B: "):
66
- return raw[13:]
67
- return raw
68
-
69
-
70
- def to_openai_format(messages):
71
- roles = ["user", "assistant"]
72
- ret = []
73
- for i, x in enumerate(messages):
74
- ret.append({"role": roles[i % 2], "content": x[1]})
75
- return ret
76
-
77
-
78
- def replace_model_name(old_name, tstamp):
79
- replace_dict = {
80
- "bard": "palm-2",
81
- "claude-v1": "claude-1",
82
- "claude-instant-v1": "claude-instant-1",
83
- "oasst-sft-1-pythia-12b": "oasst-pythia-12b",
84
- "claude-2": "claude-2.0",
85
- "PlayGroundV2": "PlayGround V2",
86
- "PlayGroundV2.5": "PlayGround V2.5",
87
- }
88
- if old_name in ["gpt-4", "gpt-3.5-turbo"]:
89
- if tstamp > 1687849200:
90
- return old_name + "-0613"
91
- else:
92
- return old_name + "-0314"
93
- if old_name in replace_dict:
94
- return replace_dict[old_name]
95
- return old_name
96
-
97
-
98
- def read_file(filename):
99
- data = []
100
- for retry in range(5):
101
- try:
102
- # lines = open(filename).readlines()
103
- for l in open(filename):
104
- row = json.loads(l)
105
- if row["type"] in VOTES:
106
- data.append(row)
107
- break
108
- except FileNotFoundError:
109
- time.sleep(2)
110
- except json.JSONDecodeError:
111
- print(f"Error in reading {filename}")
112
- print(row)
113
- exit(0)
114
- return data
115
-
116
-
117
- def read_file_parallel(log_files, num_threads=16):
118
- data_all = []
119
- from multiprocessing import Pool
120
-
121
- with Pool(num_threads) as p:
122
- ret_all = list(tqdm(p.imap(read_file, log_files), total=len(log_files)))
123
- for ret in ret_all:
124
- data_all.extend(ret)
125
- return data_all
126
-
127
- def load_image(image_path):
128
- try:
129
- return PIL.Image.open(image_path)
130
- except:
131
- return None
132
-
133
- def clean_battle_data(
134
- log_files, exclude_model_names, ban_ip_list=None, sanitize_ip=False, mode="simple", task_name="image_editing"
135
- ):
136
- data = read_file_parallel(log_files, num_threads=16)
137
-
138
- convert_type = {
139
- "leftvote": "model_a",
140
- "rightvote": "model_b",
141
- "tievote": "tie",
142
- "bothbad_vote": "tie (bothbad)",
143
- }
144
-
145
- all_models = set()
146
- all_ips = dict()
147
- ct_anony = 0
148
- ct_invalid = 0
149
- ct_leaked_identity = 0
150
- ct_banned = 0
151
- battles = []
152
- for row in tqdm(data, desc="Cleaning"):
153
- if row["models"][0] is None or row["models"][1] is None:
154
- continue
155
-
156
- # Resolve model names
157
- models_public = [remove_html(row["models"][0]), remove_html(row["models"][1])]
158
- if "model_name" in row["states"][0]:
159
- models_hidden = [
160
- row["states"][0]["model_name"],
161
- row["states"][1]["model_name"],
162
- ]
163
- if models_hidden[0] is None:
164
- models_hidden = models_public
165
- else:
166
- models_hidden = models_public
167
-
168
- if (models_public[0] == "" and models_public[1] != "") or (
169
- models_public[1] == "" and models_public[0] != ""
170
- ):
171
- ct_invalid += 1
172
- continue
173
-
174
- if models_public[0] == "" or models_public[0] == "Model A":
175
- anony = True
176
- models = models_hidden
177
- ct_anony += 1
178
- else:
179
- anony = False
180
- models = models_public
181
- if not models_public == models_hidden:
182
- ct_invalid += 1
183
- continue
184
-
185
- # # Detect langauge
186
- # state = row["states"][0]
187
- # if state["offset"] >= len(state["messages"]):
188
- # ct_invalid += 1
189
- # continue
190
- # lang_code = detect_language(state["messages"][state["offset"]][1])
191
-
192
- # # Drop conversations if the model names are leaked
193
- # leaked_identity = False
194
- # messages = ""
195
- # for i in range(2):
196
- # state = row["states"][i]
197
- # for turn_idx, (role, msg) in enumerate(
198
- # state["messages"][state["offset"] :]
199
- # ):
200
- # if msg:
201
- # messages += msg.lower()
202
- # for word in IDENTITY_WORDS:
203
- # if word in messages:
204
- # leaked_identity = True
205
- # break
206
-
207
- # if leaked_identity:
208
- # ct_leaked_identity += 1
209
- # continue
210
-
211
- def preprocess_model_name(m):
212
- if m == "Playground v2":
213
- return 'playground_PlayGroundV2_generation'
214
- if m == "Playground v2.5":
215
- return 'playground_PlayGroundV2.5_generation'
216
- return m
217
- models = [preprocess_model_name(m) for m in models]
218
-
219
- # Replace bard with palm
220
- if task_name == "image_editing":
221
- valid = True
222
- for _model in models:
223
- try:
224
- platform, model_name, task = _model.split("_")
225
- except ValueError:
226
- print(f"Invalid model names: {_model}")
227
- valid = False
228
- break
229
- if not (platform in ["playground", "imagenhub"] and task == "edition"):
230
- valid = False
231
- break
232
- if not valid:
233
- ct_invalid += 1
234
- continue
235
- for i, _model in enumerate(models):
236
- platform, model_name, task = _model.split("_")
237
- models[i] = model_name
238
-
239
- # if not all(x.startswith("imagenhub_") and x.endswith("_edition") for x in models):
240
- # # print(f"Invalid model names: {models}")
241
- # ct_invalid += 1
242
- # continue
243
-
244
- # models = [x[len("imagenhub_"):-len("_edition")] for x in models]
245
- elif task_name == "t2i_generation":
246
- valid = True
247
- for _model in models:
248
- try:
249
- platform, model_name, task = _model.split("_")
250
- except ValueError:
251
- print(f"Invalid model names: {_model}")
252
- valid = False
253
- break
254
- if not (platform.lower() in ["playground", "imagenhub", 'fal'] and (task == "generation" or task == "text2image")):
255
- valid = False
256
- break
257
- if not valid:
258
- ct_invalid += 1
259
- continue
260
- for i, _model in enumerate(models):
261
- platform, model_name, task = _model.split("_")
262
- models[i] = model_name
263
- # if not all("playground" in x.lower() or (x.startswith("imagenhub_") and x.endswith("_generation")) for x in models):
264
- # print(f"Invalid model names: {models}")
265
- # ct_invalid += 1
266
- # continue
267
- # models = [x[len("imagenhub_"):-len("_generation")] for x in models]
268
- # for i, model_name in enumerate(models):
269
- # mode
270
- # if model_name.startswith("imagenhub_"):
271
- # models[i] = model_name[len("imagenhub_"):-len("_generation")]
272
-
273
- else:
274
- raise ValueError(f"Invalid task_name: {task_name}")
275
- models = [replace_model_name(m, row["tstamp"]) for m in models]
276
-
277
- # Exclude certain models
278
- if exclude_model_names and any(x in exclude_model_names for x in models):
279
- ct_invalid += 1
280
- continue
281
-
282
- # if models[0] not in model_infos or models[1] not in model_infos:
283
- # continue
284
-
285
- # # Exclude votes before the starting date
286
- # if model_infos and (model_infos[models[0]]["starting_from"] > row["tstamp"] or model_infos[models[1]]["starting_from"] > row["tstamp"]):
287
- # print(f"Invalid vote before the valid starting date for {models[0]} and {models[1]}")
288
- # ct_invalid += 1
289
- # continue
290
-
291
-
292
-
293
- if mode == "conv_release":
294
- # assert the two images are the same
295
- date = datetime.datetime.fromtimestamp(row["tstamp"], tz=timezone("US/Pacific")).strftime("%Y-%m-%d") # 2024-02-29
296
- image_path_format = f"{LOG_ROOT_DIR}/{date}-convinput_images/input_image_"
297
- image_path_0 = image_path_format + str(row["states"][0]["conv_id"]) + ".png"
298
- image_path_1 = image_path_format + str(row["states"][1]["conv_id"]) + ".png"
299
- if not os.path.exists(image_path_0) or not os.path.exists(image_path_1):
300
- print(f"Image not found for {image_path_0} or {image_path_1}")
301
- ct_invalid += 1
302
- continue
303
-
304
- image_0 = load_image(image_path_0)
305
- image_1 = load_image(image_path_1)
306
- if image_0 is None or image_1 is None:
307
- print(f"Image not found for {image_path_0} or {image_path_1}")
308
- ct_invalid += 1
309
- continue
310
- if image_0.tobytes() != image_1.tobytes():
311
- print(f"Image not the same for {image_path_0} and {image_path_1}")
312
- ct_invalid += 1
313
- continue
314
-
315
-
316
- question_id = row["states"][0]["conv_id"]
317
- # conversation_a = to_openai_format(
318
- # row["states"][0]["messages"][row["states"][0]["offset"] :]
319
- # )
320
- # conversation_b = to_openai_format(
321
- # row["states"][1]["messages"][row["states"][1]["offset"] :]
322
- # )
323
-
324
- ip = row["ip"]
325
- if ip not in all_ips:
326
- all_ips[ip] = {"ip": ip, "count": 0, "sanitized_id": len(all_ips)}
327
- all_ips[ip]["count"] += 1
328
- if sanitize_ip:
329
- user_id = f"arena_user_{all_ips[ip]['sanitized_id']}"
330
- else:
331
- user_id = f"{all_ips[ip]['ip']}"
332
-
333
- if ban_ip_list is not None and ip in ban_ip_list:
334
- ct_banned += 1
335
- continue
336
-
337
- # Save the results
338
- battles.append(
339
- dict(
340
- question_id=question_id,
341
- model_a=models[0],
342
- model_b=models[1],
343
- winner=convert_type[row["type"]],
344
- judge=f"arena_user_{user_id}",
345
- # conversation_a=conversation_a,
346
- # conversation_b=conversation_b,
347
- # turn=len(conversation_a) // 2,
348
- anony=anony,
349
- # language=lang_code,
350
- tstamp=row["tstamp"],
351
- )
352
- )
353
-
354
- all_models.update(models_hidden)
355
- battles.sort(key=lambda x: x["tstamp"])
356
- last_updated_tstamp = battles[-1]["tstamp"]
357
-
358
- last_updated_datetime = datetime.datetime.fromtimestamp(
359
- last_updated_tstamp, tz=timezone("US/Pacific")
360
- ).strftime("%Y-%m-%d %H:%M:%S %Z")
361
-
362
- print(
363
- f"#votes: {len(data)}, #invalid votes: {ct_invalid}, "
364
- f"#leaked_identity: {ct_leaked_identity} "
365
- f"#banned: {ct_banned} "
366
- )
367
- print(f"#battles: {len(battles)}, #anony: {ct_anony}")
368
- print(f"#models: {len(all_models)}, {all_models}")
369
- print(f"last-updated: {last_updated_datetime}")
370
-
371
- if ban_ip_list is not None:
372
- for ban_ip in ban_ip_list:
373
- if ban_ip in all_ips:
374
- del all_ips[ban_ip]
375
- print("Top 30 IPs:")
376
- print(sorted(all_ips.values(), key=lambda x: x["count"], reverse=True)[:30])
377
- return battles
378
-
379
-
380
- if __name__ == "__main__":
381
- parser = argparse.ArgumentParser()
382
- parser.add_argument("--max-num-files", type=int)
383
- parser.add_argument(
384
- "--mode", type=str, choices=["simple", "conv_release"], default="simple"
385
- )
386
- parser.add_argument("--task_name", type=str, default="image_editing", choices=["image_editing", "t2i_generation"])
387
- parser.add_argument("--exclude-model-names", type=str, nargs="+")
388
- parser.add_argument("--ban-ip-file", type=str)
389
- parser.add_argument("--sanitize-ip", action="store_true", default=False)
390
- args = parser.parse_args()
391
-
392
- log_files = get_log_files(args.max_num_files)
393
- ban_ip_list = json.load(open(args.ban_ip_file)) if args.ban_ip_file else None
394
-
395
- battles = clean_battle_data(
396
- log_files, args.exclude_model_names or [], ban_ip_list, args.sanitize_ip, args.mode, args.task_name
397
- )
398
- last_updated_tstamp = battles[-1]["tstamp"]
399
- cutoff_date = datetime.datetime.fromtimestamp(
400
- last_updated_tstamp, tz=timezone("US/Pacific")
401
- ).strftime("%Y%m%d")
402
-
403
- if args.mode == "simple":
404
- for x in battles:
405
- for key in [
406
- "conversation_a",
407
- "conversation_b",
408
- "question_id",
409
- ]:
410
- if key in x:
411
- del x[key]
412
- print("Samples:")
413
- for i in range(min(4, len(battles))):
414
- print(battles[i])
415
- output = f"clean_battle_{args.task_name}_{cutoff_date}.json"
416
- elif args.mode == "conv_release":
417
- # new_battles = []
418
- # for x in battles:
419
- # if not x["anony"]:
420
- # continue
421
- # for key in []:
422
- # del x[key]
423
- # new_battles.append(x)
424
- # battles = new_battles
425
- output = f"clean_battle_{args.task_name}_conv_{cutoff_date}.json"
426
-
427
- with open(output, "w") as fout:
428
- json.dump(battles, fout, indent=2, ensure_ascii=False)
429
- print(f"Write cleaned data to {output}")
430
-
431
- with open("cut_off_date.txt", "w") as fout:
432
- fout.write(cutoff_date)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
arena_elo/elo_rating/elo_analysis.py DELETED
@@ -1,379 +0,0 @@
1
- import argparse
2
- from collections import defaultdict
3
- import datetime
4
- import json
5
- import math
6
- import pickle
7
- from pytz import timezone
8
-
9
- import numpy as np
10
- import pandas as pd
11
- import plotly.express as px
12
- from tqdm import tqdm
13
-
14
- from .model_registry import get_model_info
15
- from .basic_stats import get_log_files
16
- from .clean_battle_data import clean_battle_data
17
-
18
- pd.options.display.float_format = "{:.2f}".format
19
-
20
-
21
- def compute_elo(battles, K=4, SCALE=400, BASE=10, INIT_RATING=1000):
22
- rating = defaultdict(lambda: INIT_RATING)
23
-
24
- for rd, model_a, model_b, winner in battles[
25
- ["model_a", "model_b", "winner"]
26
- ].itertuples():
27
- ra = rating[model_a]
28
- rb = rating[model_b]
29
- ea = 1 / (1 + BASE ** ((rb - ra) / SCALE))
30
- eb = 1 / (1 + BASE ** ((ra - rb) / SCALE))
31
- if winner == "model_a":
32
- sa = 1
33
- elif winner == "model_b":
34
- sa = 0
35
- elif winner == "tie" or winner == "tie (bothbad)":
36
- sa = 0.5
37
- else:
38
- raise Exception(f"unexpected vote {winner}")
39
- rating[model_a] += K * (sa - ea)
40
- rating[model_b] += K * (1 - sa - eb)
41
-
42
- return dict(rating)
43
-
44
-
45
- def get_bootstrap_result(battles, func_compute_elo, num_round=1000):
46
- rows = []
47
- for i in tqdm(range(num_round), desc="bootstrap"):
48
- tmp_battles = battles.sample(frac=1.0, replace=True)
49
- rows.append(func_compute_elo(tmp_battles))
50
- df = pd.DataFrame(rows)
51
- return df[df.median().sort_values(ascending=False).index]
52
-
53
-
54
- def compute_elo_mle_with_tie(df, SCALE=400, BASE=10, INIT_RATING=1000):
55
- from sklearn.linear_model import LogisticRegression
56
-
57
- models = pd.concat([df["model_a"], df["model_b"]]).unique()
58
- models = pd.Series(np.arange(len(models)), index=models)
59
-
60
- # duplicate battles
61
- df = pd.concat([df, df], ignore_index=True)
62
- p = len(models.index)
63
- n = df.shape[0]
64
-
65
- X = np.zeros([n, p])
66
- X[np.arange(n), models[df["model_a"]]] = +math.log(BASE)
67
- X[np.arange(n), models[df["model_b"]]] = -math.log(BASE)
68
-
69
- # one A win => two A win
70
- Y = np.zeros(n)
71
- Y[df["winner"] == "model_a"] = 1.0
72
-
73
- # one tie => one A win + one B win
74
- # find tie + tie (both bad) index
75
- tie_idx = (df["winner"] == "tie") | (df["winner"] == "tie (bothbad)")
76
- tie_idx[len(tie_idx) // 2 :] = False
77
- Y[tie_idx] = 1.0
78
-
79
- lr = LogisticRegression(fit_intercept=False)
80
- lr.fit(X, Y)
81
-
82
- elo_scores = SCALE * lr.coef_[0] + INIT_RATING
83
- # calibrate llama-13b to 800 if applicable
84
- if "llama-13b" in models.index:
85
- elo_scores += 800 - elo_scores[models["llama-13b"]]
86
- return pd.Series(elo_scores, index=models.index).sort_values(ascending=False)
87
-
88
-
89
- def get_median_elo_from_bootstrap(bootstrap_df):
90
- median = dict(bootstrap_df.quantile(0.5))
91
- median = {k: int(v + 0.5) for k, v in median.items()}
92
- return median
93
-
94
-
95
- def compute_pairwise_win_fraction(battles, model_order, limit_show_number=None):
96
- # Times each model wins as Model A
97
- a_win_ptbl = pd.pivot_table(
98
- battles[battles["winner"] == "model_a"],
99
- index="model_a",
100
- columns="model_b",
101
- aggfunc="size",
102
- fill_value=0,
103
- )
104
-
105
- # Table counting times each model wins as Model B
106
- b_win_ptbl = pd.pivot_table(
107
- battles[battles["winner"] == "model_b"],
108
- index="model_a",
109
- columns="model_b",
110
- aggfunc="size",
111
- fill_value=0,
112
- )
113
-
114
- # Table counting number of A-B pairs
115
- num_battles_ptbl = pd.pivot_table(
116
- battles, index="model_a", columns="model_b", aggfunc="size", fill_value=0
117
- )
118
-
119
- # Computing the proportion of wins for each model as A and as B
120
- # against all other models
121
- row_beats_col_freq = (a_win_ptbl + b_win_ptbl.T) / (
122
- num_battles_ptbl + num_battles_ptbl.T
123
- )
124
-
125
- if model_order is None:
126
- prop_wins = row_beats_col_freq.mean(axis=1).sort_values(ascending=False)
127
- model_order = list(prop_wins.keys())
128
-
129
- if limit_show_number is not None:
130
- model_order = model_order[:limit_show_number]
131
-
132
- # Arrange ordering according to proprition of wins
133
- row_beats_col = row_beats_col_freq.loc[model_order, model_order]
134
- return row_beats_col
135
-
136
-
137
- def visualize_leaderboard_table(rating):
138
- models = list(rating.keys())
139
- models.sort(key=lambda k: -rating[k])
140
-
141
- emoji_dict = {
142
- 1: "πŸ₯‡",
143
- 2: "πŸ₯ˆ",
144
- 3: "πŸ₯‰",
145
- }
146
-
147
- md = ""
148
- md += "| Rank | Model | Elo Rating | Description |\n"
149
- md += "| --- | --- | --- | --- |\n"
150
- for i, model in enumerate(models):
151
- rank = i + 1
152
- minfo = get_model_info(model)
153
- emoji = emoji_dict.get(rank, "")
154
- md += f"| {rank} | {emoji} [{model}]({minfo.link}) | {rating[model]:.0f} | {minfo.description} |\n"
155
-
156
- return md
157
-
158
-
159
- def visualize_pairwise_win_fraction(battles, model_order):
160
- row_beats_col = compute_pairwise_win_fraction(battles, model_order)
161
- fig = px.imshow(
162
- row_beats_col,
163
- color_continuous_scale="RdBu",
164
- text_auto=".2f",
165
- height=700,
166
- width=700,
167
- )
168
- fig.update_layout(
169
- xaxis_title="Model B",
170
- yaxis_title="Model A",
171
- xaxis_side="top",
172
- title_y=0.07,
173
- title_x=0.5,
174
- )
175
- fig.update_traces(
176
- hovertemplate="Model A: %{y}<br>Model B: %{x}<br>Fraction of A Wins: %{z}<extra></extra>"
177
- )
178
-
179
- return fig
180
-
181
-
182
- def visualize_battle_count(battles, model_order):
183
- ptbl = pd.pivot_table(
184
- battles, index="model_a", columns="model_b", aggfunc="size", fill_value=0
185
- )
186
- battle_counts = ptbl + ptbl.T
187
- fig = px.imshow(
188
- battle_counts.loc[model_order, model_order],
189
- text_auto=True,
190
- height=700,
191
- width=700,
192
- )
193
- fig.update_layout(
194
- xaxis_title="Model B",
195
- yaxis_title="Model A",
196
- xaxis_side="top",
197
- title_y=0.07,
198
- title_x=0.5,
199
- )
200
- fig.update_traces(
201
- hovertemplate="Model A: %{y}<br>Model B: %{x}<br>Count: %{z}<extra></extra>"
202
- )
203
- return fig
204
-
205
-
206
- def visualize_average_win_rate(battles, limit_show_number):
207
- row_beats_col_freq = compute_pairwise_win_fraction(
208
- battles, None, limit_show_number=limit_show_number
209
- )
210
- fig = px.bar(
211
- row_beats_col_freq.mean(axis=1).sort_values(ascending=False),
212
- text_auto=".2f",
213
- height=500,
214
- width=700,
215
- )
216
- fig.update_layout(
217
- yaxis_title="Average Win Rate", xaxis_title="Model", showlegend=False
218
- )
219
- return fig
220
-
221
-
222
- def visualize_bootstrap_elo_rating(df, df_final, limit_show_number):
223
- bars = (
224
- pd.DataFrame(
225
- dict(
226
- lower=df.quantile(0.025),
227
- rating=df_final,
228
- upper=df.quantile(0.975),
229
- )
230
- )
231
- .reset_index(names="model")
232
- .sort_values("rating", ascending=False)
233
- )
234
- bars = bars[:limit_show_number]
235
- bars["error_y"] = bars["upper"] - bars["rating"]
236
- bars["error_y_minus"] = bars["rating"] - bars["lower"]
237
- bars["rating_rounded"] = np.round(bars["rating"], 2)
238
- fig = px.scatter(
239
- bars,
240
- x="model",
241
- y="rating",
242
- error_y="error_y",
243
- error_y_minus="error_y_minus",
244
- text="rating_rounded",
245
- height=500,
246
- width=700,
247
- )
248
- fig.update_layout(xaxis_title="Model", yaxis_title="Rating")
249
- return fig
250
-
251
-
252
- def report_elo_analysis_results(battles_json, rating_system="bt", num_bootstrap=100, anony_only=True):
253
- battles = pd.DataFrame(battles_json)
254
- battles = battles.sort_values(ascending=True, by=["tstamp"])
255
- # Only use anonymous votes
256
- if anony_only:
257
- battles = battles[battles["anony"]].reset_index(drop=True)
258
- battles_no_ties = battles[~battles["winner"].str.contains("tie")]
259
-
260
- # Online update
261
- elo_rating_online = compute_elo(battles)
262
-
263
- if rating_system == "bt":
264
- bootstrap_df = get_bootstrap_result(
265
- battles, compute_elo_mle_with_tie, num_round=num_bootstrap
266
- )
267
- elo_rating_final = compute_elo_mle_with_tie(battles)
268
- elif rating_system == "elo":
269
- bootstrap_df = get_bootstrap_result(
270
- battles, compute_elo, num_round=num_bootstrap
271
- )
272
- elo_rating_median = get_median_elo_from_bootstrap(bootstrap_df)
273
- elo_rating_final = elo_rating_median
274
-
275
- model_order = list(elo_rating_final.keys())
276
- model_order.sort(key=lambda k: -elo_rating_final[k])
277
-
278
- limit_show_number = 25 # limit show number to make plots smaller
279
- model_order = model_order[:limit_show_number]
280
-
281
- # leaderboard_table_df: elo rating, variance, 95% interval, number of battles
282
- leaderboard_table_df = pd.DataFrame(
283
- {
284
- "rating": elo_rating_final,
285
- "variance": bootstrap_df.var(),
286
- "rating_q975": bootstrap_df.quantile(0.975),
287
- "rating_q025": bootstrap_df.quantile(0.025),
288
- "num_battles": battles["model_a"].value_counts()
289
- + battles["model_b"].value_counts(),
290
- }
291
- )
292
-
293
- # Plots
294
- leaderboard_table = visualize_leaderboard_table(elo_rating_final)
295
- win_fraction_heatmap = visualize_pairwise_win_fraction(battles_no_ties, model_order)
296
- battle_count_heatmap = visualize_battle_count(battles_no_ties, model_order)
297
- average_win_rate_bar = visualize_average_win_rate(
298
- battles_no_ties, limit_show_number
299
- )
300
- bootstrap_elo_rating = visualize_bootstrap_elo_rating(
301
- bootstrap_df, elo_rating_final, limit_show_number
302
- )
303
-
304
- last_updated_tstamp = battles["tstamp"].max()
305
- last_updated_datetime = datetime.datetime.fromtimestamp(
306
- last_updated_tstamp, tz=timezone("US/Pacific")
307
- ).strftime("%Y-%m-%d %H:%M:%S %Z")
308
-
309
- return {
310
- "rating_system": rating_system,
311
- "elo_rating_online": elo_rating_online,
312
- "elo_rating_final": elo_rating_final,
313
- "leaderboard_table": leaderboard_table,
314
- "win_fraction_heatmap": win_fraction_heatmap,
315
- "battle_count_heatmap": battle_count_heatmap,
316
- "average_win_rate_bar": average_win_rate_bar,
317
- "bootstrap_elo_rating": bootstrap_elo_rating,
318
- "last_updated_datetime": last_updated_datetime,
319
- "last_updated_tstamp": last_updated_tstamp,
320
- "bootstrap_df": bootstrap_df,
321
- "leaderboard_table_df": leaderboard_table_df,
322
- }
323
-
324
-
325
- def pretty_print_elo_rating(rating):
326
- model_order = list(rating.keys())
327
- model_order.sort(key=lambda k: -rating[k])
328
- for i, model in enumerate(model_order):
329
- print(f"{i+1:2d}, {model:25s}, {rating[model]:.0f}")
330
-
331
-
332
- if __name__ == "__main__":
333
- parser = argparse.ArgumentParser()
334
- parser.add_argument("--clean-battle-file", type=str)
335
- parser.add_argument("--max-num-files", type=int)
336
- parser.add_argument("--num-bootstrap", type=int, default=100)
337
- parser.add_argument(
338
- "--rating-system", type=str, choices=["bt", "elo"], default="bt"
339
- )
340
- parser.add_argument("--exclude-tie", action="store_true", default=False)
341
- args = parser.parse_args()
342
-
343
- np.random.seed(42)
344
-
345
- if args.clean_battle_file:
346
- # Read data from a cleaned battle files
347
- battles = pd.read_json(args.clean_battle_file)
348
- else:
349
- # Read data from all log files
350
- log_files = get_log_files(args.max_num_files)
351
- battles = clean_battle_data(log_files)
352
-
353
- anony_results = report_elo_analysis_results(
354
- battles, rating_system=args.rating_system, num_bootstrap=args.num_bootstrap, anony_only=True
355
- )
356
- full_results = report_elo_analysis_results(
357
- battles, rating_system=args.rating_system, num_bootstrap=args.num_bootstrap, anony_only=False
358
- )
359
-
360
-
361
- print("# Online Elo")
362
- pretty_print_elo_rating(anony_results["elo_rating_online"])
363
- print("# Median")
364
- pretty_print_elo_rating(anony_results["elo_rating_final"])
365
- print(f"Annoy last update : {anony_results['last_updated_datetime']}")
366
- print(f"Full last update : {full_results['last_updated_datetime']}")
367
-
368
- last_updated_tstamp = full_results["last_updated_tstamp"]
369
- cutoff_date = datetime.datetime.fromtimestamp(
370
- last_updated_tstamp, tz=timezone("US/Pacific")
371
- ).strftime("%Y%m%d")
372
-
373
-
374
- results = {
375
- "anony": anony_results,
376
- "full": full_results,
377
- }
378
- with open(f"elo_results_{cutoff_date}.pkl", "wb") as fout:
379
- pickle.dump(results, fout)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
arena_elo/elo_rating/generate_leaderboard.py DELETED
@@ -1,72 +0,0 @@
1
- import fire
2
- import json
3
- import pandas as pd
4
- import pickle
5
-
6
-
7
- def main(
8
- model_info_file: str,
9
- elo_rating_pkl: str,
10
- output_csv: str
11
- ):
12
- model_info = json.load(open(model_info_file))
13
-
14
- with open(elo_rating_pkl, "rb") as fin:
15
- elo_rating_results = pickle.load(fin)
16
-
17
- anony_elo_rating_results = elo_rating_results["anony"]
18
- full_elo_rating_results = elo_rating_results["full"]
19
- anony_leaderboard_data = anony_elo_rating_results["leaderboard_table_df"]
20
- full_leaderboard_data = full_elo_rating_results["leaderboard_table_df"]
21
-
22
- # Model,MT-bench (score),Arena Elo rating,MMLU,License,Link
23
- fields = ["key", "Model", "Arena Elo rating (anony)", "Arena Elo rating (full)", "License", "Organization", "Link"]
24
- # set Organization and license to empty for now
25
- all_models = anony_leaderboard_data.index.tolist()
26
-
27
- for model in all_models:
28
- if not model in model_info:
29
- model_info[model] = {}
30
- model_info[model]["License"] = "N/A"
31
- model_info[model]["Organization"] = "N/A"
32
- model_info[model]["Link"] = "N/A"
33
- print(f"Model {model} not found in model_info.json")
34
- model_info[model]["Model"] = model
35
- model_info[model]["key"] = model
36
-
37
- if model in anony_leaderboard_data.index:
38
- model_info[model]["Arena Elo rating (anony)"] = anony_leaderboard_data.loc[model, "rating"]
39
- else:
40
- model_info[model]["Arena Elo rating (anony)"] = 0
41
-
42
- if model in full_elo_rating_results["leaderboard_table_df"].index:
43
- model_info[model]["Arena Elo rating (full)"] = full_leaderboard_data.loc[model, "rating"]
44
- else:
45
- model_info[model]["Arena Elo rating (full)"] = 0
46
- # if model in anony_leaderboard_data.index:
47
- # model_info[model]["Arena Elo rating"] = anony_leaderboard_data.loc[model, "rating"]
48
- # else:
49
- # model_info[model]["Arena Elo rating"] = 0
50
-
51
- final_model_info = {}
52
- for model in model_info:
53
- if "Model" in model_info[model]:
54
- final_model_info[model] = model_info[model]
55
- model_info = final_model_info
56
-
57
- exclude_keys = ['starting_from']
58
- for key in exclude_keys:
59
- for model in model_info:
60
- if key in model_info[model]:
61
- del model_info[model][key]
62
- df = pd.DataFrame(model_info).T
63
- df = df[fields]
64
- # sort by anony rating
65
- df = df.sort_values(by=["Arena Elo rating (anony)"], ascending=False)
66
- df.to_csv(output_csv, index=False)
67
- print("Leaderboard data saved to", output_csv)
68
- print(df)
69
-
70
-
71
- if __name__ == "__main__":
72
- fire.Fire(main)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
arena_elo/elo_rating/inspect_conv_rating.py DELETED
@@ -1,234 +0,0 @@
1
- import argparse
2
- import code
3
- import datetime
4
- import json
5
- import os
6
- from pytz import timezone
7
- import time
8
-
9
- import pandas as pd
10
- from tqdm import tqdm
11
- import csv
12
-
13
- import base64
14
- from icecream import ic
15
- from openai import OpenAI
16
-
17
- # Function to encode the image
18
- def encode_image(image_path):
19
- with open(image_path, "rb") as image_file:
20
- return base64.b64encode(image_file.read()).decode('utf-8')
21
-
22
- def get_log_files(max_num_files=None):
23
- dates = []
24
- for month in [2, 3]:
25
- for day in range(1, 32):
26
- dates.append(f"2024-{month:02d}-{day:02d}")
27
-
28
- num_servers = 1
29
- filenames = []
30
- for d in dates:
31
- for i in range(num_servers):
32
- # name = os.path.expanduser(f"~/fastchat_logs/server{i}/{d}-conv.json")
33
- name = os.path.expanduser(f"vision-arena-logs/{d}-conv.json")
34
- if os.path.exists(name):
35
- filenames.append(name)
36
- max_num_files = max_num_files or len(filenames)
37
- filenames = filenames[-max_num_files:]
38
- return filenames
39
-
40
-
41
- def pretty_print_conversation(messages):
42
- for role, msg in messages:
43
- print(f"[[{role}]]: {msg}")
44
-
45
-
46
- def get_gpt4v_response(client, img_bs64=None, text_prompt="", use_vision=False):
47
- if use_vision:
48
- response = client.chat.completions.create(
49
- model="gpt-4-vision-preview",
50
- messages=[
51
- {
52
- "role": "user",
53
- "content": [
54
- {"type": "text", "text": text_prompt},
55
- {
56
- "type": "image_url",
57
- "image_url": {
58
- "url": f"data:image/jpeg;base64,{img_bs64}"
59
- }
60
- },
61
- ],
62
- }
63
- ],
64
- max_tokens=100,
65
- )
66
- else:
67
- response = client.chat.completions.create(
68
- model="gpt-4-vision-preview",
69
- messages=[
70
- {
71
- "role": "user",
72
- "content": [
73
- {"type": "text", "text": text_prompt},
74
- ],
75
- }
76
- ],
77
- max_tokens=100,
78
- )
79
- return response.choices[0].message.content
80
-
81
- task_template_map = {
82
- "image_caption": "Give me the semantic alignment score between the given image and the given caption: \"{generated_sentence}\" on a scale of 0-100. Only reply the score value.",
83
- "vqa": "Rate the answer correctness regarding the question within the context of the given image on a scale of 0-100. Only reply the score value.",
84
- "pair_rate_old": "[Instruction]\n\"{instruction}\"\n\n\"{generated_sentence}\"\n\n[System]\nGiven the instruction and the image, please compare the correctness of responses A and B. Reply with \"leftvote\" if you find A better, \"rightvote\" if B is better, \"bothbad_vote\" if both responses are wrong, and \"tievote\" if both responses are equally satisfactory. If you are unable to make a decision, please reply with \"NA\".",
85
- "pair_rate_wexplanation": "[Instruction]\n\"{instruction}\"\n\n\"{generated_sentence}\"[System]\nPlease act as an impartial judge and evaluate the quality of the responses provided by two AI assistants to the user question displayed below. You should choose the assistant that follows the user’s instructions and answers the user’s question better. Your evaluation should consider factors such as the helpfulness, relevance, accuracy, depth, creativity, and level of detail of their responses. Begin your evaluation by comparing the two responses and provide a short explanation. Avoid any positional biases and ensure that the order in which the responses were presented does not influence your decision. Do not allow the length of the responses to influence your evaluation. Do not favor certain names of the assistants. Be as objective as possible. After providing your explanation, output your final verdict by strictly following this format: \"[[A]]\" if assistant A is better, \"[[B]]\" if assistant B is better, and \"[[C]]\" for a tie.",
86
- "pair_rate": "[Instruction]\n\"{instruction}\"\n\n\"{generated_sentence}\"\n\n[System]\nPlease act as an impartial judge and evaluate the quality of the responses provided by two AI assistants to the user question displayed below. You should choose the assistant that follows the user’s instructions and answers the user’s question better. Your evaluation should consider factors such as the helpfulness, relevance, accuracy, depth, creativity, and level of detail of their responses. Begin your evaluation by comparing the two responses and provide a short explanation. Avoid any positional biases and ensure that the order in which the responses were presented does not influence your decision. Do not allow the length of the responses to influence your evaluation. Do not favor certain names of the assistants. Be as objective as possible. Reply with \"leftvote\" if you find assistant A better, \"rightvote\" if assistant B is better, \"bothbad_vote\" if both responses are wrong, and \"tievote\" if both assistants provide equally satisfactory answers. If you are unable to make a decision, please reply with \"NA\"."
87
- }
88
-
89
- def inspect_convs(log_files):
90
- ic(log_files)
91
- data = []
92
- total_vote = 0
93
- correct_vote = 0
94
-
95
- client = OpenAI()
96
- with open('all_pairvote_log_wgpt_prtchatbot.csv', 'w', newline='') as csvfile:
97
- # fieldnames = ['tstamp', 'type', 'model_1', 'model_2', 'template_name_1', 'template_name_2', 'system_message_1', 'system_message_2', 'role_1', 'role_2', 'instruction_1', 'instruction_2', 'message_1', 'message_2', 'offset_1', 'offset_2', 'conv_id_1', 'conv_id_2', 'model_name_1', 'model_name_2', 'ip']
98
- fieldnames = ['tstamp', 'type', 'models', 'states', 'ip', 'gpt_vote']
99
- writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
100
-
101
- # Write the header
102
- writer.writeheader()
103
-
104
- for filename in tqdm(log_files, desc="read files"):
105
- for retry in range(5):
106
- try:
107
- lines = open(filename).readlines()
108
- break
109
- except FileNotFoundError:
110
- time.sleep(2)
111
-
112
- for l in lines:
113
- row = json.loads(l)
114
-
115
- if "states" not in row:
116
- continue
117
- if row["type"] not in ["leftvote", "rightvote", "bothbad_vote", "tievote"]:
118
- continue
119
-
120
- model_names = row["states"][0]["model_name"], row["states"][1]["model_name"]
121
-
122
-
123
- # Iterate through each state and write the relevant information
124
- if not len(row["states"][0]['messages']): continue
125
- # ic(row["states"][0]['messages'][1][1])
126
-
127
- if row["states"][0]['messages'][1][1] is None or row["states"][1]['messages'][1][1] is None or "NETWORK ERROR" in row["states"][0]['messages'][1][1] or "NETWORK ERROR" in row["states"][1]['messages'][1][1]: continue
128
- total_vote += 1
129
- # row = {
130
- # 'tstamp': row['tstamp'],
131
- # 'type': row['type'],
132
- # 'model_1': row['models'][0],
133
- # 'model_2': row['models'][1],
134
- # 'template_name_1': row["states"][0]['template_name'],
135
- # 'system_message_1': row["states"][0]['system_message'],
136
- # 'template_name_2': row["states"][1]['template_name'],
137
- # 'system_message_2': row["states"][1]['system_message'],
138
- # 'role_1': row["states"][0]['roles'],
139
- # 'role_2': row["states"][1]['roles'],
140
- # 'instruction_1': row["states"][0]['messages'][0][1],
141
- # 'instruction_2': row["states"][1]['messages'][0][1],
142
- # 'message_1': row["states"][0]['messages'][1][1],
143
- # 'message_2': row["states"][1]['messages'][1][1],
144
- # 'offset_1': row["states"][0]['offset'],
145
- # 'offset_2': row["states"][1]['offset'],
146
- # 'conv_id_1': row["states"][0]['conv_id'],
147
- # 'conv_id_2': row["states"][1]['conv_id'],
148
- # 'model_name_1': row["states"][0]['model_name'],
149
- # 'model_name_2': row["states"][1]['model_name'],
150
- # 'ip': row['ip']
151
- # }
152
- # writer.writerow(row)
153
- # Convert complex objects to JSON strings
154
- # TODO: check two image are the same
155
- conv_id = row["states"][0]['conv_id']
156
- image_path = os.path.join("/local/home/yujielu/project/Arena-Elo/vision-arena-logs", os.path.basename(filename)[:-5]+"input_images", f"input_image_{conv_id}.png")
157
- if not os.path.exists(image_path):
158
- response = "NA"
159
- ic(image_path)
160
- else:
161
- base64_image = encode_image(image_path)
162
- left_response = row["states"][0]['messages'][1][1]
163
- right_response = row["states"][1]['messages'][1][1]
164
- sep = "-" * 20
165
- instruction = row["states"][0]['messages'][0][1]
166
- generated_sentence = f"[The Start of Assistant A’s Answer]\n{left_response}\n[The End of Assistant A’s Answer]\n\n[The Start of Assistant B’s Answer]\n{right_response}\n[The End of Assistant B’s Answer]"
167
- text_prompt = task_template_map["pair_rate"].format(instruction=instruction, generated_sentence=generated_sentence)
168
- # ic(text_prompt)
169
- try:
170
- response = get_gpt4v_response(client, img_bs64=base64_image, text_prompt=text_prompt, use_vision=True)
171
- except:
172
- ic(">>> skip")
173
- response = "NA"
174
-
175
- # response = get_gpt4v_response(client, img_bs64=base64_image, text_prompt=text_prompt, use_vision=True)
176
- ic(row['type'], response)
177
- if response.strip() not in ["leftvote", "rightvote", "bothbad_vote", "tievote"]:
178
- response = "NA"
179
- # ic(generated_sentence)
180
-
181
- # if row['type'] == "leftvote":
182
- # row['type'] = "A"
183
- # elif row['type'] == "rightvote":
184
- # row['type'] = "B"
185
- # elif row['type'] in ["bothbad_vote", "tievote"]:
186
- # row['type'] = "C"
187
- if row['type'] == response.strip():
188
- correct_vote += 1
189
- row['models'] = json.dumps(row['models'])
190
- row['states'] = json.dumps(row['states'], ensure_ascii=False)
191
- row['gpt_vote'] = response
192
-
193
- # Write the modified row to the CSV file
194
- writer.writerow(row)
195
- # if row["type"] == "leftvote":
196
- # winner, loser = model_names[0], model_names[1]
197
- # winner_conv, loser_conv = row["states"][0], row["states"][1]
198
- # elif row["type"] == "rightvote":
199
- # loser, winner = model_names[0], model_names[1]
200
- # loser_conv, winner_conv = row["states"][0], row["states"][1]
201
-
202
- # if loser == "llava-v1.5-13b" and winner == "llava-v1.5-13b":
203
- # print("=" * 20)
204
- # print(f"Winner: {winner}")
205
- # pretty_print_conversation(winner_conv["messages"])
206
- # print(f"Loser: {loser}")
207
- # pretty_print_conversation(loser_conv["messages"])
208
- # print("=" * 20)
209
- # input()
210
- # if row['type'] == 'bothbad_vote':
211
- # from icecream import ic
212
- # ic(model_names)
213
- # if row["type"] == "bothbad_vote" and "gpt-4-vision-preview" in model_names:
214
- # print("=" * 20)
215
- # print(f"Model A: {model_names[0]}")
216
- # pretty_print_conversation(row["states"][0]["messages"])
217
- # print(f"Model B: {model_names[1]}")
218
- # pretty_print_conversation(row["states"][1]["messages"])
219
- # print("=" * 20)
220
- # input()
221
- # if correct_vote >= 300: break
222
- ic(total_vote, correct_vote)
223
-
224
-
225
- if __name__ == "__main__":
226
- parser = argparse.ArgumentParser()
227
- parser.add_argument("--max-num-files", type=int)
228
- args = parser.parse_args()
229
-
230
- log_files = get_log_files(args.max_num_files)
231
-
232
-
233
-
234
- inspect_convs(log_files)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
arena_elo/elo_rating/inspect_cost.py DELETED
@@ -1,177 +0,0 @@
1
- import fire
2
- import time
3
- import json
4
- from collections import defaultdict
5
- from .basic_stats import get_log_files, NUM_SERVERS, LOG_ROOT_DIR
6
- from .utils import detect_language, get_time_stamp_from_date, get_input_image_path, load_image_from_path
7
- from tqdm import tqdm
8
- VOTES = ["tievote", "leftvote", "rightvote", "bothbad_vote", "chat"]
9
-
10
-
11
def remove_html(raw):
    """Strip the display markup wrapped around a model name.

    Two decorated forms are recognised:

    * ``<h3>...: NAME</h3>\\n`` -- everything up to and including the first
      ``": "`` and the trailing ``</h3>\\n`` are dropped.
    * ``### Model A: NAME`` / ``### Model B: NAME`` -- the fixed prefix is
      dropped.

    Any other string is returned unchanged.
    """
    if raw.startswith("<h3>"):
        name_start = raw.find(": ") + 2
        name_end = -len("</h3>\n")
        return raw[name_start:name_end]
    if raw.startswith(("### Model A: ", "### Model B: ")):
        # Both prefixes have the same length, so either one gives the offset.
        return raw[len("### Model A: "):]
    return raw
17
-
18
-
19
def read_file(filename):
    """Load the vote/chat records from one JSON-lines log file.

    Every line is parsed as JSON and kept only when its ``"type"`` is listed
    in ``VOTES``.  Opening the file is retried up to five times, two seconds
    apart, while it does not exist; if it never appears an empty list is
    returned.

    Args:
        filename: Path of the log file to read.

    Returns:
        The matching rows (dicts), in file order.
    """
    for _ in range(5):
        try:
            # The context manager closes the handle as soon as the rows are
            # read; a bare open() would leave it to the garbage collector.
            with open(filename) as log_file:
                return [
                    row
                    for row in map(json.loads, log_file)
                    if row["type"] in VOTES
                ]
        except FileNotFoundError:
            time.sleep(2)
    return []
32
-
33
-
34
def read_file_parallel(log_files, num_threads=16):
    """Read many log files with a process pool and concatenate their rows.

    Args:
        log_files: Sequence of log file paths (its length drives the progress bar).
        num_threads: Number of worker processes in the ``multiprocessing.Pool``.

    Returns:
        One flat list holding the rows of every file, in ``log_files`` order.
    """
    from multiprocessing import Pool

    with Pool(num_threads) as pool:
        # imap preserves input order, so the rows stay grouped file by file.
        rows_per_file = list(
            tqdm(pool.imap(read_file, log_files), total=len(log_files))
        )
    return [row for file_rows in rows_per_file for row in file_rows]
43
-
44
def num_tokens(s:str):
    """Roughly estimate the token count of ``s`` as a quarter of its length.

    ``None`` counts as zero tokens; the result for a string is a float.
    """
    return 0 if s is None else len(s) / 4
48
-
49
def main(
):
    """Print request, token, image-size and GPT-4V cost statistics for the logs.

    Reads every log file returned by ``get_log_files()``, then for each
    ``"chat"`` row and each usable battle row records, per model: the number
    of requests, a token estimate for every input and output message, and the
    size of the input image (``None`` when the image cannot be loaded).  The
    aggregated figures are printed as JSON.

    Battle rows are skipped when a model name is missing, when exactly one of
    the two public names is empty, or when a non-anonymous battle's public
    names differ from the names stored in the conversation states.
    """
    log_files = get_log_files()
    data = read_file_parallel(log_files)

    all_model_counts = defaultdict(int)
    all_model_input_tokens_counts = defaultdict(list)
    all_model_output_tokens_counts = defaultdict(list)
    all_model_image_sizes = defaultdict(list)
    chat_battle_counts = defaultdict(int)

    def record_image_size(model, tstamp, conv_id):
        """Load the conversation's input image once and store its size."""
        image = load_image_from_path(get_input_image_path(tstamp, conv_id))
        all_model_image_sizes[model].append(None if image is None else image.size)

    def record_tokens(model, state):
        """Store a token estimate for every input and output message of a state."""
        messages = state["messages"]
        offset = state["offset"]
        # From `offset` on, messages alternate: input first, then the reply.
        for message in messages[offset::2]:
            all_model_input_tokens_counts[model].append(num_tokens(message[1]))
        for message in messages[offset + 1 :: 2]:
            all_model_output_tokens_counts[model].append(num_tokens(message[1]))

    for row in tqdm(data, desc="counting"):
        if row['type'] == "chat":
            chat_battle_counts["chat"] += 1
            model = row['model']
            all_model_counts[model] += 1
            record_image_size(model, row["tstamp"], row["state"]["conv_id"])
            try:
                record_tokens(model, row["state"])
            except Exception:
                # Show the offending row before propagating the error.
                print(row)
                raise
            continue

        chat_battle_counts[row['type']] += 1
        if row["models"][0] is None or row["models"][1] is None:
            continue

        # Resolve model names
        models_public = [remove_html(row["models"][0]), remove_html(row["models"][1])]
        if "model_name" in row["states"][0]:
            models_hidden = [
                row["states"][0]["model_name"],
                row["states"][1]["model_name"],
            ]
            if models_hidden[0] is None:
                models_hidden = models_public
        else:
            models_hidden = models_public

        if (models_public[0] == "" and models_public[1] != "") or (
            models_public[1] == "" and models_public[0] != ""
        ):
            continue

        if models_public[0] == "" or models_public[0] == "Model A":
            # Anonymous battle: the real names live in the conversation states.
            models = models_hidden
        else:
            models = models_public
            if not models_public == models_hidden:
                continue

        all_model_counts[models[0]] += 1
        all_model_counts[models[1]] += 1
        tstamp = row["tstamp"]
        for model, state in zip(models, row["states"][:2]):
            record_image_size(model, tstamp, state["conv_id"])
        for model, state in zip(models, row["states"][:2]):
            record_tokens(model, state)

    print("### Chat battle counts (requests)")
    print(json.dumps(chat_battle_counts, indent=4))

    print("### Model counts (requests)")
    print(json.dumps(all_model_counts, indent=4))

    print("### Model Avg input tokens counts (tokens)")
    average_input_tokens_counts = {
        model: sum(counts) / len(counts)
        for model, counts in all_model_input_tokens_counts.items()
    }
    print(json.dumps(average_input_tokens_counts, indent=4))

    print("### Model AVg output tokens counts (tokens)")
    average_output_tokens_counts = {
        model: sum(counts) / len(counts)
        for model, counts in all_model_output_tokens_counts.items()
    }
    print(json.dumps(average_output_tokens_counts, indent=4))

    # NOTE(review): if load_image_from_path returns a PIL image, `.size` is
    # (width, height), so the "(height, width)" label below would be swapped
    # -- confirm against the helper before relying on the order.
    print("### Model Avg image sizes (height, width)")
    average_image_sizes = {}
    for model, sizes in all_model_image_sizes.items():
        # Average over the images that actually loaded; dividing by the full
        # count (missing images included) would drag the mean towards zero.
        known_sizes = [size for size in sizes if size is not None]
        if known_sizes:
            avg_height = sum(size[0] for size in known_sizes) / len(known_sizes)
            avg_width = sum(size[1] for size in known_sizes) / len(known_sizes)
        else:
            avg_height = avg_width = None
        average_image_sizes[model] = (avg_height, avg_width)
    print(json.dumps(average_image_sizes, indent=4))

    print("### GPT-4V estimated cost (USD)")
    gpt_4v_name = "gpt-4-vision-preview"
    gpt_4v_cost = {}
    gpt_4v_cost['input'] = sum(all_model_input_tokens_counts[gpt_4v_name]) / 1000 * 0.01
    gpt_4v_cost['output'] = sum(all_model_output_tokens_counts[gpt_4v_name]) / 1000 * 0.03

    all_image_cost = 0
    for size in all_model_image_sizes[gpt_4v_name]:
        if size is None:
            continue
        # Tile-based estimate: 170 tokens per 512-pixel tile plus a flat 85.
        all_image_tokens = (size[0] // 512 + 1) * (size[1] // 512 + 1) * 170 + 85
        all_image_cost += all_image_tokens / 1000 * 0.01
    gpt_4v_cost['image'] = all_image_cost
    print(json.dumps(gpt_4v_cost, indent=4))
172
-
173
-
174
-
175
-
176
# Run main() through python-fire when the module is executed as a script.
if __name__ == "__main__":
    fire.Fire(main)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
arena_elo/elo_rating/inspect_elo_rating_pkl.py DELETED
@@ -1,33 +0,0 @@
1
- import pickle
2
- import plotly.graph_objects as go
3
-
4
def output_figure(data, figure_name="battle_count_heatmap", label="anony"):
    """Restyle one stored figure and save it as ``<figure_name>.png``.

    Args:
        data: Mapping of result label -> mapping of figure name -> figure
            object (the figure must offer ``update_layout`` and ``write_image``).
        figure_name: Key of the figure inside ``data[label]``; also used as
            the title text and the output file stem.
        label: Which result set to read.  Defaults to ``"anony"``, the key this
            file's script uses (the former default ``"annoy"`` was a typo that
            raised ``KeyError``).

    Raises:
        KeyError: If ``label`` or ``figure_name`` is not present in ``data``.
    """
    fig = data[label][figure_name]
    fig.update_layout(
        height=700,
        width=700,
        title={'text': f'{figure_name}', 'x': 0.5, 'y': 0.07},
        xaxis_title="Model B",
        yaxis_title="Model A",
        margin={'t': 60},
    )
    fig.write_image(f"{figure_name}.png")
16
-
17
# NOTE: pickle.load can execute arbitrary code embedded in the file; only run
# this script on a results file you generated yourself.
with open("./results/latest/elo_results.pkl",'rb') as pkl_file:
    data = pickle.load(pkl_file)

print()
anony_leaderboard = data["anony"]["leaderboard_table_df"]
print(data["anony"].keys())

# Export the two heatmaps of the anonymous battles as PNG files.
for figure_name in ('win_fraction_heatmap', 'battle_count_heatmap'):
    output_figure(data, figure_name, "anony")

# Print both leaderboards, highest rating first.
df = anony_leaderboard.sort_values(by=["rating"], ascending=False)
print(df)
df = data["full"]["leaderboard_table_df"].sort_values(by=["rating"], ascending=False)
print(df)
print('done')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
arena_elo/elo_rating/model_registry.py DELETED
@@ -1,578 +0,0 @@
1
- """Additional information of the models."""
2
- from collections import namedtuple, OrderedDict
3
- from typing import List
4
-
5
-
6
# Display metadata of a model: short name, homepage link, one-line description.
ModelInfo = namedtuple("ModelInfo", ["simple_name", "link", "description"])


# Registry: full model name -> ModelInfo, kept in registration order.
model_info = OrderedDict()


def register_model_info(
    full_names: List[str], simple_name: str, link: str, description: str
):
    """Register one shared ``ModelInfo`` under every name in ``full_names``.

    A name that is already registered is overwritten with the new info.
    """
    shared_info = ModelInfo(simple_name, link, description)
    model_info.update((full_name, shared_info) for full_name in full_names)
19
-
20
-
21
def get_model_info(name: str) -> ModelInfo:
    """Return the registered ``ModelInfo`` for ``name``.

    Unregistered names get a placeholder whose simple name is ``name`` itself,
    with an empty link and a reminder to register the model.
    """
    try:
        return model_info[name]
    except KeyError:
        # To fix this, please use `register_model_info` to register your model
        return ModelInfo(
            name, "", "Register the description at arena.model/model_registry.py"
        )
29
-
30
-
31
# Built-in registrations, one row per model family:
# (full model names, simple name, link, description).
# Rows are registered top to bottom, so the registry order follows this table.
_MODEL_REGISTRATIONS = (
    (
        [
            "IEITYuan/Yuan2-2B-Janus-hf",
            "IEITYuan/Yuan2-2B-hf",
            "IEITYuan/Yuan2-51B-hf",
            "IEITYuan/Yuan2-102B-hf",
        ],
        "IEIT-Yuan2",
        "https://github.com/IEIT-Yuan/Yuan-2.0",
        "Yuan2.0 is a new generation Fundamental Large Language Model developed by IEIT System.",
    ),
    (
        ["mixtral-8x7b-instruct-v0.1", "mistral-7b-instruct"],
        "Mixtral of experts",
        "https://mistral.ai/news/mixtral-of-experts/",
        "A Mixture-of-Experts model by Mistral AI",
    ),
    (
        ["gemini-pro"],
        "Gemini",
        "https://blog.google/technology/ai/google-gemini-pro-imagen-duet-ai-update/",
        "Gemini by Google",
    ),
    (
        ["gemini-pro-vision"],
        "Gemini",
        "https://blog.google/technology/ai/google-gemini-pro-imagen-duet-ai-update/",
        "Gemini by Google",
    ),
    (
        ["solar-10.7b-instruct-v1.0"],
        "SOLAR-10.7B-Instruct",
        "https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0",
        "A model trained using depth up-scaling by Upstage AI",
    ),
    (
        ["gpt-4-turbo"],
        "GPT-4-Turbo",
        "https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo",
        "GPT-4-Turbo by OpenAI",
    ),
    (
        ["gpt-4-vision-preview"],
        "gpt-4-vision-preview",
        "https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo",
        "GPT-4(V) by OpenAI",
    ),
    (
        ["gpt-3.5-turbo", "gpt-3.5-turbo-0314", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-1106"],
        "GPT-3.5",
        "https://platform.openai.com/docs/models/gpt-3-5",
        "GPT-3.5-Turbo by OpenAI",
    ),
    (
        ["gpt-4", "gpt-4-0314", "gpt-4-0613"],
        "GPT-4",
        "https://openai.com/research/gpt-4",
        "GPT-4 by OpenAI",
    ),
    (
        ["claude-2.1", "claude-2.0"],
        "Claude",
        "https://www.anthropic.com/index/claude-2",
        "Claude 2 by Anthropic",
    ),
    (
        ["claude-1"],
        "Claude",
        "https://www.anthropic.com/index/introducing-claude",
        "Claude 1 by Anthropic",
    ),
    (
        ["claude-instant-1", "claude-instant-1.2"],
        "Claude Instant",
        "https://www.anthropic.com/index/introducing-claude",
        "Claude Instant by Anthropic",
    ),
    (
        ["pplx-70b-online", "pplx-7b-online"],
        "pplx-online-llms",
        "https://blog.perplexity.ai/blog/introducing-pplx-online-llms",
        "Online LLM API by Perplexity AI",
    ),
    (
        ["openhermes-2.5-mistral-7b"],
        "OpenHermes-2.5-Mistral-7B",
        "https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B",
        "a mistral-based model fine-tuned on 1M GPT-4 outputs",
    ),
    (
        ["starling-lm-7b-alpha"],
        "Starling-LM-7B-alpha",
        "https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha",
        "an open model trained using RLAIF by Berkeley",
    ),
    (
        ["tulu-2-dpo-70b"],
        "Tulu 2",
        "https://huggingface.co/allenai/tulu-2-dpo-70b",
        "an instruction and RLHF model by UW/AllenAI",
    ),
    (
        ["yi-34b-chat", "yi-6b-chat"],
        "Yi-Chat",
        "https://huggingface.co/01-ai/Yi-34B-Chat",
        "A large language model by 01 AI",
    ),
    (
        ["llama-2-70b-chat", "llama-2-34b-chat", "llama-2-13b-chat", "llama-2-7b-chat"],
        "Llama 2",
        "https://ai.meta.com/llama/",
        "open foundation and fine-tuned chat models by Meta",
    ),
    (
        [
            "vicuna-33b",
            "vicuna-33b-v1.3",
            "vicuna-13b",
            "vicuna-13b-v1.3",
            "vicuna-7b",
            "vicuna-7b-v1.3",
        ],
        "Vicuna",
        "https://lmsys.org/blog/2023-03-30-vicuna/",
        "a chat assistant fine-tuned on user-shared conversations by LMSYS",
    ),
    (
        ["chatglm3-6b", "chatglm2-6b", "chatglm-6b"],
        "ChatGLM",
        "https://chatglm.cn/blog",
        "an open bilingual dialogue language model by Tsinghua University",
    ),
    (
        ["openchat-3.5"],
        "OpenChat 3.5",
        "https://github.com/imoneoi/openchat",
        "an open model fine-tuned on Mistral-7B using C-RLFT",
    ),
    (
        ["tenyxchat-7b-v1"],
        "TenyxChat-7B",
        "https://huggingface.co/tenyx/TenyxChat-7B-v1",
        "an open model DPO trained on top of OpenChat-3.5 using Tenyx fine-tuning",
    ),
    (
        ["zephyr-7b-beta", "zephyr-7b-alpha"],
        "Zephyr",
        "https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha",
        "a chatbot fine-tuned from Mistral by Hugging Face",
    ),
    (
        ["notus-7b-v1"],
        "Notus",
        "https://huggingface.co/argilla/notus-7b-v1",
        "a chatbot fine-tuned from Zephyr SFT by Argilla",
    ),
    (
        ["catppt"],
        "CatPPT",
        "https://huggingface.co/rishiraj/CatPPT",
        "a chatbot fine-tuned from a SLERP merged model by Rishiraj Acharya",
    ),
    (
        ["TinyLlama"],
        "TinyLlama",
        "https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        "The TinyLlama project is an open endeavor to pretrain a 1.1B Llama model on 3 trillion tokens.",
    ),
    (
        ["qwen-14b-chat"],
        "Qwen",
        "https://huggingface.co/Qwen/Qwen-14B-Chat",
        "a large language model by Alibaba Cloud",
    ),
    (
        ["codellama-34b-instruct", "codellama-13b-instruct", "codellama-7b-instruct"],
        "Code Llama",
        "https://ai.meta.com/blog/code-llama-large-language-model-coding/",
        "open foundation models for code by Meta",
    ),
    (
        ["wizardlm-70b", "wizardlm-30b", "wizardlm-13b"],
        "WizardLM",
        "https://github.com/nlpxucan/WizardLM",
        "an instruction-following LLM using evol-instruct by Microsoft",
    ),
    (
        ["wizardcoder-15b-v1.0"],
        "WizardLM",
        "https://github.com/nlpxucan/WizardLM/tree/main/WizardCoder",
        "Empowering Code Large Language Models with Evol-Instruct",
    ),
    (
        ["mpt-7b-chat", "mpt-30b-chat"],
        "MPT-Chat",
        "https://www.mosaicml.com/blog/mpt-30b",
        "a chatbot fine-tuned from MPT by MosaicML",
    ),
    (
        ["guanaco-33b", "guanaco-65b"],
        "Guanaco",
        "https://github.com/artidoro/qlora",
        "a model fine-tuned with QLoRA by UW",
    ),
    (
        ["gpt4all-13b-snoozy"],
        "GPT4All-Snoozy",
        "https://github.com/nomic-ai/gpt4all",
        "a finetuned LLaMA model on assistant style data by Nomic AI",
    ),
    (
        ["koala-13b"],
        "Koala",
        "https://bair.berkeley.edu/blog/2023/04/03/koala",
        "a dialogue model for academic research by BAIR",
    ),
    (
        ["RWKV-4-Raven-14B"],
        "RWKV-4-Raven",
        "https://huggingface.co/BlinkDL/rwkv-4-raven",
        "an RNN with transformer-level LLM performance",
    ),
    (
        ["alpaca-13b"],
        "Alpaca",
        "https://crfm.stanford.edu/2023/03/13/alpaca.html",
        "a model fine-tuned from LLaMA on instruction-following demonstrations by Stanford",
    ),
    (
        ["oasst-pythia-12b"],
        "OpenAssistant (oasst)",
        "https://open-assistant.io",
        "an Open Assistant for everyone by LAION",
    ),
    (
        ["oasst-sft-7-llama-30b"],
        "OpenAssistant (oasst)",
        "https://open-assistant.io",
        "an Open Assistant for everyone by LAION",
    ),
    (
        ["palm-2"],
        "PaLM 2 Chat",
        "https://cloud.google.com/vertex-ai/docs/release-notes#May_10_2023",
        "PaLM 2 for Chat (chat-bison@001) by Google",
    ),
    (
        ["llama-7b", "llama-13b"],
        "LLaMA",
        "https://arxiv.org/abs/2302.13971",
        "open and efficient foundation language models by Meta",
    ),
    (
        ["open-llama-7b-v2-open-instruct", "open-llama-7b-open-instruct"],
        "Open LLaMa (Open Instruct)",
        "https://medium.com/vmware-data-ml-blog/starter-llm-for-the-enterprise-instruction-tuning-openllama-7b-d05fc3bbaccc",
        "Open LLaMa fine-tuned on instruction-following data by VMware",
    ),
    (
        ["dolly-v2-12b"],
        "Dolly",
        "https://www.databricks.com/blog/2023/04/12/dolly-first-open-commercially-viable-instruction-tuned-llm",
        "an instruction-tuned open large language model by Databricks",
    ),
    (
        ["stablelm-tuned-alpha-7b"],
        "StableLM",
        "https://github.com/stability-AI/stableLM",
        "Stability AI language models",
    ),
    (
        ["codet5p-6b"],
        "CodeT5p-6b",
        "https://huggingface.co/Salesforce/codet5p-6b",
        "Code completion model released by Salesforce",
    ),
    (
        ["fastchat-t5-3b", "fastchat-t5-3b-v1.0"],
        "FastChat-T5",
        "https://huggingface.co/lmsys/fastchat-t5-3b-v1.0",
        "a chat assistant fine-tuned from FLAN-T5 by LMSYS",
    ),
    (
        ["phoenix-inst-chat-7b"],
        "Phoenix-7B",
        "https://huggingface.co/FreedomIntelligence/phoenix-inst-chat-7b",
        "a multilingual chat assistant fine-tuned from Bloomz to democratize ChatGPT across languages by CUHK(SZ)",
    ),
    (
        ["realm-7b-v1"],
        "ReaLM",
        "https://github.com/FreedomIntelligence/ReaLM",
        "A chatbot fine-tuned from LLaMA2 with data generated via iterative calls to UserGPT and ChatGPT by CUHK(SZ) and SRIBD.",
    ),
    (
        ["billa-7b-sft"],
        "BiLLa-7B-SFT",
        "https://huggingface.co/Neutralzz/BiLLa-7B-SFT",
        "an instruction-tuned bilingual LLaMA with enhanced reasoning ability by an independent researcher",
    ),
    (
        ["h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-300bt-v2"],
        "h2oGPT-GM-7b",
        "https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-300bt-v2",
        "an instruction-tuned OpenLLaMA with enhanced conversational ability by H2O.ai",
    ),
    (
        ["baize-v2-7b", "baize-v2-13b"],
        "Baize v2",
        "https://github.com/project-baize/baize-chatbot#v2",
        "A chatbot fine-tuned from LLaMA with ChatGPT self-chat data and Self-Disillation with Feedback (SDF) by UCSD and SYSU.",
    ),
    (
        [
            "airoboros-l2-7b-2.1",
            "airoboros-l2-13b-2.1",
            "airoboros-c34b-2.1",
            "airoboros-l2-70b-2.1",
        ],
        "airoboros",
        "https://huggingface.co/jondurbin/airoboros-l2-70b-2.1",
        "an instruction-tuned LlaMa model tuned with 100% synthetic instruction-response pairs from GPT4",
    ),
    (
        [
            "spicyboros-7b-2.2",
            "spicyboros-13b-2.2",
            "spicyboros-70b-2.2",
        ],
        "spicyboros",
        "https://huggingface.co/jondurbin/spicyboros-70b-2.2",
        "de-aligned versions of the airoboros models",
    ),
    (
        ["Robin-7b-v2", "Robin-13b-v2", "Robin-33b-v2"],
        "Robin-v2",
        "https://huggingface.co/OptimalScale/robin-7b-v2-delta",
        "A chatbot fine-tuned from LLaMA-7b, achieving competitive performance on chitchat, commonsense reasoning and instruction-following tasks, by OptimalScale, HKUST.",
    ),
    (
        ["manticore-13b-chat"],
        "Manticore 13B Chat",
        "https://huggingface.co/openaccess-ai-collective/manticore-13b-chat-pyg",
        "A chatbot fine-tuned from LlaMa across several CoT and chat datasets.",
    ),
    (
        ["redpajama-incite-7b-chat"],
        "RedPajama-INCITE-7B-Chat",
        "https://huggingface.co/togethercomputer/RedPajama-INCITE-7B-Chat",
        "A chatbot fine-tuned from RedPajama-INCITE-7B-Base by Together",
    ),
    (
        [
            "falcon-7b",
            "falcon-7b-instruct",
            "falcon-40b",
            "falcon-40b-instruct",
            "falcon-180b",
            "falcon-180b-chat",
        ],
        "Falcon",
        "https://huggingface.co/tiiuae/falcon-180B",
        "TII's flagship series of large language models",
    ),
    (
        ["tigerbot-7b-sft"],
        "Tigerbot",
        "https://huggingface.co/TigerResearch/tigerbot-7b-sft",
        "TigerBot is a large-scale language model (LLM) with multiple languages and tasks.",
    ),
    (
        ["internlm-chat-7b", "internlm-chat-7b-8k"],
        "InternLM",
        "https://huggingface.co/internlm/internlm-chat-7b",
        "InternLM is a multi-language large-scale language model (LLM), developed by SHLAB.",
    ),
    (
        ["Qwen-7B-Chat"],
        "Qwen",
        "https://huggingface.co/Qwen/Qwen-7B-Chat",
        "Qwen is a multi-language large-scale language model (LLM), developed by Damo Academy.",
    ),
    (
        ["Llama2-Chinese-13b-Chat", "LLama2-Chinese-13B"],
        "Llama2-Chinese",
        "https://huggingface.co/FlagAlpha/Llama2-Chinese-13b-Chat",
        "Llama2-Chinese is a multi-language large-scale language model (LLM), developed by FlagAlpha.",
    ),
    (
        ["Chinese-Alpaca-2-7B", "Chinese-Alpaca-2-13B"],
        "Chinese-Alpaca",
        "https://huggingface.co/hfl/chinese-alpaca-2-13b",
        "New extended Chinese vocabulary beyond Llama-2, open-sourcing the Chinese LLaMA-2 and Alpaca-2 LLMs.",
    ),
    (
        ["Vigogne-2-7B-Instruct", "Vigogne-2-13B-Instruct"],
        "Vigogne-Instruct",
        "https://huggingface.co/bofenghuang/vigogne-2-7b-instruct",
        "Vigogne-Instruct is a French large language model (LLM) optimized for instruction-following, developed by Bofeng Huang",
    ),
    (
        ["Vigogne-2-7B-Chat", "Vigogne-2-13B-Chat"],
        "Vigogne-Chat",
        "https://huggingface.co/bofenghuang/vigogne-2-7b-chat",
        "Vigogne-Chat is a French large language model (LLM) optimized for instruction-following and multi-turn dialogues, developed by Bofeng Huang",
    ),
    (
        ["stable-vicuna-13B-HF"],
        "stable-vicuna",
        "https://huggingface.co/TheBloke/stable-vicuna-13B-HF",
        "StableVicuna is a Vicuna model fine-tuned using RLHF via PPO on various conversational and instructional datasets.",
    ),
    (
        ["deluxe-chat-v1", "deluxe-chat-v1.1", "deluxe-chat-v1.2"],
        "DeluxeChat",
        "",
        "Deluxe Chat",
    ),
    (
        [
            "Xwin-LM-7B-V0.1",
            "Xwin-LM-13B-V0.1",
            "Xwin-LM-70B-V0.1",
            "Xwin-LM-7B-V0.2",
            "Xwin-LM-13B-V0.2",
        ],
        "Xwin-LM",
        "https://github.com/Xwin-LM/Xwin-LM",
        "Chat models developed by Xwin-LM team",
    ),
    (
        ["lemur-70b-chat"],
        "Lemur-Chat",
        "https://huggingface.co/OpenLemur/lemur-70b-chat-v1",
        "an openly accessible language model optimized for both natural language and coding capabilities ",
    ),
    (
        ["Mistral-7B-OpenOrca"],
        "Open-Orca",
        "https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca",
        "A fine-tune of [Mistral 7B](https://huggingface.co/mistralai/Mistral-7B-v0.1) using [OpenOrca dataset](https://huggingface.co/datasets/Open-Orca/OpenOrca)",
    ),
    (
        ["dolphin-2.2.1-mistral-7b"],
        "dolphin-mistral",
        "https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b",
        "An uncensored fine-tuned Mistral 7B",
    ),
    (
        [
            "AquilaChat-7B",
            "AquilaChat2-7B",
            "AquilaChat2-34B",
        ],
        "Aquila-Chat",
        "https://huggingface.co/BAAI/AquilaChat2-34B",
        "Chat models developed by BAAI team",
    ),
    (
        ["xDAN-L1-Chat-RL-v1"],
        "xDAN-L1-Chat",
        "https://huggingface.co/xDAN-AI/xDAN-L1-Chat-RL-v1",
        "A large language chat model created by xDAN-AI.",
    ),
    (
        ["MetaMath-70B-V1.0", "MetaMath-7B-V1.0"],
        "MetaMath",
        "https://huggingface.co/meta-math",
        "MetaMath is a finetune of Llama2 on [MetaMathQA](https://huggingface.co/datasets/meta-math/MetaMathQA) that specializes in mathematical reasoning.",
    ),
    (
        ["Yuan2-2B-hf", "Yuan2-51B-hf", "Yuan2-102B-hf"],
        "IEIYuan",
        "https://huggingface.co/IEITYuan",
        "Yuan2 is a Basemodel developed by IEI.",
    ),
)

for _registration in _MODEL_REGISTRATIONS:
    register_model_info(*_registration)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
arena_elo/elo_rating/upload_battle_data.py DELETED
@@ -1,193 +0,0 @@
1
- import fire
2
- import json
3
- import os
4
- import datasets
5
- import datetime
6
- from pathlib import Path
7
- from datetime import datetime
8
- from PIL import Image
9
-
10
- datasets.config.DEFAULT_MAX_BATCH_SIZE = 500
11
def create_hf_dataset(data_file: list, split="test"):
    """Build a Hugging Face dataset of single-model conversations.

    Args:
        data_file: Despite its name, the in-memory list of record dicts (not a
            path) -- it is handed straight to ``datasets.Dataset.from_list``.
            Each record provides ``question_id``, ``model``, ``conversation``
            (a list of ``{"role", "content"}`` dicts), ``language``, ``image``
            and ``turn``.
        split: Split name attached to the dataset.

    Returns:
        The resulting ``datasets.Dataset``.
    """
    # Schema of one conversation turn.
    turn_features = {
        "role": datasets.Value("string"),
        "content": datasets.Value("string"),
    }
    features = datasets.Features(
        {
            "question_id": datasets.Value("string"),
            "model": datasets.Value("string"),
            "conversation": [turn_features],
            "language": datasets.Value("string"),
            "image": datasets.Image(),
            "turn": datasets.Value("int32"),
        }
    )
    return datasets.Dataset.from_list(data_file, features=features, split=split)
32
-
33
def create_hf_battle_dataset(data_file: list, split="test"):
    """Build a Hugging Face dataset of two-model battles.

    Args:
        data_file: Despite its name, the in-memory list of record dicts (not a
            path) -- it is handed straight to ``datasets.Dataset.from_list``.
            Each record provides ``question_id``, ``model_a``, ``model_b``,
            ``conversation_a``, ``conversation_b`` (lists of
            ``{"role", "content"}`` dicts), ``language``, ``image``, ``turn``
            and ``anony``.
        split: Split name attached to the dataset.

    Returns:
        The resulting ``datasets.Dataset``.
    """

    def turn_features():
        """Return a fresh schema dict for one conversation turn."""
        return {
            "role": datasets.Value("string"),
            "content": datasets.Value("string"),
        }

    features = datasets.Features(
        {
            "question_id": datasets.Value("string"),
            "model_a": datasets.Value("string"),
            "model_b": datasets.Value("string"),
            "conversation_a": [turn_features()],
            "conversation_b": [turn_features()],
            "language": datasets.Value("string"),
            "image": datasets.Image(),
            "turn": datasets.Value("int32"),
            "anony": datasets.Value("bool"),
        }
    )
    return datasets.Dataset.from_list(data_file, features=features, split=split)
62
-
63
-
64
-
65
-
66
def load_image(path:str):
    """Open the image at ``path``; on any error print it and return ``None``."""
    try:
        image = Image.open(path)
    except Exception as e:
        # Best effort: a missing or unreadable image is reported, not fatal.
        print(f"Error loading image {path}: {e}")
        image = None
    return image
72
-
73
def get_date_from_time_stamp(unix_timestamp: int):
    """Format a Unix timestamp as ``YYYY-MM-DD`` in the machine's local time."""
    return datetime.fromtimestamp(unix_timestamp).strftime("%Y-%m-%d")
80
-
81
def load_battle_image(battle, log_dir):
    """Load the input image logged for ``battle``, or ``None`` if unavailable.

    The image is looked up at
    ``<log_dir>/<YYYY-MM-DD>-convinput_images/input_image_<question_id>.png``,
    where the date is derived from ``battle['tstamp']``.
    """
    day = get_date_from_time_stamp(battle['tstamp'])
    image_dir = Path(log_dir) / f"{day}-convinput_images"
    return load_image(image_dir / f"input_image_{battle['question_id']}.png")
84
-
85
-
86
def main(
    data_file: str = "./results/latest/clean_battle_conv.json",
    repo_id: str = "DongfuTingle/wildvision-bench",
    log_dir: str = os.getenv("LOGDIR", "./vision-arena-logs/"),
    mode="battle",
    token = os.environ.get("HUGGINGFACE_TOKEN", None)
):
    """Convert cleaned battle records into a dataset and push it to the Hub.

    Args:
        data_file: JSON file holding the list of battle records.
        repo_id: Hub repository that receives the dataset.
        log_dir: Directory containing the logged input images.
        mode: ``"battle"`` uploads every battle that has an image;
            ``"keep_bad_only"`` uploads, for anonymous battles only, the losing
            side's conversation (both sides for a ``"tie (bothbad)"`` vote).
        token: Hub access token.

    Raises:
        ValueError: If ``mode`` is neither of the two supported values.
    """
    with open(data_file, "r") as f:
        data = json.load(f)

    has_image_stats = {
        "has_image": 0,
        "no_image": 0,
    }
    split = "test"

    def battles_with_image(battles):
        """Yield (battle, image) pairs, counting and skipping image-less battles."""
        for battle in battles:
            image = load_battle_image(battle, log_dir)
            if image is None:
                # we don't keep the data without image
                has_image_stats["no_image"] += 1
                continue
            has_image_stats["has_image"] += 1
            yield battle, image

    if mode == "keep_bad_only":
        # Which side(s) produced a bad answer, keyed by the recorded winner.
        bad_sides_by_winner = {
            "model_a": ("b",),
            "model_b": ("a",),
            "tie (bothbad)": ("a", "b"),
        }
        # anony only
        anony_battles = [d for d in data if d["anony"]]

        new_data = []
        for battle, image in battles_with_image(anony_battles):
            for side in bad_sides_by_winner.get(battle["winner"], ()):
                new_data.append({
                    "question_id": battle["question_id"],
                    "model": battle[f"model_{side}"],
                    "conversation": battle[f"conversation_{side}"],
                    "language": battle["language"],
                    "image": image,
                    "turn": battle["turn"],
                })
        hf_dataset = create_hf_dataset(new_data, "test")

    elif mode == "battle":
        new_data = [
            {
                "question_id": battle["question_id"],
                "model_a": battle["model_a"],
                "model_b": battle["model_b"],
                "conversation_a": battle["conversation_a"],
                "conversation_b": battle["conversation_b"],
                "language": battle["language"],
                "image": image,
                "turn": battle["turn"],
                "anony": battle["anony"],
            }
            for battle, image in battles_with_image(data)
        ]
        hf_dataset = create_hf_battle_dataset(new_data, "test")
    else:
        raise ValueError(f"Invalid mode: {mode}")

    print(f"Stats: {has_image_stats}")
    print(hf_dataset)
    print(f"Uploading to part {repo_id}:{split}...")
    hf_dataset.push_to_hub(
        repo_id=repo_id,
        config_name=mode,
        split=split,
        token=token,
        commit_message=f"Add vision-arena {split} dataset",
    )

    print("Done!")
190
-
191
-
192
# Run main() through python-fire when the module is executed as a script, so
# its keyword arguments become command-line flags.
if __name__ == "__main__":
    fire.Fire(main)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
arena_elo/elo_rating/utils.py DELETED
@@ -1,83 +0,0 @@
1
- from datetime import datetime
2
- import pytz
3
- import PIL
4
- import os
5
-
6
def detect_language(text: str) -> str:
    """Return the name of the language that polyglot detects in ``text``.

    The literal string ``"unknown"`` is returned when detection raises
    ``pycld2.error`` or polyglot's ``UnknownLanguage``.
    """
    # Imported inside the function: pip3 install polyglot pyicu pycld2
    import polyglot
    from polyglot.detect import Detector
    from polyglot.detect.base import logger as polyglot_logger
    import pycld2

    # Only let ERROR-level (and above) records through polyglot's logger.
    polyglot_logger.setLevel("ERROR")

    detection_failures = (pycld2.error, polyglot.detect.base.UnknownLanguage)
    try:
        return Detector(text).language.name
    except detection_failures:
        return "unknown"
20
-
21
-
22
def get_time_stamp_from_date(date_str: str):
    """Convert a date string to a Unix timestamp.

    Args:
        date_str (str): The input date string in the format
            'YYYY-MM-DD-HH:MM-TZ', e.g. '2024-02-10-14:00-PT'.

    Returns:
        float: Seconds since the Unix epoch for that wall-clock time in the
        timezone named by the trailing abbreviation.

    Raises:
        KeyError: If the timezone abbreviation is not in the supported map.
        ValueError: If the date/time part does not match '%Y-%m-%d-%H:%M'.
    """
    # "PT" is not a zone name pytz recognizes, so abbreviations are mapped
    # to pytz zone names by hand.
    timezone_map = {
        "PT": "US/Pacific",
    }

    # Everything after the last dash is the timezone abbreviation; the rest
    # is the date/time part. With no dash at all the whole string is treated
    # as the abbreviation, so the lookup below raises KeyError as before.
    date_part, _, tz_abbr = date_str.rpartition("-")
    tz_info = pytz.timezone(timezone_map[tz_abbr])

    naive_dt = datetime.strptime(date_part, "%Y-%m-%d-%H:%M")

    # Attach the zone with localize(): passing a pytz zone through
    # datetime.replace(tzinfo=...) uses the zone's first (local mean time)
    # offset, which shifts the result by several minutes and ignores DST.
    aware_dt = tz_info.localize(naive_dt)

    return aware_dt.timestamp()
53
-
54
def get_date_from_time_stamp(unix_timestamp: int):
    """Format a Unix timestamp with the pattern '%Y-%m-%d %H:%M:%S %Z'.

    The timestamp is converted with ``datetime.fromtimestamp`` and no ``tz``
    argument, so the result is a naive datetime in the machine's local time
    and the ``%Z`` field renders as an empty string.
    """
    return datetime.fromtimestamp(unix_timestamp).strftime("%Y-%m-%d %H:%M:%S %Z")
61
-
62
-
63
def get_input_image_path(tstamp, conv_id):
    """Build the path of the input image stored for a conversation.

    The directory name is derived from the US/Pacific calendar date of
    ``tstamp`` (e.g. 2024-02-10) and the root comes from the ``LOGDIR``
    environment variable. If ``LOGDIR`` is unset, ``os.getenv`` yields None
    and the returned path starts with the text "None".
    """
    LOGDIR = os.getenv("LOGDIR")
    pacific = pytz.timezone("US/Pacific")
    moment = datetime.fromtimestamp(tstamp, tz=pacific)
    date_str = moment.strftime("%Y-%m-%d")
    return f"{LOGDIR}/{date_str}-convinput_images/input_image_{conv_id}.png"
68
-
69
def load_image_from_path(image_path):
    """Open the image at ``image_path`` with the Python Imaging Library.

    Args:
        image_path: Path of the image file to open.

    Returns:
        The object returned by ``PIL.Image.open``, or None (after printing a
        message) when the file does not exist or PIL cannot identify its
        format.
    """
    # A bare ``import PIL`` does not load the ``PIL.Image`` submodule, so
    # ``PIL.Image.open`` only works if some other module imported it first.
    # Import the names explicitly so this function works on its own.
    from PIL import Image, UnidentifiedImageError

    try:
        return Image.open(image_path)
    except FileNotFoundError:
        print(f"Image not found at path: {image_path}")
        return None
    except UnidentifiedImageError:
        print(f"Unidentified image format at path: {image_path}")
        return None
81
-
82
-
83
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
arena_elo/evaluator/convert_to_evaluator_data.py DELETED
@@ -1,134 +0,0 @@
1
- import argparse
2
- import json
3
- import os
4
- import time
5
- from pytz import timezone
6
- from tqdm import tqdm
7
- import base64
8
- from icecream import ic
9
- from PIL import Image
10
-
11
-
12
- # Function to encode the image
13
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as base64 text."""
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')
16
-
17
def get_log_files(max_num_files=None):
    """List the existing conversation log files, oldest first.

    Candidate names are ``vision-arena-logs/<date>-conv.json`` for every day
    number 1-31 of February and March 2024; only paths that exist are kept.

    Args:
        max_num_files: When truthy, keep only this many files from the end
            of the list. ``None`` (or 0) keeps every file found.

    Returns:
        list[str]: The selected log file paths.
    """
    dates = [
        f"2024-{month:02d}-{day:02d}"
        for month in (2, 3)
        for day in range(1, 32)
    ]

    found = []
    for d in dates:
        candidate = os.path.expanduser(f"vision-arena-logs/{d}-conv.json")
        if os.path.exists(candidate):
            found.append(candidate)

    keep = max_num_files or len(found)
    return found[-keep:]
34
-
35
-
36
def pretty_print_conversation(messages):
    """Print every (role, message) pair as ``[[role]]: message``."""
    for entry in messages:
        role, msg = entry
        print(f"[[{role}]]: {msg}")
39
-
40
- task_template_map = {
41
- "image_caption": "Give me the semantic alignment score between the given image and the given caption: \"{generated_sentence}\" on a scale of 0-100. Only reply the score value.",
42
- "vqa": "Rate the answer correctness regarding the question within the context of the given image on a scale of 0-100. Only reply the score value.",
43
- "pair_rate_old": "[Instruction]\n\"{instruction}\"\n\n\"{generated_sentence}\"\n\n[System]\nGiven the instruction and the image, please compare the correctness of responses A and B. Reply with \"leftvote\" if you find A better, \"rightvote\" if B is better, \"bothbad_vote\" if both responses are wrong, and \"tievote\" if both responses are equally satisfactory. If you are unable to make a decision, please reply with \"NA\".",
44
- "pair_rate_wexplanation": "<image>[Instruction]\n\"{instruction}\"\n\n\"{generated_sentence}\"[System]\nPlease act as an impartial judge and evaluate the quality of the responses provided by two AI assistants to the user question displayed below. You should choose the assistant that follows the user’s instructions and answers the user’s question better. Your evaluation should consider factors such as the helpfulness, relevance, accuracy, depth, creativity, and level of detail of their responses. Begin your evaluation by comparing the two responses and provide a short explanation. Avoid any positional biases and ensure that the order in which the responses were presented does not influence your decision. Do not allow the length of the responses to influence your evaluation. Do not favor certain names of the assistants. Be as objective as possible. After providing your explanation, output your final verdict by strictly following this format: \"[[A]]\" if assistant A is better, \"[[B]]\" if assistant B is better, and \"[[C]]\" for a tie.",
45
- "pair_rate": "<image>[Instruction]\n\"{instruction}\"\n\n\"{generated_sentence}\"\n\n[System]\nPlease act as an impartial judge and evaluate the quality of the responses provided by two AI assistants to the user question displayed below. You should choose the assistant that follows the user’s instructions and answers the user’s question better. Your evaluation should consider factors such as the helpfulness, relevance, accuracy, depth, creativity, and level of detail of their responses. Begin your evaluation by comparing the two responses and provide a short explanation. Avoid any positional biases and ensure that the order in which the responses were presented does not influence your decision. Do not allow the length of the responses to influence your evaluation. Do not favor certain names of the assistants. Be as objective as possible. Reply with \"leftvote\" if you find assistant A better, \"rightvote\" if assistant B is better, \"bothbad_vote\" if both responses are wrong, and \"tievote\" if both assistants provide equally satisfactory answers. If you are unable to make a decision, please reply with \"NA\"."
46
- }
47
-
48
def _read_log_lines(filename, retries=5, delay=2):
    """Return the lines of ``filename``, or None if it never shows up.

    A missing file is retried ``retries`` times, sleeping ``delay`` seconds
    after each failed attempt.
    """
    for _ in range(retries):
        try:
            with open(filename) as log_file:
                return log_file.readlines()
        except FileNotFoundError:
            time.sleep(delay)
    return None


def _is_usable_response(response):
    """Return True when a response exists and is not a network error."""
    return response is not None and "NETWORK ERROR" not in response


def inspect_convs(
    log_files,
    image_root="/local/home/yujielu/project/Arena-Elo/vision-arena-logs",
    output_path='output_evaluator_data.json',
):
    """Convert pairwise-vote log rows into evaluator training records.

    Every line of every log file is parsed as JSON. A row is kept when it
    has a "states" entry, its "type" is one of the four vote labels, both
    model states carry a prompt and a response, neither response is missing
    or contains "NETWORK ERROR", and the conversation's input image exists
    and can be decoded. Each kept row becomes a record with the "pair_rate"
    prompt as the human turn and the recorded vote as the reply.

    Args:
        log_files: Paths of the JSON-lines conversation logs to read.
        image_root: Directory that holds the per-day input image folders.
            Defaults to the location that used to be hard-coded.
        output_path: File the resulting JSON list is written to.

    Returns:
        None. The records are written to ``output_path``.
    """
    valid_votes = ("leftvote", "rightvote", "bothbad_vote", "tievote")
    json_data = []

    ic(log_files)

    for filename in tqdm(log_files, desc="read files"):
        lines = _read_log_lines(filename)
        if lines is None:
            # Skip the file rather than failing on an unbound variable or
            # silently re-processing the lines of the previous file.
            print(f"Skipping missing log file: {filename}")
            continue

        # Dropping the last five characters removes ".json" from names such
        # as "<date>-conv.json", giving the folder "<date>-convinput_images".
        image_dir = os.path.join(
            image_root, os.path.basename(filename)[:-5] + "input_images"
        )

        for line in lines:
            if not line.strip():
                continue
            row = json.loads(line)

            if "states" not in row:
                continue
            if row.get("type") not in valid_votes:
                continue

            left_state, right_state = row["states"][0], row["states"][1]
            left_messages = left_state['messages']
            right_messages = right_state['messages']
            # Index 0 is read as the instruction and index 1 as the model
            # response, so both entries must be present on both sides.
            if len(left_messages) < 2 or len(right_messages) < 2:
                continue

            left_response = left_messages[1][1]
            right_response = right_messages[1][1]
            if not (
                _is_usable_response(left_response)
                and _is_usable_response(right_response)
            ):
                continue

            conv_id = left_state['conv_id']
            image_path = os.path.join(image_dir, f"input_image_{conv_id}.png")
            if not os.path.exists(image_path):
                continue
            try:
                # Decode the image only to check that it is readable; the
                # context manager closes the file afterwards.
                with Image.open(image_path) as image:
                    image.convert("RGB")
            except Exception:
                # Any decoding failure means the sample is unusable; unlike
                # a bare except this lets KeyboardInterrupt propagate.
                continue

            instruction = left_messages[0][1]
            generated_sentence = f"[The Start of Assistant A’s Answer]\n{left_response}\n[The End of Assistant A’s Answer]\n\n[The Start of Assistant B’s Answer]\n{right_response}\n[The End of Assistant B’s Answer]"
            text_prompt = task_template_map["pair_rate"].format(
                instruction=instruction,
                generated_sentence=generated_sentence,
            )

            json_data.append({
                "id": conv_id,
                "image": image_path,
                "conversations": [
                    {
                        "from": "human",
                        "value": text_prompt
                    },
                    {
                        "from": "gpt",
                        "value": row["type"]
                    }
                ]
            })

    # Write the JSON data to a file
    with open(output_path, 'w') as json_file:
        json.dump(json_data, json_file, indent=2)
121
-
122
if __name__ == "__main__":
    # --max-num-files is forwarded to get_log_files; when it is omitted the
    # value is None.
    cli = argparse.ArgumentParser()
    cli.add_argument("--max-num-files", type=int)
    options = cli.parse_args()

    inspect_convs(get_log_files(options.max_num_files))
132
-
133
-
134
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
arena_elo/evaluator/rating_analysis.ipynb DELETED
@@ -1,321 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 43,
6
- "metadata": {},
7
- "outputs": [
8
- {
9
- "name": "stdout",
10
- "output_type": "stream",
11
- "text": [
12
- "1338\n",
13
- "1044\n"
14
- ]
15
- }
16
- ],
17
- "source": [
18
- "\n",
19
- "import pandas as pd\n",
20
- "import json\n",
21
- "\n",
22
- "# Replace 'your_file_name.csv' with the path to your CSV file\n",
23
- "file_name = 'all_pairvote_log_wgpt.csv'\n",
24
- "\n",
25
- "# Load the CSV file into a DataFrame\n",
26
- "df = pd.read_csv(file_name)\n",
27
- "\n",
28
- "# Define a function to parse JSON data\n",
29
- "def parse_json(data):\n",
30
- " try:\n",
31
- " # Parse the JSON data\n",
32
- " return json.loads(data)\n",
33
- " except ValueError as e:\n",
34
- " # Return None or an empty dictionary if the data cannot be parsed\n",
35
- " return None\n",
36
- "\n",
37
- "# Apply the parse_json function to the 'models' and 'states' columns\n",
38
- "df['models'] = df['models'].apply(parse_json)\n",
39
- "df['states'] = df['states'].apply(parse_json)\n",
40
- "# row[\"states\"][0]['messages'][0][1]\n",
41
- "\n",
42
- "# Now df contains the parsed JSON data in the 'models' and 'states' columns\n",
43
- "# print(df.head())\n",
44
- "print(len(df))\n",
45
- "# filter_vote_df = df[df[\"gpt_vote\"].isin([\"leftvote\", \"rightvote\"])]#, \"tievote\", \"bothbad_vote\"\n",
46
- "# \\#1\n",
47
- "filter_vote_df = df[df[\"gpt_vote\"].isin([\"leftvote\", \"rightvote\", \"tievote\", \"bothbad_vote\"])]\n",
48
- "# \\#2\n",
49
- "# filter_vote_df = df\n",
50
- "filter_vote_df.loc[~filter_vote_df[\"gpt_vote\"].isin([\"leftvote\", \"rightvote\"]), \"gpt_vote\"] = \"tie\"\n",
51
- "filter_vote_df.loc[~filter_vote_df[\"type\"].isin([\"leftvote\", \"rightvote\"]), \"type\"] = \"tie\"\n",
52
- "# \\#3\n",
53
- "#[df[\"gpt_vote\"].isin([\"leftvote\", \"rightvote\"]) & df[\"type\"].isin([\"leftvote\", \"rightvote\"])]\n",
54
- "filtered_df = filter_vote_df[filter_vote_df[\"states\"].apply(lambda x: len(x[0]['messages'][0][1]) > 10)]\n",
55
- "print(len(filtered_df))\n"
56
- ]
57
- },
58
- {
59
- "cell_type": "code",
60
- "execution_count": 44,
61
- "metadata": {},
62
- "outputs": [
63
- {
64
- "name": "stdout",
65
- "output_type": "stream",
66
- "text": [
67
- "Confusion Matrix:\n",
68
- "[[300 61 34]\n",
69
- " [102 269 27]\n",
70
- " [ 99 111 41]]\n",
71
- "\n",
72
- "Accuracy: 0.5842911877394636\n"
73
- ]
74
- }
75
- ],
76
- "source": [
77
- "import warnings\n",
78
- "warnings.filterwarnings('ignore')\n",
79
- "\n",
80
- "from sklearn.metrics import confusion_matrix, accuracy_score\n",
81
- "import pandas as pd\n",
82
- "\n",
83
- "# Assuming df is your DataFrame\n",
84
- "\n",
85
- "# True labels\n",
86
- "y_true = filtered_df[\"type\"]\n",
87
- "\n",
88
- "# Predictions\n",
89
- "y_pred = filtered_df[\"gpt_vote\"]\n",
90
- "\n",
91
- "# Compute the confusion matrix\n",
92
- "# conf_matrix = confusion_matrix(y_true, y_pred, labels=[\"leftvote\", \"rightvote\", \"tievote\", \"bothbad_vote\"])\n",
93
- "conf_matrix = confusion_matrix(y_true, y_pred, labels=[\"leftvote\", \"rightvote\", \"tie\"])\n",
94
- "\n",
95
- "# Compute the accuracy\n",
96
- "accuracy = accuracy_score(y_true, y_pred)\n",
97
- "\n",
98
- "print(\"Confusion Matrix:\")\n",
99
- "print(conf_matrix)\n",
100
- "\n",
101
- "print(\"\\nAccuracy:\", accuracy)\n"
102
- ]
103
- },
104
- {
105
- "cell_type": "code",
106
- "execution_count": 45,
107
- "metadata": {},
108
- "outputs": [
109
- {
110
- "data": {
111
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAA0YAAAJwCAYAAACtcHEcAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB0yUlEQVR4nO3deZyN5f/H8feZfcaYMZaxMwyGkTWJURSKKFshS/YtREQoe4oKaVFpsaRBZUlRUrLLUtYsk2VkaYSxjGEYZq7fH37O12ksc8aMM+N+PR+P83jMXPd9X+dzxrnHec913ddtM8YYAQAAAICFubm6AAAAAABwNYIRAAAAAMsjGAEAAACwPIIRAAAAAMsjGAEAAACwPIIRAAAAAMsjGAEAAACwPIIRAAAAAMsjGAEAAACwPIIRAAAWsWTJElWsWFE+Pj6y2Ww6c+ZMuvY/ffp02Ww2HTx4MF37zcpsNptGjhzp6jIApALBCECafPjhh7LZbHrwwQddXcodu/Zh7vfff7/h9kceeUT33XffXa7q3hQdHa3evXurVKlS8vPzk5+fn8LDw9WrVy9t377dYd+RI0fKZrPZH9f2HTp0qOLi4iTJYfutHitWrLhtbZcvX1Z4eLhsNpvGjx9vb+/Tp49sNpv27dt302NfffVV2Wy2FK/hRlasWKFmzZopX7588vLyUnBwsJ566inNnz//tsfeidjYWLVo0UK+vr6aPHmyZs6cqWzZsmXoc95NISEhstlsqlu37g23f/rpp/b3w83O9VtZt26dRo4cme5hEkDm4eHqAgBkTZGRkQoJCdHGjRu1b98+lShRwtUlIZNbtGiRWrZsKQ8PD7Vp00YVKlSQm5ub9uzZo/nz5+ujjz5SdHS0ihYt6nDcRx99JH9/f8XHx2vp0qV6/fXX9euvv2rt2rWaOXOmw75ffPGFfv755xTtZcqUuW1977//vg4dOpSivU2bNnr//fc1a9YsDR8+/IbHzp49W+XKlVP58uVv+RwjRozQ6NGjVbJkSXXv3l1FixZVbGysfvjhBz399NOKjIxU69atb1trWmzatEnnzp3Ta6+9dtPwcKeee+45Pfvss/L29s6Q/m/Hx8dHy5cv17Fjx5QvXz6HbZGRkfLx8dHFixfT1Pe6des0atQodejQQTly5Ej1cQkJCfLw4OMWkCUYAHDSgQMHjCQzf/58kydPHjNy5MhUHXf58mVz6dKlDK7OedOmTTOSzKZNm264vVatWqZs2bJ3uap7y759+0y2bNlMmTJlzD///JNi++XLl827775rDh06ZG8bMWKEkWROnDjhsG+zZs2MJLNu3boU/fTq1cuk5b+2f//91wQGBprRo0cbSebtt9922F6iRAlTunTpGx67bt06I8mMGzfuls/xzTffGEnmmWeeMYmJiSm2L1myxHz//fdO155aM2bMuOX7PKsrWrSoqVOnjgkICDCTJk1y2Hb48GHj5uZmnn766TT/DN5++20jyURHR99236SkJJOQkOD0cwBwLabSAXBaZGSkgoKC1LBhQz3zzDOKjIxMsc/BgwftU5ImTZqk0NBQeXt7a9euXZKkPXv26JlnnlHOnDnl4+OjKlWq6LvvvnPo49oUt7Vr16p///7KkyePsmXLpqZNm+rEiRMO+y5cuFANGzZUgQIF5O3trdDQUL322mtKSkpK99d/7bVNnz49xbb/Xk9wbTrYX3/9pbZt2yowMFB58uTRsGHDZIzR4cOH1bhxYwUEBChfvnyaMGGCQ3+JiYkaPny47r//fgUGBipbtmx6+OGHtXz58hvWNH78eH3yySf2n/cDDzygTZs23fL1/P7777LZbJoxY0aKbT/99JNsNpsWLVokSTp37pxefPFFhYSEyNvbW8HBwXrssce0efPmWz7HW2+9pfPnz2vatGnKnz9/iu0eHh7q06ePChcufMt+JKl27dqSrk7LSy+DBw9WWFiY2rZte8Ptbdq00Z49e274
OmfNmiWbzaZWrVrd8jmGDRumnDlzaurUqfL09EyxvV69enryySft3x8/flydO3dW3rx55ePjowoVKqT4N0rtv/sjjzyi9u3bS5IeeOAB2Ww2dejQQdLVKWjXvr7eI488okceecSh7f3331fZsmXl5+enoKAgValSRbNmzbJvv9k1Rh9++KHKli0rb29vFShQQL169UoxJe3alNVdu3bp0UcflZ+fnwoWLKi33nrrZj/SFHx8fNSsWTOHmqSrI3pBQUGqV69eimO2b9+uDh06qHjx4vLx8VG+fPnUqVMnxcbG2vcZOXKkBg4cKEkqVqyYfUretddps9nUu3dvRUZG2l/nkiVL7Nuu/U5ISEhQ6dKlVbp0aSUkJNj7P3XqlPLnz6+IiIgM+Z0FIHUY2wXgtMjISDVr1kxeXl5q1aqVPvroI23atEkPPPBAin2nTZumixcvqlu3bvL29lbOnDm1c+dO1ahRQwULFtTgwYOVLVs2ff3112rSpInmzZunpk2bOvTxwgsvKCgoSCNGjNDBgwc1adIk9e7dW1999ZV9n+nTp8vf31/9+/eXv7+/fv31Vw0fPlxxcXF6++23U/W6zp49q5MnT6Zov3z5spM/oZRatmypMmXKaNy4cVq8eLHGjBmjnDlzasqUKapdu7befPNNRUZGasCAAXrggQdUs2ZNSVJcXJw+++wztWrVSl27dtW5c+f0+eefq169etq4caMqVqzo8DyzZs3SuXPn1L17d9lsNr311ltq1qyZDhw4cMMP45JUpUoVFS9eXF9//bX9w/M1X331lcMHyh49emju3Lnq3bu3wsPDFRsbqzVr1mj37t2qXLnyTV//okWLVKJEiXS5Jm3//v2SpFy5ct1xX5K0ceNGzZgxQ2vWrJHNZrvhPm3atNGoUaM0a9Ysh9eZlJSkr7/+Wg8//LCKFCly0+fYu3ev9uzZo06dOil79uy3rSkhIUGPPPKI9u3bp969e6tYsWL65ptv1KFDB505c0Z9+/Z12P92/+6vvvqqwsLC9Mknn2j06NEqVqyYQkNDU/kTuurTTz9Vnz599Mwzz6hv3766ePGitm/frg0bNtxy+t/IkSM1atQo1a1bV88//7yioqLsvzPWrl3r8L48ffq06tevr2bNmqlFixaaO3euBg0apHLlyumJJ55IVZ2tW7fW448/rv3799tf46xZs/TMM8/c8Bz4+eefdeDAAXXs2FH58uXTzp079cknn2jnzp1av369bDabmjVrpr/++kuzZ8/WO++8o9y5c0uS8uTJY+/n119/1ddff63evXsrd+7cCgkJSfFcvr6+mjFjhmrUqKFXX31VEydOlCT16tVLZ8+e1fTp0+Xu7p6q1wkgA7h6yApA1vL7778bSebnn382xhiTnJxsChUqZPr27euwX3R0tJFkAgICzPHjxx221alTx5QrV85cvHjR3pacnGwiIiJMyZIl7W3XprjVrVvXJCcn29v79etn3N3dzZkzZ+xtFy5cSFFr9+7djZ+fn8Pz3Mi157nV4/qpdNde27Rp01L0JcmMGDHC/v216WDdunWzt125csUUKlTI2Gw2h+lXp0+fNr6+vqZ9+/YO+/53+uHp06dN3rx5TadOnVLUlCtXLnPq1Cl7+8KFC42k207RGjJkiPH09HQ49tKlSyZHjhwOzxMYGGh69ep1y77+6+zZs0aSadKkSYptp0+fNidOnLA/rv93vPazi4qKMidOnDDR0dFmypQpxtvb2+TNm9ecP38+RX/OTqVLTk42VatWNa1atTLG/O/n+N+pdMYY88ADD5hChQqZpKQke9uSJUuMJDNlypRbPs+1f4d33nknVXVNmjTJSDJffvmlvS0xMdFUr17d+Pv7m7i4OId6U/PvfrMpo0WLFnV4z11Tq1YtU6tWLfv3jRs3vu2U0mvPcW262fHjx42Xl5d5/PHHHX5uH3zwgZFkpk6d6vB8kswXX3xhb7t06ZLJly+fefrpp2/5vNdeR8OG
Dc2VK1dMvnz5zGuvvWaMMWbXrl1Gklm5cuUNfwY3+t0xe/ZsI8msWrXK3narqXSSjJubm9m5c+cNt13/O8GYq+ebm5ubWbVqlX2K5X+n/wG4+5hKB8ApkZGRyps3rx599FFJV6eJtGzZUnPmzLnhFJCnn37a4a+qp06d0q+//qoWLVro3LlzOnnypE6ePKnY2FjVq1dPe/fu1dGjRx366Natm8Nf8h9++GElJSXp77//trf5+vrav77W78MPP6wLFy5oz549qXptkydP1s8//5zicbsL6lOjS5cu9q/d3d1VpUoVGWPUuXNne3uOHDkUFhamAwcOOOzr5eUlSUpOTtapU6d05coVValS5YbTulq2bKmgoCD79w8//LAkOfR5Iy1bttTly5cdVkZbunSpzpw5o5YtWzrUuGHDBv3zzz+pfen2FeT8/f1TbHvkkUeUJ08e+2Py5Mkp9gkLC1OePHlUrFgxde/eXSVKlNDixYvl5+eX6hpuZvr06dqxY4fefPPN2+7btm1bHTlyRKtWrbK3zZo1S15eXmrevPktj732M0jNaJEk/fDDD8qXL5/D9DxPT0/16dNH8fHxWrlypcP+af13d0aOHDl05MiR207NvN4vv/yixMREvfjii3Jz+99Hjq5duyogIECLFy922N/f399hOqOXl5eqVq3q1Otwd3dXixYtNHv2bElXf2cVLlzY/jP5r+t/d1y8eFEnT55UtWrVJOm2U0SvV6tWLYWHh6dq35EjR6ps2bJq3769evbsqVq1aqlPnz6pfi4AGYNgBCDVkpKSNGfOHD366KOKjo7Wvn37tG/fPj344IP6999/tWzZshTHFCtWzOH7ffv2yRijYcOGOXwgzpMnj0aMGCHp6rUV1/vvFKVrHwBPnz5tb9u5c6eaNm2qwMBABQQEKE+ePPYPWGfPnk3V66tatarq1q2b4nH9B860+u9rCAwMlI+Pj31KzvXt178uSZoxY4bKly8vHx8f5cqVS3ny5NHixYtv+LpS87O6kQoVKqh06dIO0xO/+uor5c6d235Nj3T1WqE///xThQsXVtWqVTVy5Mjbfmi9Fgbi4+NTbJsyZYp+/vlnffnllzc9ft68efr555+1YsUK7du3T3/++afuv//+Wz7n9eLj43Xs2DH749r1aXFxcRoyZIgGDhyYqmubnn32Wbm7u9uvX7l48aIWLFigJ5544rbvkYCAAElXQ3tq/P333ypZsqRDmJD+t7re9X8UkNL+7+6MQYMGyd/fX1WrVlXJkiXVq1cvrV279pbHXKszLCzMod3Ly0vFixdP8ToKFSqUYjpjUFCQ06+jdevW2rVrl7Zt26ZZs2bp2Wefvek0yVOnTqlv377KmzevfH197SFcSv3vDinl77pb8fLy0tSpUxUdHa1z585p2rRpN60PwN3DNUYAUu3XX39VTEyM5syZozlz5qTYHhkZqccff9yh7fq/xkpXRz0kacCAATe8EFpSiqW/bzbn3hgjSTpz5oxq1aqlgIAAjR49WqGhofLx8dHmzZs1aNAg+3Oml5t9gLnVRdM3eg23e12S9OWXX6pDhw5q0qSJBg4cqODgYLm7u2vs2LH2a22c7fNmWrZsqddff10nT55U9uzZ9d1336lVq1YOSw23aNFCDz/8sBYsWKClS5fq7bff1ptvvqn58+ff9BqQwMBA5c+fX3/++WeKbdeuObrVDUFr1qyZIkA6Y/z48Ro1apT9+6JFi+rgwYMaP368EhMT1bJlS/vzHzlyRNLVQHHw4EEVKFDAPmJ3baGJefPmafLkyfr+++917tw5tWnT5rY1lC5dWpK0Y8eONL+OW7mTf/dbvZ+v77dMmTKKiorSokWLtGTJEs2bN08ffvihhg8f7vDzvRN38jqu9+CDDyo0NFQvvviioqOjb3kNVIsWLbRu3ToNHDhQFStWlL+/v5KTk1W/fn2nfnf893fd7fz000+SrgbsvXv3OhWsAGQMghGAVIuMjFRwcPAN
pzvNnz9fCxYs0Mcff3zLDwjFixeXdHVaUHrdS2XFihWKjY3V/Pnz7YsWSOm7atn1rv01/r+rav33r9/pYe7cuSpevLjmz5/v8AH22uhaemrZsqVGjRqlefPmKW/evIqLi9Ozzz6bYr/8+fOrZ8+e6tmzp44fP67KlSvr9ddfv+XF8Q0bNtRnn32mjRs3qmrVqule+620a9dODz30kP37a+/PQ4cO6fTp0ypbtmyKY9544w298cYb2rJli8MCF23atNGSJUv0448/atasWQoICNBTTz112xpKlSqlsLAwLVy4UO++++4NpxVer2jRotq+fbuSk5MdRo2uTQv9772e7kRQUNANb1r6999/28/Xa7Jly6aWLVuqZcuWSkxMVLNmzfT6669ryJAh8vHxueHrkKSoqCiHvhITExUdHZ1h91OSpFatWmnMmDEqU6ZMikVKrjl9+rSWLVumUaNGOdyjau/evSn2Tc8Rne3bt2v06NHq2LGjtm7dqi5dumjHjh0KDAxMt+cA4Dym0gFIlYSEBM2fP19PPvmknnnmmRSP3r1769y5cymW3P6v4OBgPfLII5oyZYpiYmJSbP/vMtypce2vzNf/VTkxMVEffvih032lRkBAgHLnzu1wrYmkDHm+G722DRs26Lfffkv35ypTpozKlSunr776Sl999ZXy58/vEDSTkpJSTC0KDg5WgQIFdOnSpVv2/fLLL8vPz0+dOnXSv//+m2K7syMCzihevLjD1MgaNWpIkvr06aMFCxY4PKZMmSJJ6tChgxYsWJDir/hNmjSRn5+fPvzwQ/34449q1qzZDQPBjYwaNUqxsbHq0qWLrly5kmL70qVL7cuiN2jQQMeOHXOY2njlyhW9//778vf3V61atdL0s7iR0NBQrV+/XomJifa2RYsW6fDhww77Xb98tXR1Olh4eLiMMTddubFu3bry8vLSe++95/Bv/Pnnn+vs2bNq2LBhur2O/+rSpYtGjBiRYgn8693o/JKkSZMmpdg3W7ZsklL+QcRZly9fVocOHVSgQAG9++67mj59uv7991/169fvjvoFcOcYMQKQKt99953OnTunRo0a3XB7tWrVlCdPHkVGRjpcrH8jkydP1kMPPaRy5cqpa9euKl68uP7991/99ttvOnLkiLZt2+ZUbREREQoKClL79u3Vp08f2Ww2zZw5M0M/bHfp0kXjxo1Tly5dVKVKFa1atUp//fVXuj/Pk08+qfnz56tp06Zq2LChoqOj9fHHHys8PPyG1+zcqZYtW2r48OHy8fFR586dHUYrzp07p0KFCumZZ55RhQoV5O/vr19++UWbNm265YdPSSpZsqRmzZqlVq1aKSwsTG3atFGFChVkjFF0dLRmzZolNzc3FSpUKN1f081Urlw5xRLj16bUlS1bVk2aNElxjL+/v5o0aWK/zig10+iuadmypXbs2KHXX39dW7ZsUatWrVS0aFHFxsZqyZIlWrZsmb3fbt26acqUKerQoYP++OMPhYSEaO7cuVq7dq0mTZqU6kUcUqNLly6aO3eu6tevrxYtWmj//v368ssvUyzn/fjjjytfvnyqUaOG8ubNq927d+uDDz5Qw4YNb1pPnjx5NGTIEI0aNUr169dXo0aNFBUVpQ8//FAPPPDATe8blR6KFi3qcE+xGwkICFDNmjX11ltv6fLlyypYsKCWLl16w9Hma9e1vfrqq3r22Wfl6empp556yh6YUmvMmDHaunWrli1bpuzZs6t8+fIaPny4hg4dqmeeeUYNGjRwqj8A6YdgBCBVIiMj5ePjo8cee+yG293c3NSwYUNFRkam+Mvyf4WHh+v333/XqFGjNH36dMXGxio4OFiVKlVymM6SWrly5dKiRYv00ksvaejQoQoKClLbtm1Vp06dm17HdKeGDx+uEydOaO7cufr666/1xBNP6Mcff1RwcHC6Pk+HDh107NgxTZkyRT/99JPCw8P15Zdf6ptvvtGKFSvS9bmkqx/ehw4dqgsX
LqQIuH5+furZs6eWLl2q+fPnKzk5WSVKlNCHH36o559//rZ9N27cWDt27NCECRO0dOlSTZ06VTabTUWLFlXDhg3Vo0cPVahQId1fU3pr06aNZs2apfz58zssTJEaY8aMUe3atfXee+/po48+0qlTpxQUFKRq1app4cKF9j88+Pr6asWKFRo8eLBmzJihuLg4hYWFadq0aTe8GeudqFevniZMmKCJEyfqxRdfVJUqVezn0/W6d++uyMhITZw4UfHx8SpUqJD69OmjoUOH3rL/kSNHKk+ePPrggw/Ur18/5cyZU926ddMbb7xx03tr3U2zZs3SCy+8oMmTJ8sYo8cff1w//vijChQo4LDfAw88oNdee00ff/yxlixZouTkZEVHRzsVjDZv3qw33nhDvXv3tq/sKV29wfDChQvVtWtX7dy5Uzly5EivlwfACTaTkX9SBQAAAIAsgGuMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFjePXmDV99KvV1dAuByvy0c6+oSAJcrXSC7q0sAALiYTyoTDyNGAAAAACyPYAQAAADA8ghGAAAAACyPYAQAAADA8ghGAAAAACyPYAQAAADA8ghGAAAAACyPYAQAAADA8ghGAAAAACyPYAQAAADA8ghGAAAAACyPYAQAAADA8ghGAAAAACyPYAQAAADA8ghGAAAAACyPYAQAAADA8ghGAAAAACyPYAQAAADA8ghGAAAAACyPYAQAAADA8ghGAAAAACyPYAQAAADA8ghGAAAAACyPYAQAAADA8ghGAAAAACyPYAQAAADA8ghGAAAAACyPYAQAAADA8ghGAAAAACyPYAQAAADA8ghGAAAAACyPYAQAAADA8ghGAAAAACyPYAQAAADA8ghGAAAAACyPYAQAAADA8ghGAAAAACyPYAQAAADA8ghGAAAAACyPYAQAAADA8ghGAAAAACyPYAQAAADA8ghGAAAAACwv0wSj/fv3a+jQoWrVqpWOHz8uSfrxxx+1c+dOF1cGAAAA4F6XKYLRypUrVa5cOW3YsEHz589XfHy8JGnbtm0aMWKEi6sDAAAAcK/LFMFo8ODBGjNmjH7++Wd5eXnZ22vXrq3169e7sDIAAAAAVpApgtGOHTvUtGnTFO3BwcE6efKkCyoCAAAAYCWZIhjlyJFDMTExKdq3bNmiggULuqAiAAAAAFaSKYLRs88+q0GDBunYsWOy2WxKTk7W2rVrNWDAALVr187V5QEAAAC4x2WKYPTGG2+odOnSKly4sOLj4xUeHq6aNWsqIiJCQ4cOdXV5AAAAAO5xNmOMcXUR1xw+fFg7duxQfHy8KlWqpJIlS6apH99KvdO5MiDr+W3hWFeXALhc6QLZXV0CAMDFfDxSt1+mGDEaPXq0Lly4oMKFC6tBgwZq0aKFSpYsqYSEBI0ePdrV5QEAAAC4x2WKESN3d3fFxMQoODjYoT02NlbBwcFKSkpyqj9GjABGjACJESMAQBYbMTLGyGazpWjftm2bcubM6YKKAAAAAFhJKvNTxggKCpLNZpPNZlOpUqUcwlFSUpLi4+PVo0cPF1YIAAAAwApcGowmTZokY4w6deqkUaNGKTAw0L7Ny8tLISEhql69ugsrBAAAAGAFLg1G7du3lyQVK1ZMERER8vT0dGU5AAAAACzKpcHomlq1aikpKUnz5s3T7t27JUlly5ZVo0aN5O7u7uLqAAAAANzrMkUw2rdvnxo0aKCjR48qLCxMkjR27FgVLlxYixcvVmhoqIsrBAAAAHAvyxSr0vXp00ehoaE6fPiwNm/erM2bN+vQoUMqVqyY+vTp4+ryAAAAANzjMsWI0cqVK7V+/XqHpblz5cqlcePGqUaNGi6sDAAAAIAVZIpg5O3trXPnzqVoj4+Pl5eXlwsqwn91bf6Quj7zsIoWuBpedx84pjc++VFL1+6SJHl7eWhc/2ZqXu9+eXt56JffdqvvG1/p+Kn/
/bsWzhekd19pqVpVSik+4ZIiv9+gYe9/p6SkZJe8JiA9nDp5XJGfva+tG9fp0qWLylegkJ4fMEKhYeGSpA2rf9Uvi+bpwN49ij93Vm9+FKmQEmEurhrIeHNmRWrGtM918uQJlQorrcGvDFO58uVdXRZwV3EeZC2ZYirdk08+qW7dumnDhg0yxsgYo/Xr16tHjx5q1KiRq8uDpKP/ntGw9xcqos1bqtHmba3Y+Je+eaebyhTPJ0l6a8DTaljzPrV5+XM93mWS8ucJ1JwJXezHu7nZNP+95+Xl6aFHO0xQ1+Ez1bbRgxr+fENXvSTgjsWfi9PwFzvL3d1DQ954VxM/+1rPde+nbNkD7PtcupigsPsqqnWXF1xYKXB3LfnxB41/a6y69+ylOd8sUFhYaT3fvbNiY2NdXRpw13AeZD2ZIhi99957Cg0NVfXq1eXj4yMfHx/VqFFDJUqU0Lvvvuvq8iDph1V/6qc1u7T/0AntO3RcIyd/r/gLl1S1fDEF+PuoQ5PqGjRxvlZu+ktbdh9WtxFfqnrFUFUtFyJJqlu9jMoUz6dOr87Q9r+OaunaXRr94WJ1b1FTnh6sPIis6buvZihXnrzqOXCESpS+T8H5C6pClWrKV6CQfZ+ajzXUM891VbnKVV1YKXB3zZwxTc2eaaEmTZ9WaIkSGjpilHx8fPTt/HmuLg24azgPsp5MEYxy5MihhQsXKioqSnPnztXcuXMVFRWlBQsWONz0FZmDm5tNzevdr2y+XtqwPVqVyhSRl6eHfl0fZd/nr4P/6lDMKT1Yvpgk6cHyxfTnvn8cptb9vG63ArP7Kjw0/11/DUB6+P23VSpeqowmjh6krs0f06AerbXshwWuLgtwqcuJidq9a6eqVY+wt7m5ualatQht37bFhZUBdw/nQdaUKa4xWrNmjR566CGVLFlSJUuWdOrYS5cu6dKlSw5tJjlJNjdGIdJb2RIFtGLGS/Lx8lB8wiW1fOlT7TlwTBVKFdKlxMs6G5/gsP/x2DjlzXV1SlHeXAE6Hut4HdnxU3FXt+UOkKIEZDnHY47q5+/nqeHTbdS0dUftj9qlaZPHy8PDU7Uef9LV5QEucfrMaSUlJSlXrlwO7bly5VJ09AEXVQXcXZwHWVOmGDGqXbu2ihUrpldeeUW7du1y6tixY8cqMDDQ4XHl3z8yqFJr++vgv3rw2bGq2W68Pv1mjT4d/ZxK//81RoAVJZtkFStZWq0691KxEqVVt2Ez1WnQRD8vYpoEAABZTaYIRv/8849eeuklrVy5Uvfdd58qVqyot99+W0eOHLntsUOGDNHZs2cdHh55778LVVvP5StJOnD4pLbsPqzh73+nHX8dVa9Wj+hYbJy8vTwV6O/rsH9wrgD9G3t1VOjf2DgF58ruuD3n1dGkf0/G3Z0XAKSzoJy5VbBIMYe2gkWK6eTxYy6qCHC9oBxBcnd3T3GBeWxsrHLnzu2iqoC7i/Mga8oUwSh37tzq3bu31q5dq/3796t58+aaMWOGQkJCVLt27Vse6+3trYCAAIcH0+juDjebTd5eHtqy+5ASL1/Row/+bwnikkWDVSR/Tm3YHi1J2rA9WveVKKA8Qf72fepUK62z5xK0+wAfIpE1hZWtoJgjfzu0xRz5W3nyct0crMvTy0tlwstqw/rf7G3JycnasOE3la9QyYWVAXcP50HWlCmuMbpesWLFNHjwYFWoUEHDhg3TypUrXV0SJI1+oZF+WrtTh2NOK3s2H7V8oopqVimpp3p+qLj4i5r+7W9686VmOnX2vM6dv6iJg5pr/bYD2rjjoCTpl992a/eBY/p8THu9+u63ypsrQCN6PakpX69S4uUrrn1xQBo1eLq1hvftpAWzpqp6rce0L2qnlv2wQF1ffNW+T3zcWZ08fkynY09Ikv75/yCVI2cu5cjJXw1xb3qufUcNe2WQypa9T/eVK68vZ85QQkKCmjRt5urSgLuG8yDrsRljjKuLuGbt2rWKjIzU3LlzdfHi
RTVu3Fht2rRR/fr1nerHt1LvDKrQuj4a0VqPVg1TvtwBOht/UX/uPaoJ037Rrxv2SPrfDV5b1P//G7yu262+Y7/Sv9ctuFAkf5DefeVZ1by/pM5fvKTI7zdq6HsLucFrBvlt4VhXl2AJf6xfrdmff6BjRw8rT74CevKZNqrToKl9+4qfvtdH40elOO6Z57qqebvud7NUSypdIPvtd0KGmB35pf3GlmGly2jQK0NVvnwFV5cF3FWcB5mDTyqHgjJFMBoyZIjmzJmjf/75R4899pjatGmjxo0by8/PL039EYwAghEgEYwAAKkPRpliKt2qVas0cOBAtWjRggvSAAAAANx1mSIYjR07VhEREfLwcCznypUrWrdunWrWrOmiygAAAABYQaZYle7RRx/VqVOnUrSfPXtWjz76qAsqAgAAAGAlmSIYGWNks9lStMfGxipbtmwuqAgAAACAlbh0Kl2zZleXK7TZbOrQoYO8vb3t25KSkrR9+3ZFRES4qjwAAAAAFuHSYBQYGCjp6ohR9uzZ5evra9/m5eWlatWqqWvXrq4qDwAAAIBFuCwY9e/fXx988IGyZcumgwcP6rPPPpO/v7+rygEAAABgYS67xuj9999XfHy8pKvLdV+4cMFVpQAAAACwOJeNGIWEhOi9997T448/LmOMfvvtNwUFBd1wX5brBgAAAJCRbMYY44on/vbbb9WjRw8dP35cNptNNyvDZrMpKSnJqb59K/VOjxKBLO23hWNdXQLgcqULZHd1CQAAF/NJ5VCQy0aMmjRpoiZNmig+Pl4BAQGKiopScHCwq8oBAAAAYGEuXZVOkvz9/bV8+XIVK1ZMHh4uLwcAAACABWWKG7zWqlVLf//9t4YOHapWrVrp+PHjkqQff/xRO3fudHF1AAAAAO51mSIYrVy5UuXKldOGDRs0f/58+2p127Zt04gRI1xcHQAAAIB7XaYIRoMHD9aYMWP0888/y8vLy95eu3ZtrV+/3oWVAQAAALCCTBGMduzYoaZNm6ZoDw4O1smTJ11QEQAAAAAryRTBKEeOHIqJiUnRvmXLFhUsWNAFFQEAAACwkkwRjJ599lkNGjRIx44dk81mU3JystauXasBAwaoXbt2ri4PAAAAwD0uUwSjN954Q6VLl1bhwoUVHx+v8PBwPfzww4qIiNDQoUNdXR4AAACAe5zNGGNcXcQ1hw8f1o4dOxQfH69KlSqpZMmSaerHt1LvdK4MyHp+WzjW1SUALle6QHZXlwAAcDGfVN4q1WV3VO3fv/8tt1+/Gt3EiRMzuhwAAAAAFuayYLRly5ZU7Wez2TK4EgAAAABW57JgtHz5clc9NQAAAAA4yBSLLwAAAACAKxGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFiezRhjXF1Eevt66z+uLgFwufZD57u6BMDlTn/b29UlAABczMcjdfsxYgQAAADA8ghGAAAAACyPYAQAAADA8ghGAAAAACyPYAQAAADA8ghGAAAAACyPYAQAAADA8ghGAAAAACyPYAQAAADA8ghGAAAAACyPYAQAAADA8ghGAAAAACyPYAQAAADA8ghGAAAAACyPYAQAAADA8ghGAAAAACyPYAQAAADA8ghGAAAAACyPYAQAAADA8ghGAAAAACyPYAQAAADA8ghGAAAAACyP
YAQAAADA8ghGAAAAACyPYAQAAADA8ghGAAAAACyPYAQAAADA8ghGAAAAACyPYAQAAADA8ghGAAAAACyPYAQAAADA8ghGAAAAACyPYAQAAADA8ghGAAAAACyPYAQAAADA8ghGAAAAACyPYAQAAADA8ghGAAAAACyPYAQAAADA8jJNMFq9erXatm2r6tWr6+jRo5KkmTNnas2aNS6uDAAAAMC9LlMEo3nz5qlevXry9fXVli1bdOnSJUnS2bNn9cYbb7i4OgAAAAD3ukwRjMaMGaOPP/5Yn376qTw9Pe3tNWrU0ObNm11YGQAAAAAryBTBKCoqSjVr1kzRHhgYqDNnztz9ggAAAABYSqYIRvny5dO+fftStK9Zs0bFixd3QUUAAAAArCRTBKOuXbuqb9++2rBhg2w2m/755x9FRkZqwIABev75511dHgAAAIB7nIerC5CkwYMHKzk5WXXq1NGFCxdUs2ZNeXt7a8CAAXrhhRdcXR4AAACAe5zNGGNcXcQ1iYmJ2rdvn+Lj4xUeHi5/f/809fP11n/SuTIg62k/dL6rSwBc7vS3vV1dAgDAxXxSORSUKabSderUSefOnZOXl5fCw8NVtWpV+fv76/z58+rUqZOrywMAAABwj8sUwWjGjBlKSEhI0Z6QkKAvvvjCBRUBAAAAsBKXXmMUFxcnY4yMMTp37px8fHzs25KSkvTDDz8oODjYhRUCAAAAsAKXBqMcOXLIZrPJZrOpVKlSKbbbbDaNGjXKBZUBAAAAsBKXBqPly5fLGKPatWtr3rx5ypkzp32bl5eXihYtqgIFCriwQgAAAABW4NJgVKtWLUlSdHS0ChcuLDe3THHJEwAAAACLyRT3MSpatKjOnDmjzz//XLt375YklS1bVp06dVJgYKCLqwMAAABwr8sUQzS///67QkND9c477+jUqVM6deqUJk6cqNDQUG3evNnV5QEAAAC4x2WKEaN+/fqpUaNG+vTTT+XhcbWkK1euqEuXLnrxxRe1atUqF1cIAAAA4F6WKYLR77//7hCKJMnDw0Mvv/yyqlSp4sLKAAAAAFhBpphKFxAQoEOHDqVoP3z4sLJnz+6CigAAAABYSaYIRi1btlTnzp311Vdf6fDhwzp8+LDmzJmjLl26qFWrVq4uDwAAAMA9LlNMpRs/frxsNpvatWunK1euSJI8PT31/PPPa9y4cS6uDgAAAMC9zmaMMa4u4poLFy5o//79kqTQ0FD5+fmlqZ+vt/6TnmUBWVL7ofNdXQLgcqe/7e3qEgAALuaTyqGgTDFi9OWXX6pZs2by8/NTuXLlXF0ObuLgrm1a8/1X+if6L507HatWA15T+AMP2bcbY/TrN9P0+7LFung+XkXC7lOjLv2UK38hSdLp48e0Yv4XOvDnFsWfOaXsOXOrwkN1VatZW3l4eLrqZQGpNqD5/WpSvbhKFQpSQuIVbdh9TK9OX6e9R8847Pdg6Xwa+Vw1PRCWV0nJRtsPnNBTw7/TxcQkSVLF0Dwa06G67i95dfu36/Zr0GdrdP7iZRe8KiBjzJkVqRnTPtfJkydUKqy0Br8yTOXKl3d1WcBdxXmQtWSKa4z69eun4OBgtW7dWj/88IOSkpJcXRJuIPHSReUrGqonO/W94fbV383R+h/nq1GXfur++ofy8vHRjDde1uXEREnSyX8OySQbNe7aXy9MmKYn2vXUpl++1y+zP7ubLwNIs4fvK6CPF+9QrQFz9eSwhfLwcNOi1xrJz/t/f2N6sHQ+LRz1lJZtOaSH+3+jh/p9rY8X7VBy8tXB+fw5s2nxmMbaH3NWNV/6Ro1HfKfwIjn1ab86rnpZQLpb8uMPGv/WWHXv2UtzvlmgsLDSer57Z8XGxrq6NOCu4TzIejJFMIqJidGcOXNks9nUokUL5c+fX7169dK6detcXRquU6rSg6r7bGeFV304xTZjjH77Ya5qNXtOZR54SPmKhurpXkN07vRJ7d60RpJUsmJVNes5SCUqPKCceQuo
TJUaeujJFtq1cfXdfilAmjQe8b2+XLZHuw+d0o7oWHV75xcVCQ5QpRLB9n3e6vKQPvx+u8bP3azdh05p79EzmrdmnxKvJEuSnnggRJevJOvFj1Zq79Ez+mPvcb0weYWa1iih4vkDXfXSgHQ1c8Y0NXumhZo0fVqhJUpo6IhR8vHx0bfz57m6NOCu4TzIejJFMPLw8NCTTz6pyMhIHT9+XO+8844OHjyoRx99VKGhoa4uD6lw+niM4s+cUmi5++1tPn7+KlSijA7v3XnT4y5eOC9ff5ZkR9YUkM1bknQ6/qIkKU+gr6qWzqcTZxK0/O2ndXBmJy0d21QR4fntx3h7uuvylSRdf3VnQuLVRWeu3w/Iqi4nJmr3rp2qVj3C3ubm5qZq1SK0fdsWF1YG3D2cB1lTpghG1/Pz81O9evX0xBNPqGTJkjp48KCrS0IqxJ85JUnyDwxyaM8WGGTf9l+xx45q/ZIFeqDuUxleH5DebDbp7a4Pa93Of7Tr76vv8WL5AiRJr7auqqk/7VLjEd9p6/4T+uH1JgotcHU0aMX2I8ob5Kd+zSrJ08NNObJ5a0yH6pKkfDmzuebFAOno9JnTSkpKUq5cuRzac+XKpZMnT7qoKuDu4jzImjJNMLpw4YIiIyPVoEEDFSxYUJMmTVLTpk21c+fNRxsk6dKlS4qLi3N4XE68dJeqRlrFnTqhL954WfdVq6UqdZ50dTmA0yY9X0tli+ZUu7d+sre52WySpM+X/KmZv+zWtgMn9fJna/TXkdNq/1i4JGn3oVPq+s4y9WlaUafm9dDBLzvp4LE4HTt9XiY50ywSCgCA5WSKVemeffZZLVq0SH5+fmrRooWGDRum6tWrp+rYsWPHatSoUQ5tz3Tvr+Y9XsqIUnET/jlySpLiz55W9qD//XXk/NnTyhdSwmHfuFMnNXV0fxUuVVaNuvHvhKznnR411eCBENUdPF9HY8/b22NOX/169yHHUdKow6dVOI+//fuvVv6lr1b+peAcvjp/8YqMMerTpKKij529Oy8AyEBBOYLk7u6e4gLz2NhY5c6d20VVAXcX50HWlClGjNzd3fX1118rJiZGH3zwQapDkSQNGTJEZ8+edXg06cR9K+62oOD88s+RUwd2bLa3XbxwXkf27VbhkmXtbXGnTmjq6H4qUKyUmvUcJDe3TPEWBFLtnR411ah6cdV/9Vv9/e85h21//3tO/8TGq1QhxymlJQrm0KHjjvtK0vEzCTp/8bKeqVlSFy8nadnWwxlaO3A3eHp5qUx4WW1Y/5u9LTk5WRs2/KbyFSq5sDLg7uE8yJoyxYhRvXr1VKdOHbm7uzu0JyYmas6cOWrXrt1Nj/X29pa3t7dDm6dXfIbUaXWXLibo1LGj9u/PHI9RzMF98vXPrhy586p6g2e0YsFM5cxfUEHB+bXsq6nKHpRbZf7/Xkdxp07o81H9lCN3XtV/rofOx/3vr+PZ/3/ECcjMJj1fSy1rlVLzMYsVf+Gy8ua4ehPqsxcu2e9R9M68LRrapqp2RJ/UtgMn1bZOaYUVClLrsT/a++nxZDmt331M8QmXVadSYb3RMULDZvyms+cTXfK6gPT2XPuOGvbKIJUte5/uK1deX86coYSEBDVp2szVpQF3DedB1mMzxrh8Uru7u7tiYmIUHBzs0B4bG6vg4GCn72v09dZ/0rM8/L/onVs1dXS/FO2VatVTs56D/3eD118W6eKFeBUJK6enOr+o3AUKS5I2r1iiBR+9ecO+X/tqeYbWbkXth853dQn3nIRFNx6N7vrOL/py2R779wOeqazuDcspKLuPdkSf1KvT1mndrhj79s/611X9KiHy9/VU1JHTmjR/i2Yvj8rw+q3o9LfMIHCV2ZFf2m9sGVa6jAa9MlTly1dwdVnAXcV5kDn4pHIoKE3BaPXq1ZoyZYr279+vuXPnqmDBgpo5c6aKFSumhx56yNnu5Obmpn///Vd58uRxaN+2bZseffRRnTp141XNboZgBBCM
AIlgBABIfTByeirdvHnz9Nxzz6lNmzbasmWLLl26ugLc2bNn9cYbb+iHH35IdV+VKlWSzWaTzWZTnTp15OHxv3KSkpIUHR2t+vXrO1siAAAAADjF6WA0ZswYffzxx2rXrp3mzJljb69Ro4bGjBnjVF9NmjSRJG3dulX16tWTv///Vm3y8vJSSEiInn76aWdLBAAAAACnOB2MoqKiVLNmzRTtgYGBOnPmjFN9jRgxQpIUEhKili1bysfHx9lyAAAAAOCOOR2M8uXLp3379ikkJMShfc2aNSpevHiaimjfvr2kq6vQHT9+XMnJyQ7bixQpkqZ+AQAAACA1nA5GXbt2Vd++fTV16lTZbDb9888/+u233zRgwAANGzYsTUXs3btXnTp10rp16xzajTGy2WxOr0oHAAAAAM5wOhgNHjxYycnJqlOnji5cuKCaNWvK29tbAwYM0AsvvJCmIjp06CAPDw8tWrRI+fPnl81mS1M/AAAAAJAWab6PUWJiovbt26f4+HiFh4c7LJzgrGzZsumPP/5Q6dKl09zH9ViuG2C5bkBiuW4AQOqX63ZztuNOnTrp3Llz8vLyUnh4uKpWrSp/f3+dP39enTp1crY7SVJ4eLhOnjyZpmMBAAAA4E45HYxmzJihhISEFO0JCQn64osvUt1PXFyc/fHmm2/q5Zdf1ooVKxQbG+uwLS4uztkSAQAAAMApqb7GKC4uTsYYGWN07tw5h6W1k5KS9MMPPyg4ODjVT5wjRw6Ha4mMMapTp47DPiy+AAAAAOBuSHUwuhZkbDabSpUqlWK7zWbTqFGjUv3Ey5cvT/W+AAAAAJCRUh2Mli9fLmOMateurXnz5ilnzpz2bV5eXipatKgKFCiQ6ieuVauWc5UCAAAAQAZJdTC6FmSio6NVuHBhubk5fXnSTW3fvv2G7TabTT4+PipSpIi8vb3T7fkAAAAA4HpO38eoaNGiOnPmjD7//HPt3r1bklS2bFl16tRJgYGBaSqiYsWKt7x3kaenp1q2bKkpU6Y4XNsEAAAAAOnB6WGf33//XaGhoXrnnXd06tQpnTp1ShMnTlRoaKg2b96cpiIWLFigkiVL6pNPPtHWrVu1detWffLJJwoLC9OsWbP0+eef69dff9XQoUPT1D8AAAAA3IrTN3h9+OGHVaJECX366afy8Lg64HTlyhV16dJFBw4c0KpVq5wuomrVqnrttddUr149h/affvpJw4YN08aNG/Xtt9/qpZde0v79+2/bHzd4BbjBKyBxg1cAQOpv8Or0VLrff//dIRRJkoeHh15++WVVqVLF2e4kSTt27FDRokVTtBctWlQ7duyQdHW6XUxMTJr6BwAAAIBbcXoqXUBAgA4dOpSi/fDhw8qePXuaiihdurTGjRunxMREe9vly5c1btw4lS5dWpJ09OhR5c2bN039AwAAAMCtOD1i1LJlS3Xu3Fnjx49XRESEJGnt2rUaOHCgWrVqlaYiJk+erEaNGqlQoUIqX768pKujSElJSVq0aJEk6cCBA+rZs2ea+gcAAACAW3E6GI0fP142m03t2rXTlStXJF1dNe7555/XuHHj0lRERESEoqOjFRkZqb/++kuS1Lx5c7Vu3do+CvXcc8+lqW8AAAAAuJ1UL75QpUoVdenSRa1bt1ZAQIAuXLhgXwghNDRUfn5+GVqoM1h8AWDxBUBi8QUAQAYsvlChQgW9/PLLeumll/T000+rU6dOeuSRR9JYnvTdd9/piSeekKenp7777rtb7tuoUaM0Pw8AAAAA3I5Ty3VfuHBBX3/9taZPn67Vq1erWLFi6tSpk9q3b6+CBQs69cRubm46duyYgoOD5eZ28zUgbDabkpKSnOqbESOAESNAYsQIAJD6ESOnVqXz8/NThw4dtGLFCv3111969tlnNWXKFIWEhKhhw4aaPz/1H8SSk5MVHBysy5cv65FHHtGePXuUnJyc4uFsKAIAAAAAZzm9XPc1oaGhGjNmjA4ePKjZs2dr/fr1
at68udP9eHp6aseOHbccNQIAAACAjHRHaWTFihXq0KGDOnTooKSkJHXt2jVN/bRt21afffbZnZQCAAAAAGnm9HLdR44c0fTp0zV9+nQdOHBADz/8sD788EM1b95cvr6+aSriypUrmjp1qn755Rfdf//9ypYtm8P2iRMnpqlfAAAAAEiNVAejr7/+WlOnTtWyZcsUHBys9u3bq1OnTipRosQdF/Hnn3+qcuXKkmS/j9E1NpvtjvsHAAAAgFtJdTBq27atGjZsqAULFqhBgwbpek3Q8uXL060vAAAAAHBWqoPRkSNHFBwcnJG1AAAAAIBLpHrYh1AEAAAA4F7FGtkAAAAALI9gBAAAAMDyCEYAAAAALM/p+xhdk5iYqOPHjys5OdmhvUiRIndcFAAAAADcTU4Ho71796pTp05at26dQ7sxRjabTUlJSelWHAAAAADcDU4How4dOsjDw0OLFi1S/vz5uQErAAAAgCzP6WC0detW/fHHHypdunRG1AMAAAAAd53Tiy+Eh4fr5MmTGVELAAAAALiE08HozTff1Msvv6wVK1YoNjZWcXFxDg8AAAAAyGqcnkpXt25dSVKdOnUc2ll8AQAAAEBW5XQwWr58eUbUAQAAAAAu43QwqlWrVkbUAQAAAAAuk+YbvF64cEGHDh1SYmKiQ3v58uXvuCgAAAAAuJucDkYnTpxQx44d9eOPP95wO9cYAQAAAMhqnF6V7sUXX9SZM2e0YcMG+fr6asmSJZoxY4ZKliyp7777LiNqBAAAAIAM5fSI0a+//qqFCxeqSpUqcnNzU9GiRfXYY48pICBAY8eOVcOGDTOiTgAAAADIME6PGJ0/f17BwcGSpKCgIJ04cUKSVK5cOW3evDl9qwMAAACAu8DpYBQWFqaoqChJUoUKFTRlyhQdPXpUH3/8sfLnz5/uBQIAAABARnN6Kl3fvn0VExMjSRoxYoTq16+vyMhIeXl5afr06eldHwAAAABkOKeDUdu2be1f33///fr777+1Z88eFSlSRLlz507X4gAAAADgbkjzfYyu8fPzU+XKldOjFgAAAABwCaeDkTFGc+fO1fLly3X8+HElJyc7bJ8/f366FQcAAAAAd4PTwejFF1/UlClT9Oijjypv3ryy2WwZURcAAAAA3DVOB6OZM2dq/vz5atCgQUbUAwAAAAB3ndPLdQcGBqp48eIZUQsAAAAAuITTwWjkyJEaNWqUEhISMqIeAAAAALjrnJ5K16JFC82ePVvBwcEKCQmRp6enw/bNmzenW3EAAAAAcDc4HYzat2+vP/74Q23btmXxBQAAAAD3BKeD0eLFi/XTTz/poYceyoh6AAAAAOCuc/oao8KFCysgICAjagEAAAAAl3A6GE2YMEEvv/yyDh48mAHlAAAAAMDdZzPGGGcOCAoK0oULF3TlyhX5+fmlWHzh1KlT6VpgWly84uoKAAAAAGQGPqm8eMjpa4wmTZrk7CEAAAAAkKk5PWKUFTBiBAAAAEDKwBGjQ4cO3XJ7kSJFnO0SAAAAAFzK6REjNze3W967KCkp6Y6LulOMGAEAAACQMnDEaMuWLQ7fX758WVu2bNHEiRP1+uuvO9sdAAAAALhcul1jtHjxYr399ttasWJFenR3RxgxAgAAACClfsTI6fsY3UxYWJg2bdqUXt0BAAAAwF3j9FS6uLg4h++NMYqJidHIkSNVsmTJdCsMAAAAAO4Wp4NRjhw5Uiy+YIxR4cKFNWfOnHQrDAAAAADuFqeD0fLlyx2+d3NzU548eVSiRAl5eDjdHQAAAAC4HDd4BQAAAHDPSvflur/77rtU7deoUaPUdgkAAAAAmUKqR4zc3BwXsLPZbPrvoTabjRu8AgAAAMg00n257uTkZIeHn5+f9u3b59CWGUIRAAAAADgr3e5jBAAAAABZFcEIAAAAgOURjAAAAABYXpqDkc1mS3GjVwAAAADIilK9Kl1QUJBDEDpz5owCAgJSrFZ36tSp9K0wDViVDgAAAICU
AfcxmjRpUhpLAQAAAIDMLdUjRlkJI0YAAAAApAy4jxEAAAAA3KsIRgAAAAAsj2AEAAAAwPIIRgAAAAAsj2AEAAAAwPJSvVz39Y4cOaLvvvtOhw4dUmJiosO2iRMnpkthAAAAAHC3OB2Mli1bpkaNGql48eLas2eP7rvvPh08eFDGGFWuXDkjagQAAACADOX0VLohQ4ZowIAB2rFjh3x8fDRv3jwdPnxYtWrVUvPmzTOiRgAAAADIUE7f4DV79uzaunWrQkNDFRQUpDVr1qhs2bLatm2bGjdurIMHD2ZQqanHDV4BAAAASBl4g9ds2bLZryvKnz+/9u/fb9928uRJZ7sDAAAAAJdz+hqjatWqac2aNSpTpowaNGigl156STt27ND8+fNVrVq1jKgRAAAAADKU01PpDhw4oPj4eJUvX17nz5/XSy+9pHXr1qlkyZKaOHGiihYtmlG1phpT6QAAAABIqZ9K53QwygoIRgAAAACk1AejNN3HSJISExN1/PhxJScnO7QXKVIkrV0CAAAAgEs4HYz++usvde7cWevWrXNoN8bIZrMpKSkp3YoDAAAAgLvB6WDUsWNHeXh4aNGiRcqfP79sNltG1AUAAAAAd43T1xhly5ZNf/zxh0qXLp1RNd0xrjECAAAAIGXgfYzCw8O5XxEAAACAe4rTwejNN9/Uyy+/rBUrVig2NlZxcXEODwAAAADIapyeSufmdjVL/ffaosy0+AJT6QAAAABIGbhc9/Lly509BAAAAAAyNW7wCgAAAOCelWGLL0jS6tWr1bZtW0VEROjo0aOSpJkzZ2rNmjVp6Q4AAAAAXMrpYDRv3jzVq1dPvr6+2rx5sy5duiRJOnv2rN544410LxAAAAAAMprTwWjMmDH6+OOP9emnn8rT09PeXqNGDW3evDldiwMAAACAu8HpYBQVFaWaNWumaA8MDNSZM2fSVMTMmTNVo0YNFShQQH///bckadKkSVq4cGGa+gMAAAAAZzgdjPLly6d9+/alaF+zZo2KFy/udAEfffSR+vfvrwYNGujMmTP25b5z5MihSZMmOd0fAAAAADjL6WDUtWtX9e3bVxs2bJDNZtM///yjyMhIDRgwQM8//7zTBbz//vv69NNP9eqrr8rd3d3eXqVKFe3YscPp/gAAAADAWU7fx2jw4MFKTk5WnTp1dOHCBdWsWVPe3t4aMGCAXnjhBacLiI6OVqVKlVK0e3t76/z58073BwAAAADOcioYJSUlae3aterVq5cGDhyoffv2KT4+XuHh4fL3909TAcWKFdPWrVtVtGhRh/YlS5aoTJkyaeoTAAAAAJzhVDByd3fX448/rt27dytHjhwKDw+/4wL69++vXr166eLFizLGaOPGjZo9e7bGjh2rzz777I77BwAAAIDbcXoq3X333acDBw6oWLFi6VJAly5d5Ovrq6FDh+rChQtq3bq1ChQooHfffVfPPvtsujwHAAAAANyKzRhjnDlgyZIlGjJkiF577TXdf//9ypYtm8P2gICANBdz4cIFxcfHKzg4OM19SNLFK3d0OAAAAIB7hE8qh4KcDkZubv9byM5ms9m/NsbIZrPZl9tOrdq1a2v+/PnKkSOHQ3tcXJyaNGmiX3/91an+JIIRAAAAgKtSG4ycnkq3fPlyZw+5pRUrVigxMTFF+8WLF7V69ep0fS4AAAAAuBGng1GtWrVuuu3PP/9MdT/bt2+3f71r1y4dO3bM/n1SUpKWLFmiggULOlseAAAAADjN6al0/3Xu3DnNnj1bn332mf74449UT6Vzc3OzT8W7UQm+vr56//331alTJ6drYiodAAAAACkDp9Jds2rVKn3++eeaN2+eChQooGbNmmny5MmpPj46OlrGGBUvXlwbN25Unjx57Nu8vLwUHBwsd3f3tJYHAAAAAKnmVDA6duyYpk+frs8//1xxcXFq0aKFLl26pG+//dbpexpdu6FrcnKyU8cBAAAAQHpzu/0uVz311FMKCwvT9u3b
NWnSJP3zzz96//3306WI/fv364UXXlDdunVVt25d9enTR/v370+XvgEAAADgdlIdjH788Ud17txZo0aNUsOGDdNtmttPP/2k8PBwbdy4UeXLl1f58uW1YcMGlS1bVj///HO6PAfurjmzIvXEY7X1QKVyavNsc+24bqENwCo4D2B1nAMA50FWk+pgtGbNGp07d07333+/HnzwQX3wwQc6efLkHRcwePBg9evXTxs2bNDEiRM1ceJEbdiwQS+++KIGDRp0x/3j7lry4w8a/9ZYde/ZS3O+WaCwsNJ6vntnxcbGuro04K7hPIDVcQ4AnAdZkdOr0p0/f15fffWVpk6dqo0bNyopKUkTJ05Up06dlD17dqcL8PHx0Y4dO1SyZEmH9r/++kvly5fXxYsXne6TVelcp82zzVX2vnJ6ZehwSVevIXu8Ti21av2cOnft5uLqgLuD8wBWxzkAcB5kJqldlS7VI0bXZMuWTZ06ddKaNWu0Y8cOvfTSSxo3bpyCg4PVqFEjZ7tTnjx5tHXr1hTtW7duVXBwsNP9wXUuJyZq966dqlY9wt7m5uamatUitH3bFhdWBtw9nAewOs4BgPMgq3I6GF0vLCxMb731lo4cOaLZs2enqY+uXbuqW7duevPNN7V69WqtXr1a48aNU/fu3dW1a9fbHn/p0iXFxcU5PC5dupSmWnBnTp85raSkJOXKlcuhPVeuXOky7RLICjgPYHWcAwDnQVaV5vsYXc/d3V1NmjRRkyZNnD522LBhyp49uyZMmKAhQ4ZIkgoUKKCRI0eqT58+tz1+7NixGjVqlEPbq8NGaOjwkU7XAgAAAMCa0iUY3QmbzaZ+/fqpX79+OnfunCQ5da3SkCFD1L9/f4c24+6drjUidYJyBMnd3T3FRYWxsbHKnTu3i6oC7i7OA1gd5wDAeZBV3dFUuvQwZswYRUdHS7oaiJxdwMHb21sBAQEOD29vgpEreHp5qUx4WW1Y/5u9LTk5WRs2/KbyFSq5sDLg7uE8gNVxDgCcB1mVy4PRN998oxIlSigiIkIffvgh8y6zuOfad9T8uV/ru28X6MD+/RozeqQSEhLUpGkzV5cG3DWcB7A6zgGA8yArcnq57oywc+dORUZGas6cOTpy5Igee+wxtWnTRk2aNJGfn5/T/bFct2vNjvxSM6Z9rpMnTyisdBkNemWoypev4OqygLuK8wBWxzkAcB5kFqldrjtTBKPrrV27VrNmzdI333yjixcvKi4uzuk+CEYAAAAApAy8j1FGy5Ytm3x9feXl5aXLly+7uhwAAAAAFpApglF0dLRef/11lS1bVlWqVNGWLVs0atQoHTt2zNWlAQAAALAAly/XXa1aNW3atEnly5dXx44d1apVKxUsWNDVZQEAAACwEJcHozp16mjq1KkKDw93dSkAAAAALCrTLL6QmJio6OhohYaGysPjzvIaiy8AAAAAkLLQ4gsJCQnq3Lmz/Pz8VLZsWR06dEiS9MILL2jcuHEurg4AAACAFbg8GA0ePFjbtm3TihUr5OPjY2+vW7euvvrqKxdWBgAAAMAqXH6N0bfffquvvvpK1apVk81ms7eXLVtW+/fvd2FlAAAAAKzC5SNGJ06cUHBwcIr28+fPOwQlAAAAAMgoLg9GVapU0eLFi+3fXwtDn332mapXr+6qsgAAAABYiMun0r3xxht64okntGvXLl25ckXvvvuudu3apXXr1mnlypWuLg8AAACABbh8xOihhx7S1q1bdeXKFZUrV05Lly5VcHCwfvvtN91///2uLg8AAACABWSa+xilJ+5jBAAAAEBK/X2MXDKVLi4uTgEBAfavb+XafgAAAACQUVwyYuTu7q6YmBgFBwfLzc3thqvPGWNks9mUlJTkdP+MGAEAAACQMvmI0a+//qqcOXNKkqZNm6bChQvL3d3dYZ/k5GQdOnTIFeUBAAAAsBiXX2N0/ejR9WJjYxUcHMyIEQAAAIA0S+2IkctXpbs2Ze6/4uPj5ePj
44KKAAAAAFiNy+5j1L9/f0lXb+g6bNgw+fn52bclJSVpw4YNqlixoouqAwAAAGAlLgtGW7ZskXR1xGjHjh3y8vKyb/Py8lKFChU0YMAAV5UHAAAAwEJcfo1Rx44d9e6776brstxcYwQAAABASv01Ri4PRhmBYAQAAABAykKLLwAAAACAqxGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5RGMAAAAAFgewQgAAACA5WW6YHTx4kVXlwAAAADAYjJFMEpOTtZrr72mggULyt/fXwcOHJAkDRs2TJ9//rmLqwMAAABwr8sUwWjMmDGaPn263nrrLXl5ednb77vvPn322WcurAwAAACAFWSKYPTFF1/ok08+UZs2beTu7m5vr1Chgvbs2ePCygAAAABYQaYIRkePHlWJEiVStCcnJ+vy5csuqAgAAACAlWSKYBQeHq7Vq1enaJ87d64qVarkgooAAAAAWImHqwuQpOHDh6t9+/Y6evSokpOTNX/+fEVFRemLL77QokWLXF0eAAAAgHuczRhjXF2EJK1evVqjR4/Wtm3bFB8fr8qVK2v48OF6/PHHne7r4pUMKBAAAABAluOTyqGgTBOM0hPBCAAAAICU+mCUKa4xKl68uGJjY1O0nzlzRsWLF3dBRQAAAACsJFMEo4MHDyopKSlF+6VLl3T06FEXVAQAAADASly6+MJ3331n//qnn35SYGCg/fukpCQtW7ZMISEhLqgMAAAAgJW49BojN7erA1Y2m03/LcPT01MhISGaMGGCnnzySaf65RojAAAAAFLqrzFy6YhRcnKyJKlYsWLatGmTcufO7cpyAAAAAFgUq9IBAAAAuGdlqVXpJGnlypV66qmnVKJECZUoUUKNGjXS6tWrXV0WAAAAAAvIFMHoyy+/VN26deXn56c+ffqoT58+8vX1VZ06dTRr1ixXlwcAAADgHpcpptKVKVNG3bp1U79+/RzaJ06cqE8//VS7d+92qj+m0gEAAACQUj+VLlMEI29vb+3cuVMlSpRwaN+3b5/uu+8+Xbx40an+CEYAAAAApCx2jVHhwoW1bNmyFO2//PKLChcu7IKKAAAAAFiJS5frvuall15Snz59tHXrVkVEREiS1q5dq+nTp+vdd991cXUAAAAA7nWZYiqdJC1YsEATJkywX09UpkwZDRw4UI0bN3a6L6bSAQAAAJCy2DVG6Y1gBAAAAEDKYtcYdenSRStWrHB1GQAAAAAsKlMEoxMnTqh+/foqXLiwBg4cqK1bt7q6JAAAAAAWkmmm0p0+fVrffPONZs2apdWrV6t06dJq06aNWrdurZCQEKf6YiodAAAAACmLX2N05MgRzZ49W1OnTtXevXt15YpzSYdgBAAAAEDKYtcYXe/y5cv6/ffftWHDBh08eFB58+Z1dUkAAAAA7nGZJhgtX75cXbt2Vd68
edWhQwcFBARo0aJFOnLkiKtLAwAAAHCPyxRT6QoWLKhTp06pfv36atOmjZ566il5e3unuT+m0gEAAACQstg1Rp9++qmaN2+uHDly3HK/I0eOqECBAnJzu/VAF8EIAAAAgJTFglFqBQQEaOvWrSpevPgt9yMYAQAAAJCy8OILt5KFMhwAAACALCRLBSMAAAAAyAgEIwAAAACWRzACAAAAYHlZKhjZbDZXlwAAAADgHpSlghGLLwAAAADICFlque7Dhw+rQIECcnd3v+V+LNcNAAAAQMoC9zFq1qxZqvedP3++U30TjFxrzqxIzZj2uU6ePKFSYaU1+JVhKle+vKvLAu4qzgNYHecAwHmQWWT6+xgFBgbaHwEBAVq2bJl+//13+/Y//vhDy5YtU2BgoKtKRBos+fEHjX9rrLr37KU53yxQWFhpPd+9s2JjY11dGnDXcB7A6jgHAM6DrChTTKUbNGiQTp06pY8//tg+TS4pKUk9e/ZUQECA3n77baf6Y8TIddo821xl7yunV4YOlyQlJyfr8Tq11Kr1c+rctZuLqwPuDs4DWB3nAMB5kJlk+hGj602dOlUDBgxwuHbI3d1d/fv319SpU11YGZxxOTFRu3ftVLXqEfY2Nzc3VasWoe3btriwMuDu4TyA1XEOAJwHWVWmCEZXrlzRnj17UrTv2bNHycnJtzz20qVLiouLc3hcunQpo0rFLZw+c1pJSUnKlSuXQ3uuXLl08uRJF1UF3F2cB7A6zgGA8yCryhTBqGPHjurcubMmTpyoNWvWaM2aNZowYYK6dOmijh073vLYsWPHOlyvFBgYqLffHHuXKgcAAABwL0jljLuMNX78eOXLl08TJkxQTEyMJCl//vwaOHCgXnrppVseO2TIEPXv39+hzbh7Z1ituLmgHEFyd3dPcVFhbGyscufO7aKqgLuL8wBWxzkAcB5kVZlixMjNzU0vv/yyjh49qjNnzujMmTM6evSoXn755dves8jb21sBAQEOD29vgpEreHp5qUx4WW1Y/5u9LTk5WRs2/KbyFSq5sDLg7uE8gNVxDgCcB1lVphgxul5AQICrS8AdeK59Rw17ZZDKlr1P95Urry9nzlBCQoKaNE39fauArI7zAFbHOQBwHmRFmSYYzZ07V19//bUOHTqkxMREh22bN292UVVwVv0nGuj0qVP68IP3dPLkCYWVLqMPp3ymXAwbw0I4D2B1nAMA50FWlCnuY/Tee+/p1VdfVYcOHfTJJ5+oY8eO2r9/vzZt2qRevXrp9ddfd6o/7mMEAAAAQEr9fYwyRTAqXbq0RowYoVatWil79uzatm2bihcvruHDh+vUqVP64IMPnOqPYAQAAABAymI3eD106JAiIq7eAMvX11fnzp2TJD333HOaPXu2K0sDAAAAYAGZIhjly5dPp06dkiQVKVJE69evlyRFR0crEwxoAQAAALjHZYpgVLt2bX333XeSrt7stV+/fnrsscfUsmVLNW3a1MXVAQAAALjXZYprjJKTk5WcnCwPj6sTAOfMmaN169apZMmS6t69u7y8vJzqj2uMAAAAAEhZbPGF9EYwAgAAACClPhhlmvsYnT59Wp9//rl2794tSQoPD1fHjh2VM2dOF1cGAAAA4F6XKUaMVq1apUaNGikgIEBVqlSRJP3xxx86c+aMvv/+e9WsWdOp/hgxAgAAACBlsal05cqVU/Xq1fXRRx/J3d1dkpSUlKSePXtq3bp12rFjh1P9EYwAAAAASFksGPn6+mrr1q0KCwtzaI+KilLFihWVkJDgVH8EIwAAAABSFrvBa+XKle3XFl1v9+7dqlChggsqAgAAAGAlLlt8Yfv27fav+/Tpo759+2rfvn2qVq2aJGn9+vWaPHmyxo0b56oSAQAAAFiEy6bSubm5yWaz6XZPb7PZlJSU5FTfTKUDAAAAIGWB5bqjo6Nd9dQAAAAA4CBTLL6Q3hgxAgAAACBlgRGj/9q7d6+WL1+u48ePKzk52WHb8OHD
XVQVAAAAACvIFCNGn376qZ5//nnlzp1b+fLlk81ms2+z2WzavHmzU/0xYgQAAABAymL3MSpatKh69uypQYMGpUt/BCMAAAAAUha7j9Hp06fVvHlzV5cBAAAAwKIyRTBq3ry5li5d6uoyAAAAAFiUy6bSvffee/avz58/r4kTJ6phw4YqV66cPD09Hfbt06ePU30zlQ4AAACAlAWuMSpWrFiq9rPZbDpw4IBTfROMAAAAAEhZIBhlJIIRAAAAACmLLb4wevRoXbhwIUV7QkKCRo8e7YKKAAAAAFhJphgxcnd3V0xMjIKDgx3aY2NjFRwcrKSkJKf6Y8QIAAAAgJTFRoyMMQ43db1m27ZtypkzpwsqAgAAAGAlqcxPGSMoKEg2m002m02lSpVyCEdJSUmKj49Xjx49XFghAAAAACtw6VS6GTNmyBijTp06adKkSQoMDLRv8/LyUkhIiKpXr+50v0ylAwAAACBlsVXpVq5cqYiIiBT3L0orghEAAAAAKYsFI+nq1Llvv/1Wu3fvliSVLVtWjRo1kru7u9N9EYwAAAAASFksGO3bt08NGjTQ0aNHFRYWJkmKiopS4cKFtXjxYoWGhjrVH8EIAAAAgJTFglGDBg1kjFFkZKR9FbrY2Fi1bdtWbm5uWrx4sVP9EYwAAAAASFksGGXLlk3r169XuXLlHNq3bdumGjVqKD4+3qn+CEYAAAAApCx2HyNvb2+dO3cuRXt8fLy8vLxcUBEAAAAAK8kUwejJJ59Ut27dtGHDBhljZIzR+vXr1aNHDzVq1MjV5QEAAAC4x2WKqXRnzpxR+/bt9f3339uX7L58+bIaN26s6dOnO9zfKDWYSgcAAABAymLXGF2zb98+7dq1S5IUHh6uEiVKpKkfghEAAAAAKfXBKJW7ZbzPP/9c77zzjvbu3StJKlmypF588UV16dLFxZUBAAAAuNdlimA0fPhwTZw4US+88IKqV68uSfrtt9/Ur18/HTp0SKNHj3ZxhQAAAADuZZliKl2ePHn03nvvqVWrVg7ts2fP1gsvvKCTJ0861R9T6QAAAABIWWy57suXL6tKlSop2u+//35duULKAQAAAJCxMkUweu655/TRRx+laP/kk0/Upk0bF1QEAAAAwEpcdo1R//797V/bbDZ99tlnWrp0qapVqyZJ2rBhgw4dOqR27dq5qkQAAAAAFuGya4weffTRVO1ns9n066+/OtU31xgBAAAAkLLofYzSC8EIAAAAgJTFFl8AAAAAAFciGAEAAACwPIIRAAAAAMsjGAEAAACwPIIRAAAAAMsjGAEAAACwPIIRAAAAAMsjGAEAAACwPIIRAAAAAMsjGAEAAACwPIIRAAAAAMsjGAEAAACwPIIRAAAAAMsjGAEAAACwPIIRAAAAAMsjGAEAAACwPIIRAAAAAMuzGWOMq4vAveXSpUsaO3ashgwZIm9vb1eXA7gE5wHAeQBInAdZCcEI6S4uLk6BgYE6e/asAgICXF0O4BKcBwDnASBxHmQlTKUDAAAAYHkEIwAAAACWRzACAAAAYHkEI6Q7b29vjRgxggsMYWmcBwDnASBxHmQlLL4AAAAAwPIYMQIAAABgeQQjAAAAAJZHMAIAAABgeQQjC3rkkUf04osvpnr/b7/9ViVKlJC7u7tTxwGZmc1m07fffpvq/VesWCGbzaYzZ85kWE1AZsH7HZmRs59fUiskJESTJk266faDBw/KZrNp69at6f7cHTp0UJMmTdK9X6QNwQi31b17dz3zzDM6fPiwXnvttQw7iadPn64cOXKke7/AjcTExOiJJ55I1z5HjhypihUrpmufEh9SkfH++4EzIiJCMTExCgwMvGs1ZOSHT0Dic4bk/B8FrcbD1QUgc4uPj9fx48dVr149FShQwNXlAOkiMTFR+fLlc3UZQKbl5eXFOQLAchgxsrhLly5pwIABKliwoLJly6YHH3xQK1askHT1r9TZs2eXJNWuXVs2m02PPPKIZsyYoYULF8pm
s8lms2nFihWKiIjQoEGDHPo+ceKEPD09tWrVKknS6dOn1a5dOwUFBcnPz09PPPGE9u7da3+ujh076uzZs/Z+R44cedsagdR45JFH1Lt3b7344ovKnTu36tWrl+KvZuvWrVPFihXl4+OjKlWq6Ntvv73hX6//+OMPValSRX5+foqIiFBUVJSkq3+JHDVqlLZt22Z/D0+fPl2tW7dWy5YtHfq4fPmycufOrS+++ELS1fd4nz59FBwcLB8fHz300EPatGmTpKt/RX/00UclSUFBQbLZbOrQoYMkKTk5WWPHjlWxYsXk6+urChUqaO7cuRnwE8S9rEOHDlq5cqXeffddh/fuf0cp16xZo4cffli+vr4qXLiw+vTpo/Pnz0uSXnnlFT344IMp+q5QoYJGjx4t6er7dfTo0SpUqJC8vb1VsWJFLVmyxL5vsWLFJEmVKlWy/39zzWeffaYyZcrIx8dHpUuX1ocffpgBPwlkBVeuXFHv3r0VGBio3Llza9iwYbp255m0fs6QpAsXLqhTp07Knj27ihQpok8++STFc+/Zs0cRERHy8fHRfffdp5UrV9q3JSUlqXPnzvbfx2FhYXr33Xcdjk9KSlL//v2VI0cO5cqVSy+//LJSe9ecTz75RAUKFFBycrJDe+PGjdWpUyf79x999JFCQ0Pl5eWlsLAwzZw5074tJCREktS0aVPZbDb795K0cOFCVa5cWT4+PipevLhGjRqlK1eupKq2e4qB5dSqVcv07dvXGGNMly5dTEREhFm1apXZt2+fefvtt423t7f566+/zKVLl0xUVJSRZObNm2diYmLM2bNnTYsWLUz9+vVNTEyMiYmJMZcuXTIffPCBKVKkiElOTrY/z/vvv+/Q1qhRI1OmTBmzatUqs3XrVlOvXj1TokQJk5iYaC5dumQmTZpkAgIC7P2eO3futjUCqVGrVi3j7+9vBg4caPbs2WP27NljJJkFCxYYY4w5e/asyZkzp2nbtq3ZuXOn+eGHH0ypUqWMJLNlyxZjjDHLly83ksyDDz5oVqxYYXbu3GkefvhhExERYYwx5sKFC+all14yZcuWtb+HL1y4YBYtWmR8fX3t72djjPn++++Nr6+viYuLM8YY06dPH1OgQAHzww8/mJ07d5r27duboKAgExsba65cuWLmzZtnJJmoqCgTExNjzpw5Y4wxZsyYMaZ06dJmyZIlZv/+/WbatGnG29vbrFix4u79cJHlnTlzxlSvXt107drV/t795ZdfjCRz+vRpY4wx+/btM9myZTPvvPOO+euvv8zatWtNpUqVTIcOHYwxxvz5559Gktm3b5+932tte/fuNcYYM3HiRBMQEGBmz55t9uzZY15++WXj6elp/12+ceNGI8n88ssvJiYmxsTGxhpjjPnyyy9N/vz5zbx588yBAwfMvHnzTM6cOc306dPv4k8JmcG13+V9+/Y1e/bsMV9++aXx8/Mzn3zyiTEm7Z8zihYtanLmzGkmT55s9u7da8aOHWvc3NzMnj17jDHGREdHG0mmUKFCZu7cuWbXrl2mS5cuJnv27ObkyZPGGGMSExPN8OHDzaZNm8yBAwfstX311Vf2+t98800TFBRk5s2bZ3bt2mU6d+5ssmfPbho3bnzb137q1Cnj5eVlfvnlF3tbbGysQ9v8+fONp6enmTx5somKijITJkww7u7u5tdffzXGGHP8+HEjyUybNs3ExMSY48ePG2OMWbVqlQkICDDTp083+/fvN0uXLjUhISFm5MiRd/gvlvUQjCzoWjD6+++/jbu7uzl69KjD9jp16pghQ4YYY4w5ffq0kWSWL19u396+ffsUJ/Hx48eNh4eHWbVqlb2tevXqZtCgQcYYY/766y8jyaxdu9a+/eTJk8bX19d8/fXXxhhjpk2bZgIDAx36TU2NwO3UqlXLVKpUyaHt+mD00UcfmVy5cpmEhAT79k8//fSGwej6/5QWL15sJNmPGzFihKlQoYLD81y+fNnkzp3bfPHF
F/a2Vq1amZYtWxpjjImPjzeenp4mMjLSvj0xMdEUKFDAvPXWWw7Pfe1DqjHGXLx40fj5+Zl169Y5PF/nzp1Nq1atnPjpAI5/MDMm5Xuuc+fOplu3bg7HrF692ri5udnf/xUqVDCjR4+2bx8yZIh58MEH7d8XKFDAvP766w59PPDAA6Znz57GmP99+Lx2zl0TGhpqZs2a5dD22muvmerVq6fptSLrqlWrlilTpozDH2EHDRpkypQpk+bPGcZcDUZt27a1f5+cnGyCg4PNRx99ZIz533tz3Lhx9n0uX75sChUqZN58882b1turVy/z9NNP27/Pnz+//ff69X2kJhgZY0zjxo1Np06d7N9PmTLFFChQwCQlJRljjImIiDBdu3Z1OKZ58+amQYMG9u+v/7/vmjp16pg33njDoW3mzJkmf/78qarrXsJUOgvbsWOHkpKSVKpUKfn7+9sfK1eu1P79+53qK0+ePHr88ccVGRkpSYqOjtZvv/2mNm3aSJJ2794tDw8Ph6kWuXLlUlhYmHbv3n1XaoS13X///TfdFhUVpfLly8vHx8feVrVq1RvuW758efvX+fPnlyQdP378pn17eHioRYsW9nPj/PnzWrhwof3c2L9/vy5fvqwaNWrYj/H09FTVqlVveW7s27dPFy5c0GOPPeZwbnzxxRecG0h327Zt0/Tp0x3ea/Xq1VNycrKio6MlSW3atNGsWbMkScYYzZ492/4+j4uL0z///OPwPpekGjVq3PJ9fv78ee3fv1+dO3d2eO4xY8bwPreoatWqyWaz2b+vXr269u7dq127dqXpc8Y11/9ut9lsypcvX4rf7dWrV7d/7eHhoSpVqjj0PXnyZN1///3KkyeP/P399cknn+jQoUOSpLNnzyomJsahvmt9pFabNm00b948Xbp0SZIUGRmpZ599Vm5uVz/O79692+lzTLp6fo8ePdrhHOvatatiYmJ04cKFVNd3L2DxBQuLj4+Xu7u7/vjjD7m7uzts8/f3d7q/Nm3aqE+fPnr//fc1a9YslStXTuXKlctUNcK6smXLli79eHp62r++9p/zf+d8/1ebNm1Uq1YtHT9+XD///LN8fX1Vv379O6ojPj5ekrR48WIVLFjQYZu3t/cd9Q38V3x8vLp3764+ffqk2FakSBFJUqtWrTRo0CBt3rxZCQkJOnz4cIrr69LyvJL06aefpriG6b//JwB34vrf7dLV3++3+91+vTlz5mjAgAGaMGGCqlevruzZs+vtt9/Whg0b0q3Gp556SsYYLV68WA888IBWr16td9555477jY+P16hRo9SsWbMU267/g6EVEIwsrFKlSkpKStLx48f18MMPp/o4Ly8vJSUlpWhv3LixunXrpiVLlmjWrFlq166dfVuZMmV05coVbdiwQREREZKk2NhYRUVFKTw8/Kb9prVGwBlhYWH68ssvdenSJXuouLb4gTNudm5ERESocOHC+uqrr/Tjjz+qefPm9v+Er10ku3btWhUtWlTS1cUZNm3aZF8+2cvLS5Ic+g4PD5e3t7cOHTqkWrVqOV0rcL2bvXevqVy5snbt2qUSJUrcdJ9ChQqpVq1aioyMVEJCgh577DEFBwdLkgICAlSgQAGtXbvW4f26du1a++jsjd7nefPmVYECBXTgwAH76BOs7b9BY/369SpZsqTCw8PT9DnDGevXr1fNmjUlXV0E4o8//lDv3r0lXX0vR0REqGfPnvb9rx/VDAwMVP78+bVhw4YUfVSuXDlVz+/j46NmzZopMjJS+/btU1hYmMOxZcqU0dq1a9W+fXt729q1a+2vX7oaAP/7M6hcubKioqJueX5bBcHIwkqVKqU2bdqoXbt2mjBhgipVqqQTJ05o2bJlKl++vBo2bHjD40JCQvTTTz8pKipKuXLlUmBgoDw9PZUtWzY1adJEw4YN0+7du9WqVSv7MSVLllTjxo3VtWtXTZkyRdmzZ9fgwYNVsGBBNW7c2N5vfHy8li1bpgoVKsjP
zy/NNQLOaN26tV599VV169ZNgwcP1qFDhzR+/HhJcpiycTshISGKjo7W1q1bVahQIWXPnt0etFq3bq2PP/5Yf/31l5YvX24/Jlu2bHr++ec1cOBA5cyZU0WKFNFbb72lCxcuqHPnzpKkokWLymazadGiRWrQoIF8fX2VPXt2DRgwQP369VNycrIeeughnT17VmvXrlVAQIDDf4zA7YSEhGjDhg06ePCg/P39U/ylfNCgQapWrZp69+6tLl26KFu2bNq1a5d+/vlnffDBB/b92rRpoxEjRigxMTHFX7IHDhyoESNGKDQ0VBUrVtS0adO0detW+zTT4OBg+fr6asmSJSpUqJB8fHwUGBioUaNGqU+fPgoMDFT9+vV16dIl/f777zp9+rT69++f8T8cZCqHDh1S//791b17d23evFnvv/++JkyYkObPGX5+fql+7smTJ6tkyZIqU6aM3nnnHZ0+fdq+IlzJkiX1xRdf6KefflKxYsU0c+ZMbdq0yb7aoiT17dtX48aNU8mSJVW6dGlNnDjR6fvTtWnTRk8++aR27typtm3bOmwbOHCgWrRooUqVKqlu3br6/vvvNX/+fP3yyy/2fUJCQrRs2TLVqFFD3t7eCgoK0vDhw/Xkk0+qSJEieuaZZ+Tm5qZt27bpzz//1JgxY5yqL8tz9UVOuPuuv8j22ioqISEhxtPT0+TPn980bdrUbN++3Rhz48UXjh8/bh577DHj7++fYtsPP/xgJJmaNWumeN5Tp06Z5557zgQGBhpfX19Tr169FCvL9ejRw+TKlctIMiNGjEhVjcDt/PfCcmNSXoC6du1aU758eePl5WXuv/9+M2vWLCPJvirRjRZA2LJli5FkoqOjjTFXF0R4+umnTY4cOewr/1yza9cuI8kULVrU4cJhY4xJSEgwL7zwgsmdO7fx9vY2NWrUMBs3bnTYZ/To0SZfvnzGZrOZ9u3bG2OuXiA8adIkExYWZjw9PU2ePHlMvXr1zMqVK+/o5wXriYqKMtWqVTO+vr729+5/3+8bN260/+7Pli2bKV++fIrFFE6fPm28vb2Nn5+fw0qMxhiTlJRkRo4caQoWLGg8PT1NhQoVzI8//uiwz6effmoKFy5s3NzcTK1ateztkZGRpmLFisbLy8sEBQWZmjVrmvnz56f7zwGZW61atUzPnj1Njx49TEBAgAkKCjKvvPKK/XdqWj9nFC1a1LzzzjsO+1WoUMG+/driC7NmzTJVq1Y1Xl5eJjw83L7amzFXf/936NDBBAYGmhw5cpjnn3/eDB482GFBnsuXL5u+ffuagIAAkyNHDtO/f3/Trl27VC++YMzV8yh//vxGktm/f3+K7R9++KEpXry48fT0NKVKlXJY+McYY7777jtTokQJ4+HhYYoWLWpvX7JkiYmIiDC+vr4mICDAVK1a1b7an5XYjEnlAuoAYCGRkZH2e174+vq6uhwAAJDBmEoHAJK++OILFS9eXAULFtS2bds0aNAgtWjRglAEAIBFEIwAQNKxY8c0fPhwHTt2TPnz51fz5s31+uuvu7osAIAFHDp0yGGRhP/atWuXfQVIZBym0gEAAAAudOXKFR08ePCm20NCQuThwXhGRiMYAQAAALA8N1cXAAAAAACuRjACAAAAYHkEIwAAAACWRzACAAAAYHkEIwAAAACWRzACAKTZsWPH1LdvX5UoUUI+Pj7KmzevatSooY8++kgXLlyw7xcSEiKbzSabzaZs2bKpcuXK+uabb1Jsu9GjQ4cOt6xh7dq18vDwUMWKFe1tTz31lOrXr3/D/VevXi2bzabt27en2FauXDn16NHjhsfNnDlT3t7eOnny5G1+KpLNZtO333572/0AAJkHwQgAkCYHDhxQpUqVtHTpUr3xxhvasmWLfvvtN7388statGiRfvnlF4f9R48erZiYGG3ZskUPPPCAWrZsqXXr1mnTpk2KiYlRTEyM5s2bJ0mKioqyt7377rs3reHMmTNq166d6tSp49De
uXNn/fzzzzpy5EiKY6ZNm6YqVaqofPnyKbZ17txZc+bMUUJCwg2Pa9SokXLnzp2qnw8AIGshGAEA0qRnz57y8PDQ77//rhYtWqhMmTIqXry4GjdurMWLF+upp55y2D979uzKly+fSpUqpcmTJ8vX11fff/+98uTJo3z58ilfvnzKmTOnJCk4ONjeFhgYeNMaevToodatW6t69eoO7U8++aTy5Mmj6dOnO7THx8frm2++UefOnW/YX9u2bZWQkGAPaNdER0drxYoV9uM++ugjhYaGysvLS2FhYZo5c6Z935CQEElS06ZNZbPZ7N9L0sKFC1W5cmX5+PioePHiGjVqlK5cuXLT1wcAuHsIRgAAp8XGxmrp0qXq1auXsmXLdsN9bDbbTY/38PCQp6enEhMT01zDtGnTdODAAY0YMeKG/bdr107Tp0/X9fcx/+abb5SUlKRWrVrdsM/cuXOrcePGmjp1qkP79OnTVahQIT3++ONasGCB+vbtq5deekl//vmnunfvro4dO2r58uWSpE2bNtnri4mJsX+/evVqtWvXTn379tWuXbs0ZcoUTZ8+Xa+//nqafwYAgPRDMAIAOG3fvn0yxigsLMyhPXfu3PL395e/v78GDRp0w2MTExM1duxYnT17VrVr107T8+/du1eDBw/Wl19+KQ8Pjxvu06lTJ+3fv18rV660t02bNk1PP/30LUehOnfurBUrVig6OlqSZIzRjBkz1L59e7m5uWn8+PHq0KGDevbsqVKlSql///5q1qyZxo8fL0nKkyePJClHjhzKly+f/ftRo0Zp8ODBat++vYoXL67HHntMr732mqZMmZKmnwEAIH0RjAAA6Wbjxo3aunWrypYtq0uXLjlsGzRokPz9/eXn56c333xT48aNU8OGDW/b57Wg5e/vrx49eigpKUmtW7fWqFGjVKpUqZseV7p0aUVERNhHf/bt26fVq1ffdBrdNY899pgKFSqkadOmSZKWLVumQ4cOqWPHjpKk3bt3q0aNGg7H1KhRQ7t3775lv9u2bdPo0aMdXk/Xrl0VExPjsFAFAMA1bvxnNgAAbqFEiRKy2WyKiopyaC9evLgkydfXN8UxAwcOVIcOHeTv76+8efPecqrd9bZu3Wr/OiAgQOfOndPvv/+uLVu2qHfv3pKk5ORkGWPk4eGhpUuX2keiOnfurBdeeEGTJ0/WtGnTFBoaqlq1at3y+dzc3NShQwfNmDFDI0eO1LRp0/Too4/aX1taxcfHa9SoUWrWrFmKbT4+PnfUNwDgzjFiBABwWq5cufTYY4/pgw8+0Pnz51N1TO7cuVWiRAnly5cv1aFIuhrCrj2Cg4MVEBCgHTt2aOvWrfZHjx49FBYWpq1bt+rBBx+0H9uiRQu5ublp1qxZ+uKLL9SpU6dUPXfHjh11+PBhzZ8/XwsWLHAYZSpTpozWrl3rsP/atWsVHh5u/97T01NJSUkO+1SuXFlRUVEOr+faw82N/44BwNUYMQIApMmHH36oGjVqqEqVKho5cqTKly8vNzc3bdq0SXv27NH999+fIc/r5uam++67z6EtODhYPj4+Kdr9/f3VsmVLDRkyRHFxcbe9J9I1xYoVU+3atdWtWzd5e3s7jPIMHDhQLVq0UKVKlVS3bl19//33mj9/vsPy5CEhIVq2bJlq1Kghb29vBQUFafjw4XryySdVpEgRPfPMM3Jzc9O2bdv0559/asyYMWn/gQAA0gV/ogIApEloaKi2bNmiunXrasiQIapQoYKqVKmi999/XwMGDNBrr73m6hIlXZ1Od/r0adWrV08FChRw+rjWrVs7THVr0qSJ3n33XY0fP15ly5bVlClTNG3aND3yyCP2fSZMmKCff/5ZhQsXVqVKlSRJ9erV06JFi7R06VI98MADqlatmt555x0VLVo03V4rACDtbOb6dUwBAAAAwIIYMQIAAABgeQQjAAAAAJZHMAIAAABgeQQjAAAAAJZHMAIAAABgeQQjAAAAAJZHMAIA
AABgeQQjAAAAAJZHMAIAAABgeQQjAAAAAJZHMAIAAABgef8HWA2Jk61Jt7wAAAAASUVORK5CYII=",
112
- "text/plain": [
113
- "<Figure size 1000x700 with 1 Axes>"
114
- ]
115
- },
116
- "metadata": {},
117
- "output_type": "display_data"
118
- }
119
- ],
120
- "source": [
121
- "import seaborn as sns\n",
122
- "import matplotlib.pyplot as plt\n",
123
- "from sklearn.metrics import confusion_matrix\n",
124
- "import pandas as pd\n",
125
- "\n",
126
- "# Assuming df is your DataFrame\n",
127
- "\n",
128
- "# True labels and predictions\n",
129
- "y_true = filtered_df[\"type\"]\n",
130
- "y_pred = filtered_df[\"gpt_vote\"]\n",
131
- "\n",
132
- "# Compute the confusion matrix\n",
133
- "conf_matrix = confusion_matrix(y_true, y_pred, labels=[\"leftvote\", \"rightvote\", \"tievote\", \"bothbad_vote\"])\n",
134
- "\n",
135
- "# Create a pandas DataFrame from the confusion matrix\n",
136
- "conf_matrix_df = pd.DataFrame(conf_matrix, index=[\"leftvote\", \"rightvote\", \"tievote\", \"bothbad_vote\"], columns=[\"leftvote\", \"rightvote\", \"tievote\", \"bothbad_vote\"])\n",
137
- "\n",
138
- "# Plotting the heatmap\n",
139
- "plt.figure(figsize=(10, 7))\n",
140
- "sns.heatmap(conf_matrix_df, annot=True, fmt=\"d\", cmap=\"Blues\", cbar=False)\n",
141
- "plt.title(\"Arena Human vs GPT-4V Confusion Matrix\")\n",
142
- "plt.xlabel(\"GPT-4V Vote\")\n",
143
- "plt.ylabel(\"Arena Human Vote\")\n",
144
- "plt.show()\n"
145
- ]
146
- },
147
- {
148
- "cell_type": "code",
149
- "execution_count": 46,
150
- "metadata": {},
151
- "outputs": [
152
- {
153
- "name": "stdout",
154
- "output_type": "stream",
155
- "text": [
156
- "Accuracy: 0.5842911877394636\n",
157
- "F1 Score (Macro): 0.514392348541452\n",
158
- "F1 Score (Micro): 0.5842911877394636\n",
159
- "F1 Score (Weighted): 0.5536668839130223\n"
160
- ]
161
- }
162
- ],
163
- "source": [
164
- "from sklearn.metrics import accuracy_score, f1_score\n",
165
- "\n",
166
- "# Assuming df is your DataFrame and it contains 'type' as true labels and 'gpt_vote' as predictions\n",
167
- "y_true = filtered_df['type']\n",
168
- "y_pred = filtered_df['gpt_vote']\n",
169
- "\n",
170
- "# Calculate accuracy\n",
171
- "accuracy = accuracy_score(y_true, y_pred)\n",
172
- "print(f'Accuracy: {accuracy}')\n",
173
- "\n",
174
- "# Calculate F1 score, here using 'macro' average to treat all classes equally\n",
175
- "f1 = f1_score(y_true, y_pred, average='macro')\n",
176
- "print(f'F1 Score (Macro): {f1}')\n",
177
- "\n",
178
- "# If you want to calculate F1 score with other averages, for example 'micro' or 'weighted', you can do:\n",
179
- "f1_micro = f1_score(y_true, y_pred, average='micro')\n",
180
- "print(f'F1 Score (Micro): {f1_micro}')\n",
181
- "\n",
182
- "f1_weighted = f1_score(y_true, y_pred, average='weighted')\n",
183
- "print(f'F1 Score (Weighted): {f1_weighted}')"
184
- ]
185
- },
186
- {
187
- "cell_type": "code",
188
- "execution_count": null,
189
- "metadata": {},
190
- "outputs": [],
191
- "source": []
192
- },
193
- {
194
- "cell_type": "code",
195
- "execution_count": 47,
196
- "metadata": {},
197
- "outputs": [
198
- {
199
- "name": "stdout",
200
- "output_type": "stream",
201
- "text": [
202
- "Cohen's Kappa Score: 0.3442144615665177\n"
203
- ]
204
- }
205
- ],
206
- "source": [
207
- "from sklearn.metrics import cohen_kappa_score\n",
208
- "\n",
209
- "# Assuming df is your DataFrame and it contains 'type' as true labels and 'gpt_vote' as predictions\n",
210
- "y_true = filtered_df['type']\n",
211
- "y_pred = filtered_df['gpt_vote']\n",
212
- "\n",
213
- "# Calculate Cohen's Kappa score\n",
214
- "kappa = cohen_kappa_score(y_true, y_pred)\n",
215
- "print(f'Cohen\\'s Kappa Score: {kappa}')\n"
216
- ]
217
- },
218
- {
219
- "cell_type": "code",
220
- "execution_count": 48,
221
- "metadata": {},
222
- "outputs": [
223
- {
224
- "name": "stdout",
225
- "output_type": "stream",
226
- "text": [
227
- "Accuracy Score: 0.5842911877394636\n"
228
- ]
229
- }
230
- ],
231
- "source": [
232
- "from sklearn.metrics import accuracy_score\n",
233
- "accuracy = accuracy_score(y_true, y_pred)\n",
234
- "print(f'Accuracy Score: {accuracy}')\n"
235
- ]
236
- },
237
- {
238
- "cell_type": "code",
239
- "execution_count": 49,
240
- "metadata": {},
241
- "outputs": [
242
- {
243
- "name": "stdout",
244
- "output_type": "stream",
245
- "text": [
246
- "Pearson Correlation Coefficient: 0.2880096104357029\n"
247
- ]
248
- }
249
- ],
250
- "source": [
251
- "import pandas as pd\n",
252
- "\n",
253
- "# Assuming filtered_df is your DataFrame and it contains 'type' and 'gpt_vote' columns\n",
254
- "# Convert 'type' and 'gpt_vote' to categorical codes\n",
255
- "filtered_df['type_int'] = pd.factorize(filtered_df['type'])[0]\n",
256
- "filtered_df['gpt_vote_int'] = pd.factorize(filtered_df['gpt_vote'])[0]\n",
257
- "\n",
258
- "# Now you can calculate Pearson correlation between these new integer columns\n",
259
- "pearson_correlation = filtered_df['type_int'].corr(filtered_df['gpt_vote_int'])\n",
260
- "print(f'Pearson Correlation Coefficient: {pearson_correlation}')\n"
261
- ]
262
- },
263
- {
264
- "cell_type": "code",
265
- "execution_count": null,
266
- "metadata": {},
267
- "outputs": [],
268
- "source": []
269
- },
270
- {
271
- "cell_type": "code",
272
- "execution_count": null,
273
- "metadata": {},
274
- "outputs": [],
275
- "source": []
276
- },
277
- {
278
- "cell_type": "code",
279
- "execution_count": null,
280
- "metadata": {},
281
- "outputs": [],
282
- "source": []
283
- },
284
- {
285
- "cell_type": "code",
286
- "execution_count": null,
287
- "metadata": {},
288
- "outputs": [],
289
- "source": []
290
- },
291
- {
292
- "cell_type": "code",
293
- "execution_count": null,
294
- "metadata": {},
295
- "outputs": [],
296
- "source": []
297
- }
298
- ],
299
- "metadata": {
300
- "kernelspec": {
301
- "display_name": "otask",
302
- "language": "python",
303
- "name": "python3"
304
- },
305
- "language_info": {
306
- "codemirror_mode": {
307
- "name": "ipython",
308
- "version": 3
309
- },
310
- "file_extension": ".py",
311
- "mimetype": "text/x-python",
312
- "name": "python",
313
- "nbconvert_exporter": "python",
314
- "pygments_lexer": "ipython3",
315
- "version": "3.10.13"
316
- },
317
- "orig_nbformat": 4
318
- },
319
- "nbformat": 4,
320
- "nbformat_minor": 2
321
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
arena_elo/generation_model_info.json DELETED
@@ -1,57 +0,0 @@
1
- {
2
- "LCM": {
3
- "Link": "https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7",
4
- "License": "MIT License",
5
- "Organization": "Tsinghua University"
6
- },
7
- "PlayGround V2": {
8
- "Link": "https://huggingface.co/playgroundai/playground-v2-1024px-aesthetic",
9
- "License": "Playground v2 Community License",
10
- "Organization": "Playground"
11
- },
12
- "PlayGround V2.5": {
13
- "Link": "https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic",
14
- "License": "Playground v2.5 Community License",
15
- "Organization": "Playground"
16
- },
17
- "OpenJourney": {
18
- "Link": "https://huggingface.co/prompthero/openjourney",
19
- "License": "creativeml-openrail-m",
20
- "Organization": "PromptHero"
21
- },
22
- "SDXLTurbo": {
23
- "Link": "https://huggingface.co/stabilityai/sdxl-turbo",
24
- "License": "sai-nc-community (other)",
25
- "Organization": "Stability AI"
26
- },
27
- "SDXL": {
28
- "Link": "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0",
29
- "License": "openrail++",
30
- "Organization": "Stability AI"
31
- },
32
- "PixArtAlpha": {
33
- "Link": "https://huggingface.co/PixArt-alpha/PixArt-XL-2-1024-MS",
34
- "License": "openrail++",
35
- "Organization": "PixArt-alpha"
36
- },
37
- "SDXLLightning": {
38
- "Link": "https://huggingface.co/ByteDance/SDXL-Lightning",
39
- "License": "openrail++",
40
- "Organization": "ByteDance"
41
- },
42
- "StableCascade": {
43
- "Link": "https://huggingface.co/stabilityai/stable-cascade",
44
- "License": "stable-cascade-nc-community (other)",
45
- "Organization": "Stability AI"
46
- },
47
- "LCM(v1.5/XL)": {
48
- "Link": "https://fal.ai/models/fal-ai/fast-lcm-diffusion/api",
49
- "License": "openrail++",
50
- "Organization": "Latent Consistency"
51
- },
52
- "PixArtSigma": {
53
- "Link": "https://fal.ai/models/fal-ai/pixart-sigma",
54
- "License": "openrail++",
55
- "Organization": "PixArt-alpha"
56
- }
57
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
arena_elo/get_latest_data.sh DELETED
@@ -1,17 +0,0 @@
1
-
2
- # set LOGDIR to default if not set before
3
- if [ -z "$LOGDIR" ]; then
4
- export LOGDIR="./vision-arena-logs"
5
- fi
6
- mkdir -p results
7
-
8
-
9
- # # for battle data
10
- python -m elo_rating.clean_battle_data --model_infos_file "./model_infos.json" --mode conv_release
11
- battle_cutoff_date=`cat cut_off_date.txt` && rm cut_off_date.txt && echo "Battle data last updated on $battle_cutoff_date"
12
-
13
- mkdir -p ./results/latest
14
- mkdir -p ./results/$battle_cutoff_date && mv ./clean_battle_conv_$battle_cutoff_date.json ./results/$battle_cutoff_date/clean_battle_conv.json
15
- cp ./results/$battle_cutoff_date/clean_battle_conv.json ./results/latest/clean_battle_conv.json
16
-
17
- echo "Battle data last updated on $battle_cutoff_date" >> ./results/latest/latest_updated_date.txt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
arena_elo/pyproject.toml DELETED
@@ -1,28 +0,0 @@
1
- [build-system]
2
- requires = ["setuptools>=61.0"]
3
- build-backend = "setuptools.build_meta"
4
-
5
- [project]
6
- name = "arena_elo"
7
- version = "0.2.35"
8
- description = "Elo rating system for WildVision Bench Arena"
9
- readme = "README.md"
10
- requires-python = ">=3.9"
11
- classifiers = [
12
- "Programming Language :: Python :: 3",
13
- "License :: OSI Approved :: Apache Software License",
14
- ]
15
- dependencies = [
16
- "numpy", "prompt_toolkit>=3.0.0", "uvicorn","polyglot", "pyicu", "pycld2", "morfessor", "scikit-learn",
17
- "pytz", "tqdm", "pandas", "plotly", "fire", "Pillow"
18
- ]
19
-
20
- [project.urls]
21
- "Homepage" = "https://github.com/WildVision-Bench/Arena-Elo"
22
- "Bug Tracker" = "https://github.com/WildVision-Bench/Arena-Elo/issues"
23
-
24
- [tool.setuptools.packages.find]
25
- exclude = ["assets*", "benchmark*", "docs", "dist*", "playground*", "scripts*", "tests*"]
26
-
27
- [tool.wheel]
28
- exclude = ["assets*", "benchmark*", "docs", "dist*", "playground*", "scripts*", "tests*"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
arena_elo/requirements.txt DELETED
@@ -1,28 +0,0 @@
1
- -e git+https://github.com/WildVision-Bench/Arena-Elo.git@9dc2fa8543a2e9eda3d5bc01c2212fdfcdd4bfb5#egg=arena_elo
2
- click==8.1.7
3
- fire==0.5.0
4
- h11==0.14.0
5
- joblib==1.3.2
6
- Morfessor==2.0.6
7
- numpy==1.26.4
8
- packaging==23.2
9
- pandas==2.2.0
10
- pillow==10.2.0
11
- plotly==5.18.0
12
- polyglot==16.7.4
13
- prompt-toolkit==3.0.43
14
- pycld2==0.41
15
- PyICU==2.12
16
- python-dateutil==2.8.2
17
- pytz==2024.1
18
- scikit-learn==1.4.0
19
- scipy==1.12.0
20
- six==1.16.0
21
- tenacity==8.2.3
22
- termcolor==2.4.0
23
- threadpoolctl==3.2.0
24
- tqdm==4.66.2
25
- typing_extensions==4.9.0
26
- tzdata==2024.1
27
- uvicorn==0.27.1
28
- wcwidth==0.2.13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
arena_elo/results/latest/clean_battle_image_editing.json DELETED
@@ -1,1578 +0,0 @@
1
- [
2
- {
3
- "model_a": "CycleDiffusion",
4
- "model_b": "InstructPix2Pix",
5
- "winner": "model_b",
6
- "judge": "arena_user_::1",
7
- "anony": true,
8
- "tstamp": 1707712630.872
9
- },
10
- {
11
- "model_a": "CycleDiffusion",
12
- "model_b": "InstructPix2Pix",
13
- "winner": "model_b",
14
- "judge": "arena_user_::1",
15
- "anony": false,
16
- "tstamp": 1707712699.668
17
- },
18
- {
19
- "model_a": "Pix2PixZero",
20
- "model_b": "MagicBrush",
21
- "winner": "model_a",
22
- "judge": "arena_user_::1",
23
- "anony": true,
24
- "tstamp": 1707712896.0427
25
- },
26
- {
27
- "model_a": "CycleDiffusion",
28
- "model_b": "InstructPix2Pix",
29
- "winner": "model_b",
30
- "judge": "arena_user_::1",
31
- "anony": false,
32
- "tstamp": 1707712929.7061
33
- },
34
- {
35
- "model_a": "CycleDiffusion",
36
- "model_b": "InstructPix2Pix",
37
- "winner": "model_b",
38
- "judge": "arena_user_::1",
39
- "anony": true,
40
- "tstamp": 1707713147.0445
41
- },
42
- {
43
- "model_a": "CycleDiffusion",
44
- "model_b": "PNP",
45
- "winner": "model_b",
46
- "judge": "arena_user_::1",
47
- "anony": true,
48
- "tstamp": 1707713198.9284
49
- },
50
- {
51
- "model_a": "CycleDiffusion",
52
- "model_b": "Prompt2prompt",
53
- "winner": "model_b",
54
- "judge": "arena_user_::1",
55
- "anony": true,
56
- "tstamp": 1707713210.1306
57
- },
58
- {
59
- "model_a": "Prompt2prompt",
60
- "model_b": "SDEdit",
61
- "winner": "model_a",
62
- "judge": "arena_user_::1",
63
- "anony": true,
64
- "tstamp": 1707713747.5115
65
- },
66
- {
67
- "model_a": "PNP",
68
- "model_b": "Pix2PixZero",
69
- "winner": "model_a",
70
- "judge": "arena_user_::1",
71
- "anony": true,
72
- "tstamp": 1707715613.7226
73
- },
74
- {
75
- "model_a": "CycleDiffusion",
76
- "model_b": "MagicBrush",
77
- "winner": "model_b",
78
- "judge": "arena_user_::1",
79
- "anony": true,
80
- "tstamp": 1707765708.2644
81
- },
82
- {
83
- "model_a": "PNP",
84
- "model_b": "CycleDiffusion",
85
- "winner": "model_a",
86
- "judge": "arena_user_::1",
87
- "anony": true,
88
- "tstamp": 1707765861.2742
89
- },
90
- {
91
- "model_a": "PNP",
92
- "model_b": "CycleDiffusion",
93
- "winner": "model_a",
94
- "judge": "arena_user_::1",
95
- "anony": false,
96
- "tstamp": 1707765975.0206
97
- },
98
- {
99
- "model_a": "PNP",
100
- "model_b": "CycleDiffusion",
101
- "winner": "model_a",
102
- "judge": "arena_user_::1",
103
- "anony": true,
104
- "tstamp": 1707768866.9065
105
- },
106
- {
107
- "model_a": "SDEdit",
108
- "model_b": "MagicBrush",
109
- "winner": "model_b",
110
- "judge": "arena_user_::1",
111
- "anony": true,
112
- "tstamp": 1707771673.2989
113
- },
114
- {
115
- "model_a": "SDEdit",
116
- "model_b": "MagicBrush",
117
- "winner": "model_b",
118
- "judge": "arena_user_::1",
119
- "anony": true,
120
- "tstamp": 1707784377.6617
121
- },
122
- {
123
- "model_a": "SDEdit",
124
- "model_b": "MagicBrush",
125
- "winner": "model_b",
126
- "judge": "arena_user_::1",
127
- "anony": true,
128
- "tstamp": 1707784466.8915
129
- },
130
- {
131
- "model_a": "CycleDiffusion",
132
- "model_b": "PNP",
133
- "winner": "model_b",
134
- "judge": "arena_user_::1",
135
- "anony": true,
136
- "tstamp": 1707784983.9581
137
- },
138
- {
139
- "model_a": "MagicBrush",
140
- "model_b": "SDEdit",
141
- "winner": "model_a",
142
- "judge": "arena_user_::1",
143
- "anony": true,
144
- "tstamp": 1707785277.16
145
- },
146
- {
147
- "model_a": "MagicBrush",
148
- "model_b": "SDEdit",
149
- "winner": "model_a",
150
- "judge": "arena_user_::1",
151
- "anony": true,
152
- "tstamp": 1707795299.0619
153
- },
154
- {
155
- "model_a": "MagicBrush",
156
- "model_b": "SDEdit",
157
- "winner": "tie (bothbad)",
158
- "judge": "arena_user_::1",
159
- "anony": true,
160
- "tstamp": 1707795798.752
161
- },
162
- {
163
- "model_a": "SDEdit",
164
- "model_b": "Prompt2prompt",
165
- "winner": "model_b",
166
- "judge": "arena_user_::1",
167
- "anony": false,
168
- "tstamp": 1707796435.7996
169
- },
170
- {
171
- "model_a": "SDEdit",
172
- "model_b": "CycleDiffusion",
173
- "winner": "model_b",
174
- "judge": "arena_user_::1",
175
- "anony": false,
176
- "tstamp": 1707797278.7369
177
- },
178
- {
179
- "model_a": "SDEdit",
180
- "model_b": "CycleDiffusion",
181
- "winner": "model_a",
182
- "judge": "arena_user_::1",
183
- "anony": false,
184
- "tstamp": 1707797279.6004
185
- },
186
- {
187
- "model_a": "SDEdit",
188
- "model_b": "Prompt2prompt",
189
- "winner": "model_b",
190
- "judge": "arena_user_::1",
191
- "anony": true,
192
- "tstamp": 1707805086.9739
193
- },
194
- {
195
- "model_a": "PNP",
196
- "model_b": "SDEdit",
197
- "winner": "model_a",
198
- "judge": "arena_user_::1",
199
- "anony": true,
200
- "tstamp": 1707805220.3253
201
- },
202
- {
203
- "model_a": "InstructPix2Pix",
204
- "model_b": "CycleDiffusion",
205
- "winner": "tie (bothbad)",
206
- "judge": "arena_user_::1",
207
- "anony": true,
208
- "tstamp": 1707805332.6322
209
- },
210
- {
211
- "model_a": "InstructPix2Pix",
212
- "model_b": "Prompt2prompt",
213
- "winner": "model_b",
214
- "judge": "arena_user_::1",
215
- "anony": true,
216
- "tstamp": 1707805476.0509
217
- },
218
- {
219
- "model_a": "InstructPix2Pix",
220
- "model_b": "Prompt2prompt",
221
- "winner": "model_b",
222
- "judge": "arena_user_::1",
223
- "anony": true,
224
- "tstamp": 1707818374.3438
225
- },
226
- {
227
- "model_a": "PNP",
228
- "model_b": "Prompt2prompt",
229
- "winner": "model_b",
230
- "judge": "arena_user_::1",
231
- "anony": true,
232
- "tstamp": 1707834631.9088
233
- },
234
- {
235
- "model_a": "InstructPix2Pix",
236
- "model_b": "SDEdit",
237
- "winner": "model_a",
238
- "judge": "arena_user_::1",
239
- "anony": true,
240
- "tstamp": 1707834954.0147
241
- },
242
- {
243
- "model_a": "Prompt2prompt",
244
- "model_b": "Pix2PixZero",
245
- "winner": "tie (bothbad)",
246
- "judge": "arena_user_::1",
247
- "anony": true,
248
- "tstamp": 1707835366.544
249
- },
250
- {
251
- "model_a": "PNP",
252
- "model_b": "SDEdit",
253
- "winner": "model_a",
254
- "judge": "arena_user_::1",
255
- "anony": true,
256
- "tstamp": 1707835643.6178
257
- },
258
- {
259
- "model_a": "MagicBrush",
260
- "model_b": "InstructPix2Pix",
261
- "winner": "tie (bothbad)",
262
- "judge": "arena_user_::1",
263
- "anony": true,
264
- "tstamp": 1707835789.25
265
- },
266
- {
267
- "model_a": "MagicBrush",
268
- "model_b": "PNP",
269
- "winner": "tie (bothbad)",
270
- "judge": "arena_user_::1",
271
- "anony": true,
272
- "tstamp": 1707836852.671
273
- },
274
- {
275
- "model_a": "MagicBrush",
276
- "model_b": "InstructPix2Pix",
277
- "winner": "model_a",
278
- "judge": "arena_user_::1",
279
- "anony": false,
280
- "tstamp": 1707836952.6082
281
- },
282
- {
283
- "model_a": "CycleDiffusion",
284
- "model_b": "SDEdit",
285
- "winner": "tie (bothbad)",
286
- "judge": "arena_user_::1",
287
- "anony": false,
288
- "tstamp": 1707837020.7148
289
- },
290
- {
291
- "model_a": "InstructPix2Pix",
292
- "model_b": "PNP",
293
- "winner": "model_a",
294
- "judge": "arena_user_::1",
295
- "anony": true,
296
- "tstamp": 1707837226.2259
297
- },
298
- {
299
- "model_a": "Prompt2prompt",
300
- "model_b": "Pix2PixZero",
301
- "winner": "model_a",
302
- "judge": "arena_user_::1",
303
- "anony": true,
304
- "tstamp": 1707838166.1449
305
- },
306
- {
307
- "model_a": "InstructPix2Pix",
308
- "model_b": "MagicBrush",
309
- "winner": "tie (bothbad)",
310
- "judge": "arena_user_::1",
311
- "anony": true,
312
- "tstamp": 1707838405.0013
313
- },
314
- {
315
- "model_a": "MagicBrush",
316
- "model_b": "CycleDiffusion",
317
- "winner": "model_a",
318
- "judge": "arena_user_::1",
319
- "anony": true,
320
- "tstamp": 1707839133.3126
321
- },
322
- {
323
- "model_a": "Prompt2prompt",
324
- "model_b": "InstructPix2Pix",
325
- "winner": "model_a",
326
- "judge": "arena_user_::1",
327
- "anony": true,
328
- "tstamp": 1707839484.6824
329
- },
330
- {
331
- "model_a": "PNP",
332
- "model_b": "InstructPix2Pix",
333
- "winner": "tie (bothbad)",
334
- "judge": "arena_user_::1",
335
- "anony": true,
336
- "tstamp": 1707850104.2499
337
- },
338
- {
339
- "model_a": "InstructPix2Pix",
340
- "model_b": "Pix2PixZero",
341
- "winner": "model_a",
342
- "judge": "arena_user_::1",
343
- "anony": true,
344
- "tstamp": 1707851384.7689
345
- },
346
- {
347
- "model_a": "PNP",
348
- "model_b": "MagicBrush",
349
- "winner": "model_b",
350
- "judge": "arena_user_::1",
351
- "anony": true,
352
- "tstamp": 1707851936.9466
353
- },
354
- {
355
- "model_a": "CycleDiffusion",
356
- "model_b": "MagicBrush",
357
- "winner": "tie (bothbad)",
358
- "judge": "arena_user_::1",
359
- "anony": true,
360
- "tstamp": 1707852836.3291
361
- },
362
- {
363
- "model_a": "CycleDiffusion",
364
- "model_b": "MagicBrush",
365
- "winner": "tie (bothbad)",
366
- "judge": "arena_user_::1",
367
- "anony": false,
368
- "tstamp": 1707852878.673
369
- },
370
- {
371
- "model_a": "Prompt2prompt",
372
- "model_b": "InstructPix2Pix",
373
- "winner": "model_a",
374
- "judge": "arena_user_::1",
375
- "anony": true,
376
- "tstamp": 1707853008.1359
377
- },
378
- {
379
- "model_a": "InstructPix2Pix",
380
- "model_b": "Pix2PixZero",
381
- "winner": "model_a",
382
- "judge": "arena_user_::1",
383
- "anony": false,
384
- "tstamp": 1707856807.6229
385
- },
386
- {
387
- "model_a": "MagicBrush",
388
- "model_b": "Pix2PixZero",
389
- "winner": "tie (bothbad)",
390
- "judge": "arena_user_::1",
391
- "anony": false,
392
- "tstamp": 1707863740.3507
393
- },
394
- {
395
- "model_a": "MagicBrush",
396
- "model_b": "PNP",
397
- "winner": "model_b",
398
- "judge": "arena_user_::1",
399
- "anony": true,
400
- "tstamp": 1707866312.1118
401
- },
402
- {
403
- "model_a": "Pix2PixZero",
404
- "model_b": "Prompt2prompt",
405
- "winner": "model_b",
406
- "judge": "arena_user_::1",
407
- "anony": true,
408
- "tstamp": 1707883083.3533
409
- },
410
- {
411
- "model_a": "Pix2PixZero",
412
- "model_b": "InstructPix2Pix",
413
- "winner": "model_b",
414
- "judge": "arena_user_::1",
415
- "anony": true,
416
- "tstamp": 1707883181.1397
417
- },
418
- {
419
- "model_a": "Pix2PixZero",
420
- "model_b": "Prompt2prompt",
421
- "winner": "model_b",
422
- "judge": "arena_user_::1",
423
- "anony": true,
424
- "tstamp": 1707883187.9173
425
- },
426
- {
427
- "model_a": "PNP",
428
- "model_b": "Prompt2prompt",
429
- "winner": "model_a",
430
- "judge": "arena_user_::1",
431
- "anony": true,
432
- "tstamp": 1707883507.587
433
- },
434
- {
435
- "model_a": "Prompt2prompt",
436
- "model_b": "CycleDiffusion",
437
- "winner": "model_a",
438
- "judge": "arena_user_::1",
439
- "anony": true,
440
- "tstamp": 1707883939.6125
441
- },
442
- {
443
- "model_a": "Prompt2prompt",
444
- "model_b": "MagicBrush",
445
- "winner": "model_b",
446
- "judge": "arena_user_::1",
447
- "anony": true,
448
- "tstamp": 1707892689.4407
449
- },
450
- {
451
- "model_a": "MagicBrush",
452
- "model_b": "InstructPix2Pix",
453
- "winner": "model_b",
454
- "judge": "arena_user_::1",
455
- "anony": true,
456
- "tstamp": 1707908988.749
457
- },
458
- {
459
- "model_a": "Prompt2prompt",
460
- "model_b": "InstructPix2Pix",
461
- "winner": "model_a",
462
- "judge": "arena_user_::1",
463
- "anony": true,
464
- "tstamp": 1707912639.2701
465
- },
466
- {
467
- "model_a": "MagicBrush",
468
- "model_b": "Pix2PixZero",
469
- "winner": "model_a",
470
- "judge": "arena_user_::1",
471
- "anony": false,
472
- "tstamp": 1707917685.9574
473
- },
474
- {
475
- "model_a": "MagicBrush",
476
- "model_b": "InstructPix2Pix",
477
- "winner": "tie (bothbad)",
478
- "judge": "arena_user_::1",
479
- "anony": false,
480
- "tstamp": 1707919429.336
481
- },
482
- {
483
- "model_a": "InstructPix2Pix",
484
- "model_b": "CycleDiffusion",
485
- "winner": "model_a",
486
- "judge": "arena_user_::1",
487
- "anony": true,
488
- "tstamp": 1707932651.9192
489
- },
490
- {
491
- "model_a": "MagicBrush",
492
- "model_b": "InstructPix2Pix",
493
- "winner": "model_a",
494
- "judge": "arena_user_::1",
495
- "anony": true,
496
- "tstamp": 1707932749.3107
497
- },
498
- {
499
- "model_a": "Prompt2prompt",
500
- "model_b": "PNP",
501
- "winner": "model_a",
502
- "judge": "arena_user_::1",
503
- "anony": true,
504
- "tstamp": 1707933208.5797
505
- },
506
- {
507
- "model_a": "MagicBrush",
508
- "model_b": "Pix2PixZero",
509
- "winner": "model_a",
510
- "judge": "arena_user_::1",
511
- "anony": false,
512
- "tstamp": 1707945335.6341
513
- },
514
- {
515
- "model_a": "MagicBrush",
516
- "model_b": "PNP",
517
- "winner": "model_a",
518
- "judge": "arena_user_::1",
519
- "anony": false,
520
- "tstamp": 1708031168.6838
521
- },
522
- {
523
- "model_a": "Pix2PixZero",
524
- "model_b": "PNP",
525
- "winner": "model_b",
526
- "judge": "arena_user_::1",
527
- "anony": false,
528
- "tstamp": 1708038931.5388
529
- },
530
- {
531
- "model_a": "Pix2PixZero",
532
- "model_b": "CycleDiffusion",
533
- "winner": "tie (bothbad)",
534
- "judge": "arena_user_::1",
535
- "anony": true,
536
- "tstamp": 1708057382.78
537
- },
538
- {
539
- "model_a": "PNP",
540
- "model_b": "InstructPix2Pix",
541
- "winner": "model_b",
542
- "judge": "arena_user_::1",
543
- "anony": true,
544
- "tstamp": 1708093689.8237
545
- },
546
- {
547
- "model_a": "MagicBrush",
548
- "model_b": "PNP",
549
- "winner": "model_b",
550
- "judge": "arena_user_::1",
551
- "anony": true,
552
- "tstamp": 1708093910.4683
553
- },
554
- {
555
- "model_a": "Pix2PixZero",
556
- "model_b": "Prompt2prompt",
557
- "winner": "model_b",
558
- "judge": "arena_user_::1",
559
- "anony": false,
560
- "tstamp": 1708095090.8232
561
- },
562
- {
563
- "model_a": "Pix2PixZero",
564
- "model_b": "Prompt2prompt",
565
- "winner": "model_a",
566
- "judge": "arena_user_::1",
567
- "anony": false,
568
- "tstamp": 1708095305.4665
569
- },
570
- {
571
- "model_a": "InstructPix2Pix",
572
- "model_b": "Prompt2prompt",
573
- "winner": "model_b",
574
- "judge": "arena_user_::1",
575
- "anony": true,
576
- "tstamp": 1708140553.1694
577
- },
578
- {
579
- "model_a": "MagicBrush",
580
- "model_b": "Prompt2prompt",
581
- "winner": "model_a",
582
- "judge": "arena_user_::1",
583
- "anony": true,
584
- "tstamp": 1708145512.3656
585
- },
586
- {
587
- "model_a": "Pix2PixZero",
588
- "model_b": "Prompt2prompt",
589
- "winner": "tie (bothbad)",
590
- "judge": "arena_user_::1",
591
- "anony": true,
592
- "tstamp": 1708145724.4127
593
- },
594
- {
595
- "model_a": "Pix2PixZero",
596
- "model_b": "PNP",
597
- "winner": "model_b",
598
- "judge": "arena_user_::1",
599
- "anony": true,
600
- "tstamp": 1708146846.5098
601
- },
602
- {
603
- "model_a": "PNP",
604
- "model_b": "MagicBrush",
605
- "winner": "model_a",
606
- "judge": "arena_user_::1",
607
- "anony": true,
608
- "tstamp": 1708189738.4864
609
- },
610
- {
611
- "model_a": "Prompt2prompt",
612
- "model_b": "InstructPix2Pix",
613
- "winner": "model_b",
614
- "judge": "arena_user_::1",
615
- "anony": true,
616
- "tstamp": 1708235874.9246
617
- },
618
- {
619
- "model_a": "Pix2PixZero",
620
- "model_b": "PNP",
621
- "winner": "model_b",
622
- "judge": "arena_user_::1",
623
- "anony": false,
624
- "tstamp": 1708257619.7115
625
- },
626
- {
627
- "model_a": "MagicBrush",
628
- "model_b": "Pix2PixZero",
629
- "winner": "tie (bothbad)",
630
- "judge": "arena_user_::1",
631
- "anony": true,
632
- "tstamp": 1708341265.7655
633
- },
634
- {
635
- "model_a": "MagicBrush",
636
- "model_b": "InstructPix2Pix",
637
- "winner": "model_b",
638
- "judge": "arena_user_::1",
639
- "anony": true,
640
- "tstamp": 1708350183.3086
641
- },
642
- {
643
- "model_a": "MagicBrush",
644
- "model_b": "Pix2PixZero",
645
- "winner": "tie (bothbad)",
646
- "judge": "arena_user_::1",
647
- "anony": true,
648
- "tstamp": 1708399707.1681
649
- },
650
- {
651
- "model_a": "PNP",
652
- "model_b": "MagicBrush",
653
- "winner": "model_a",
654
- "judge": "arena_user_::1",
655
- "anony": true,
656
- "tstamp": 1708441502.4707
657
- },
658
- {
659
- "model_a": "InstructPix2Pix",
660
- "model_b": "MagicBrush",
661
- "winner": "model_a",
662
- "judge": "arena_user_::1",
663
- "anony": true,
664
- "tstamp": 1708441716.8195
665
- },
666
- {
667
- "model_a": "InstructPix2Pix",
668
- "model_b": "MagicBrush",
669
- "winner": "model_b",
670
- "judge": "arena_user_::1",
671
- "anony": false,
672
- "tstamp": 1708546759.2009
673
- },
674
- {
675
- "model_a": "InstructPix2Pix",
676
- "model_b": "MagicBrush",
677
- "winner": "model_a",
678
- "judge": "arena_user_::1",
679
- "anony": false,
680
- "tstamp": 1708546805.4892
681
- },
682
- {
683
- "model_a": "Pix2PixZero",
684
- "model_b": "CycleDiffusion",
685
- "winner": "tie (bothbad)",
686
- "judge": "arena_user_::1",
687
- "anony": true,
688
- "tstamp": 1708547082.7124
689
- },
690
- {
691
- "model_a": "InstructPix2Pix",
692
- "model_b": "MagicBrush",
693
- "winner": "model_b",
694
- "judge": "arena_user_::1",
695
- "anony": false,
696
- "tstamp": 1708547166.9685
697
- },
698
- {
699
- "model_a": "InstructPix2Pix",
700
- "model_b": "MagicBrush",
701
- "winner": "model_b",
702
- "judge": "arena_user_::1",
703
- "anony": false,
704
- "tstamp": 1708547293.7107
705
- },
706
- {
707
- "model_a": "CycleDiffusion",
708
- "model_b": "PNP",
709
- "winner": "tie (bothbad)",
710
- "judge": "arena_user_::1",
711
- "anony": true,
712
- "tstamp": 1708575046.0529
713
- },
714
- {
715
- "model_a": "CycleDiffusion",
716
- "model_b": "MagicBrush",
717
- "winner": "tie (bothbad)",
718
- "judge": "arena_user_::1",
719
- "anony": true,
720
- "tstamp": 1708615466.9264
721
- },
722
- {
723
- "model_a": "CycleDiffusion",
724
- "model_b": "MagicBrush",
725
- "winner": "model_b",
726
- "judge": "arena_user_::1",
727
- "anony": false,
728
- "tstamp": 1708615516.3341
729
- },
730
- {
731
- "model_a": "InstructPix2Pix",
732
- "model_b": "PNP",
733
- "winner": "model_b",
734
- "judge": "arena_user_::1",
735
- "anony": false,
736
- "tstamp": 1709205399.0098
737
- },
738
- {
739
- "model_a": "InstructPix2Pix",
740
- "model_b": "PNP",
741
- "winner": "model_b",
742
- "judge": "arena_user_::1",
743
- "anony": false,
744
- "tstamp": 1709205767.8923
745
- },
746
- {
747
- "model_a": "PNP",
748
- "model_b": "InstructPix2Pix",
749
- "winner": "model_b",
750
- "judge": "arena_user_::1",
751
- "anony": true,
752
- "tstamp": 1709443700.05
753
- },
754
- {
755
- "model_a": "MagicBrush",
756
- "model_b": "Pix2PixZero",
757
- "winner": "model_a",
758
- "judge": "arena_user_::1",
759
- "anony": true,
760
- "tstamp": 1709702898.9291
761
- },
762
- {
763
- "model_a": "CycleDiffusion",
764
- "model_b": "Prompt2prompt",
765
- "winner": "tie (bothbad)",
766
- "judge": "arena_user_::1",
767
- "anony": true,
768
- "tstamp": 1710091925.1861
769
- },
770
- {
771
- "model_a": "MagicBrush",
772
- "model_b": "InstructPix2Pix",
773
- "winner": "tie (bothbad)",
774
- "judge": "arena_user_::1",
775
- "anony": true,
776
- "tstamp": 1710517781.1525
777
- },
778
- {
779
- "model_a": "MagicBrush",
780
- "model_b": "InstructPix2Pix",
781
- "winner": "tie (bothbad)",
782
- "judge": "arena_user_::1",
783
- "anony": false,
784
- "tstamp": 1710517859.2942
785
- },
786
- {
787
- "model_a": "Pix2PixZero",
788
- "model_b": "CycleDiffusion",
789
- "winner": "tie (bothbad)",
790
- "judge": "arena_user_::1",
791
- "anony": true,
792
- "tstamp": 1710535672.9791
793
- },
794
- {
795
- "model_a": "InfEdit",
796
- "model_b": "MagicBrush",
797
- "winner": "model_a",
798
- "judge": "arena_user_10.16.25.191",
799
- "anony": false,
800
- "tstamp": 1714359818.6646
801
- },
802
- {
803
- "model_a": "InstructPix2Pix",
804
- "model_b": "Prompt2prompt",
805
- "winner": "tie (bothbad)",
806
- "judge": "arena_user_10.16.25.191",
807
- "anony": true,
808
- "tstamp": 1714363016.9972
809
- },
810
- {
811
- "model_a": "InfEdit",
812
- "model_b": "CosXLEdit",
813
- "winner": "model_a",
814
- "judge": "arena_user_10.16.25.191",
815
- "anony": true,
816
- "tstamp": 1714715956.3416
817
- },
818
- {
819
- "model_a": "Pix2PixZero",
820
- "model_b": "Prompt2prompt",
821
- "winner": "tie (bothbad)",
822
- "judge": "arena_user_10.16.2.201",
823
- "anony": false,
824
- "tstamp": 1714759928.3804
825
- },
826
- {
827
- "model_a": "PNP",
828
- "model_b": "InstructPix2Pix",
829
- "winner": "model_a",
830
- "judge": "arena_user_10.16.17.217",
831
- "anony": true,
832
- "tstamp": 1715246275.0118
833
- },
834
- {
835
- "model_a": "SDEdit",
836
- "model_b": "CosXLEdit",
837
- "winner": "tie (bothbad)",
838
- "judge": "arena_user_10.16.15.199",
839
- "anony": true,
840
- "tstamp": 1715247590.2235
841
- },
842
- {
843
- "model_a": "CycleDiffusion",
844
- "model_b": "CosXLEdit",
845
- "winner": "model_b",
846
- "judge": "arena_user_10.16.41.118",
847
- "anony": false,
848
- "tstamp": 1715406266.2562
849
- },
850
- {
851
- "model_a": "CycleDiffusion",
852
- "model_b": "CosXLEdit",
853
- "winner": "model_a",
854
- "judge": "arena_user_10.16.41.118",
855
- "anony": false,
856
- "tstamp": 1715406354.5284
857
- },
858
- {
859
- "model_a": "CycleDiffusion",
860
- "model_b": "CosXLEdit",
861
- "winner": "model_b",
862
- "judge": "arena_user_10.16.2.201",
863
- "anony": false,
864
- "tstamp": 1715406371.8227
865
- },
866
- {
867
- "model_a": "CycleDiffusion",
868
- "model_b": "CosXLEdit",
869
- "winner": "model_b",
870
- "judge": "arena_user_10.16.41.118",
871
- "anony": false,
872
- "tstamp": 1715406418.5066
873
- },
874
- {
875
- "model_a": "CycleDiffusion",
876
- "model_b": "CosXLEdit",
877
- "winner": "model_b",
878
- "judge": "arena_user_10.16.25.191",
879
- "anony": false,
880
- "tstamp": 1715406449.9401
881
- },
882
- {
883
- "model_a": "CycleDiffusion",
884
- "model_b": "CosXLEdit",
885
- "winner": "model_b",
886
- "judge": "arena_user_10.16.41.118",
887
- "anony": false,
888
- "tstamp": 1715406466.5778
889
- },
890
- {
891
- "model_a": "InfEdit",
892
- "model_b": "CycleDiffusion",
893
- "winner": "model_a",
894
- "judge": "arena_user_10.16.2.201",
895
- "anony": true,
896
- "tstamp": 1715620708.6361
897
- },
898
- {
899
- "model_a": "Prompt2prompt",
900
- "model_b": "CosXLEdit",
901
- "winner": "model_a",
902
- "judge": "arena_user_10.16.41.118",
903
- "anony": false,
904
- "tstamp": 1715621013.5373
905
- },
906
- {
907
- "model_a": "MagicBrush",
908
- "model_b": "CycleDiffusion",
909
- "winner": "tie (bothbad)",
910
- "judge": "arena_user_10.16.2.201",
911
- "anony": true,
912
- "tstamp": 1715661224.0507
913
- },
914
- {
915
- "model_a": "SDEdit",
916
- "model_b": "PNP",
917
- "winner": "tie (bothbad)",
918
- "judge": "arena_user_10.16.41.118",
919
- "anony": true,
920
- "tstamp": 1715661259.6143
921
- },
922
- {
923
- "model_a": "Pix2PixZero",
924
- "model_b": "Prompt2prompt",
925
- "winner": "tie (bothbad)",
926
- "judge": "arena_user_10.16.41.118",
927
- "anony": true,
928
- "tstamp": 1715661288.6018
929
- },
930
- {
931
- "model_a": "InstructPix2Pix",
932
- "model_b": "Prompt2prompt",
933
- "winner": "model_b",
934
- "judge": "arena_user_10.16.25.191",
935
- "anony": true,
936
- "tstamp": 1715661310.3621
937
- },
938
- {
939
- "model_a": "CosXLEdit",
940
- "model_b": "InstructPix2Pix",
941
- "winner": "tie (bothbad)",
942
- "judge": "arena_user_10.16.25.191",
943
- "anony": true,
944
- "tstamp": 1715718742.1258
945
- },
946
- {
947
- "model_a": "MagicBrush",
948
- "model_b": "PNP",
949
- "winner": "model_a",
950
- "judge": "arena_user_10.16.2.201",
951
- "anony": true,
952
- "tstamp": 1715718773.1054
953
- },
954
- {
955
- "model_a": "SDEdit",
956
- "model_b": "CosXLEdit",
957
- "winner": "tie (bothbad)",
958
- "judge": "arena_user_10.16.2.201",
959
- "anony": true,
960
- "tstamp": 1715718785.2832
961
- },
962
- {
963
- "model_a": "InstructPix2Pix",
964
- "model_b": "SDEdit",
965
- "winner": "tie (bothbad)",
966
- "judge": "arena_user_10.16.2.201",
967
- "anony": true,
968
- "tstamp": 1715718804.143
969
- },
970
- {
971
- "model_a": "InfEdit",
972
- "model_b": "CosXLEdit",
973
- "winner": "model_b",
974
- "judge": "arena_user_10.16.25.191",
975
- "anony": true,
976
- "tstamp": 1715718826.0248
977
- },
978
- {
979
- "model_a": "InfEdit",
980
- "model_b": "Prompt2prompt",
981
- "winner": "model_a",
982
- "judge": "arena_user_10.16.2.201",
983
- "anony": true,
984
- "tstamp": 1715718869.0041
985
- },
986
- {
987
- "model_a": "InfEdit",
988
- "model_b": "CosXLEdit",
989
- "winner": "model_b",
990
- "judge": "arena_user_10.16.2.201",
991
- "anony": true,
992
- "tstamp": 1715718904.9307
993
- },
994
- {
995
- "model_a": "Prompt2prompt",
996
- "model_b": "Pix2PixZero",
997
- "winner": "model_a",
998
- "judge": "arena_user_10.16.25.191",
999
- "anony": true,
1000
- "tstamp": 1715718933.1272
1001
- },
1002
- {
1003
- "model_a": "Pix2PixZero",
1004
- "model_b": "MagicBrush",
1005
- "winner": "tie (bothbad)",
1006
- "judge": "arena_user_10.16.2.201",
1007
- "anony": true,
1008
- "tstamp": 1715718954.8497
1009
- },
1010
- {
1011
- "model_a": "MagicBrush",
1012
- "model_b": "PNP",
1013
- "winner": "model_b",
1014
- "judge": "arena_user_10.16.25.191",
1015
- "anony": true,
1016
- "tstamp": 1715718966.8633
1017
- },
1018
- {
1019
- "model_a": "CycleDiffusion",
1020
- "model_b": "Prompt2prompt",
1021
- "winner": "tie (bothbad)",
1022
- "judge": "arena_user_10.16.25.191",
1023
- "anony": true,
1024
- "tstamp": 1715719000.6673
1025
- },
1026
- {
1027
- "model_a": "MagicBrush",
1028
- "model_b": "Pix2PixZero",
1029
- "winner": "tie (bothbad)",
1030
- "judge": "arena_user_10.16.25.191",
1031
- "anony": true,
1032
- "tstamp": 1715719019.5495
1033
- },
1034
- {
1035
- "model_a": "InfEdit",
1036
- "model_b": "Prompt2prompt",
1037
- "winner": "model_a",
1038
- "judge": "arena_user_10.16.25.191",
1039
- "anony": true,
1040
- "tstamp": 1715719035.903
1041
- },
1042
- {
1043
- "model_a": "MagicBrush",
1044
- "model_b": "Pix2PixZero",
1045
- "winner": "model_a",
1046
- "judge": "arena_user_10.16.25.191",
1047
- "anony": true,
1048
- "tstamp": 1715719046.925
1049
- },
1050
- {
1051
- "model_a": "CycleDiffusion",
1052
- "model_b": "CosXLEdit",
1053
- "winner": "tie (bothbad)",
1054
- "judge": "arena_user_10.16.2.201",
1055
- "anony": true,
1056
- "tstamp": 1715719059.6291
1057
- },
1058
- {
1059
- "model_a": "Prompt2prompt",
1060
- "model_b": "SDEdit",
1061
- "winner": "tie (bothbad)",
1062
- "judge": "arena_user_10.16.15.199",
1063
- "anony": true,
1064
- "tstamp": 1715719076.6727
1065
- },
1066
- {
1067
- "model_a": "MagicBrush",
1068
- "model_b": "PNP",
1069
- "winner": "model_a",
1070
- "judge": "arena_user_10.16.25.191",
1071
- "anony": true,
1072
- "tstamp": 1715719086.7836
1073
- },
1074
- {
1075
- "model_a": "CycleDiffusion",
1076
- "model_b": "MagicBrush",
1077
- "winner": "model_b",
1078
- "judge": "arena_user_10.16.25.191",
1079
- "anony": true,
1080
- "tstamp": 1715719109.8071
1081
- },
1082
- {
1083
- "model_a": "Prompt2prompt",
1084
- "model_b": "InstructPix2Pix",
1085
- "winner": "model_b",
1086
- "judge": "arena_user_10.16.25.191",
1087
- "anony": true,
1088
- "tstamp": 1715719122.8237
1089
- },
1090
- {
1091
- "model_a": "MagicBrush",
1092
- "model_b": "SDEdit",
1093
- "winner": "model_a",
1094
- "judge": "arena_user_10.16.15.199",
1095
- "anony": true,
1096
- "tstamp": 1715719134.1345
1097
- },
1098
- {
1099
- "model_a": "SDEdit",
1100
- "model_b": "CycleDiffusion",
1101
- "winner": "tie (bothbad)",
1102
- "judge": "arena_user_10.16.17.217",
1103
- "anony": true,
1104
- "tstamp": 1715719153.4359
1105
- },
1106
- {
1107
- "model_a": "Pix2PixZero",
1108
- "model_b": "MagicBrush",
1109
- "winner": "tie (bothbad)",
1110
- "judge": "arena_user_10.16.17.217",
1111
- "anony": true,
1112
- "tstamp": 1715719160.5285
1113
- },
1114
- {
1115
- "model_a": "MagicBrush",
1116
- "model_b": "InstructPix2Pix",
1117
- "winner": "model_b",
1118
- "judge": "arena_user_10.16.15.199",
1119
- "anony": true,
1120
- "tstamp": 1715719171.4473
1121
- },
1122
- {
1123
- "model_a": "InstructPix2Pix",
1124
- "model_b": "SDEdit",
1125
- "winner": "tie (bothbad)",
1126
- "judge": "arena_user_10.16.2.201",
1127
- "anony": true,
1128
- "tstamp": 1715719184.6227
1129
- },
1130
- {
1131
- "model_a": "CosXLEdit",
1132
- "model_b": "MagicBrush",
1133
- "winner": "model_a",
1134
- "judge": "arena_user_10.16.2.201",
1135
- "anony": true,
1136
- "tstamp": 1715719210.0429
1137
- },
1138
- {
1139
- "model_a": "CycleDiffusion",
1140
- "model_b": "MagicBrush",
1141
- "winner": "model_b",
1142
- "judge": "arena_user_10.16.41.118",
1143
- "anony": true,
1144
- "tstamp": 1715719219.6447
1145
- },
1146
- {
1147
- "model_a": "PNP",
1148
- "model_b": "Pix2PixZero",
1149
- "winner": "tie (bothbad)",
1150
- "judge": "arena_user_10.16.41.118",
1151
- "anony": true,
1152
- "tstamp": 1715719237.7036
1153
- },
1154
- {
1155
- "model_a": "PNP",
1156
- "model_b": "CycleDiffusion",
1157
- "winner": "tie (bothbad)",
1158
- "judge": "arena_user_10.16.25.191",
1159
- "anony": true,
1160
- "tstamp": 1715719249.4321
1161
- },
1162
- {
1163
- "model_a": "Prompt2prompt",
1164
- "model_b": "Pix2PixZero",
1165
- "winner": "model_a",
1166
- "judge": "arena_user_10.16.2.201",
1167
- "anony": true,
1168
- "tstamp": 1715719257.5877
1169
- },
1170
- {
1171
- "model_a": "CosXLEdit",
1172
- "model_b": "Pix2PixZero",
1173
- "winner": "tie (bothbad)",
1174
- "judge": "arena_user_10.16.25.191",
1175
- "anony": true,
1176
- "tstamp": 1715719273.7637
1177
- },
1178
- {
1179
- "model_a": "PNP",
1180
- "model_b": "CosXLEdit",
1181
- "winner": "model_b",
1182
- "judge": "arena_user_10.16.17.217",
1183
- "anony": true,
1184
- "tstamp": 1715719288.4629
1185
- },
1186
- {
1187
- "model_a": "Pix2PixZero",
1188
- "model_b": "PNP",
1189
- "winner": "model_b",
1190
- "judge": "arena_user_10.16.41.118",
1191
- "anony": true,
1192
- "tstamp": 1715719299.1712
1193
- },
1194
- {
1195
- "model_a": "PNP",
1196
- "model_b": "MagicBrush",
1197
- "winner": "model_b",
1198
- "judge": "arena_user_10.16.2.201",
1199
- "anony": true,
1200
- "tstamp": 1715719306.5928
1201
- },
1202
- {
1203
- "model_a": "InstructPix2Pix",
1204
- "model_b": "PNP",
1205
- "winner": "tie (bothbad)",
1206
- "judge": "arena_user_10.16.15.199",
1207
- "anony": true,
1208
- "tstamp": 1715719356.0694
1209
- },
1210
- {
1211
- "model_a": "Prompt2prompt",
1212
- "model_b": "CosXLEdit",
1213
- "winner": "model_a",
1214
- "judge": "arena_user_10.16.25.191",
1215
- "anony": true,
1216
- "tstamp": 1715719368.0491
1217
- },
1218
- {
1219
- "model_a": "Prompt2prompt",
1220
- "model_b": "CycleDiffusion",
1221
- "winner": "tie (bothbad)",
1222
- "judge": "arena_user_10.16.41.118",
1223
- "anony": true,
1224
- "tstamp": 1715719379.185
1225
- },
1226
- {
1227
- "model_a": "CycleDiffusion",
1228
- "model_b": "Prompt2prompt",
1229
- "winner": "tie (bothbad)",
1230
- "judge": "arena_user_10.16.2.201",
1231
- "anony": true,
1232
- "tstamp": 1715719389.0771
1233
- },
1234
- {
1235
- "model_a": "Pix2PixZero",
1236
- "model_b": "MagicBrush",
1237
- "winner": "model_b",
1238
- "judge": "arena_user_10.16.17.217",
1239
- "anony": true,
1240
- "tstamp": 1715719397.7162
1241
- },
1242
- {
1243
- "model_a": "PNP",
1244
- "model_b": "InstructPix2Pix",
1245
- "winner": "tie (bothbad)",
1246
- "judge": "arena_user_10.16.2.201",
1247
- "anony": true,
1248
- "tstamp": 1715719406.4165
1249
- },
1250
- {
1251
- "model_a": "Pix2PixZero",
1252
- "model_b": "PNP",
1253
- "winner": "model_b",
1254
- "judge": "arena_user_10.16.25.191",
1255
- "anony": true,
1256
- "tstamp": 1715719429.1002
1257
- },
1258
- {
1259
- "model_a": "CosXLEdit",
1260
- "model_b": "MagicBrush",
1261
- "winner": "model_a",
1262
- "judge": "arena_user_10.16.2.201",
1263
- "anony": true,
1264
- "tstamp": 1715719435.4694
1265
- },
1266
- {
1267
- "model_a": "PNP",
1268
- "model_b": "SDEdit",
1269
- "winner": "tie (bothbad)",
1270
- "judge": "arena_user_10.16.2.201",
1271
- "anony": true,
1272
- "tstamp": 1715719454.4526
1273
- },
1274
- {
1275
- "model_a": "InfEdit",
1276
- "model_b": "PNP",
1277
- "winner": "tie (bothbad)",
1278
- "judge": "arena_user_10.16.17.217",
1279
- "anony": true,
1280
- "tstamp": 1715719470.154
1281
- },
1282
- {
1283
- "model_a": "MagicBrush",
1284
- "model_b": "PNP",
1285
- "winner": "model_a",
1286
- "judge": "arena_user_10.16.41.118",
1287
- "anony": true,
1288
- "tstamp": 1715719482.3114
1289
- },
1290
- {
1291
- "model_a": "SDEdit",
1292
- "model_b": "PNP",
1293
- "winner": "tie",
1294
- "judge": "arena_user_10.16.2.201",
1295
- "anony": true,
1296
- "tstamp": 1715719499.9643
1297
- },
1298
- {
1299
- "model_a": "InstructPix2Pix",
1300
- "model_b": "MagicBrush",
1301
- "winner": "model_b",
1302
- "judge": "arena_user_10.16.2.201",
1303
- "anony": true,
1304
- "tstamp": 1715719513.7317
1305
- },
1306
- {
1307
- "model_a": "InfEdit",
1308
- "model_b": "PNP",
1309
- "winner": "model_a",
1310
- "judge": "arena_user_10.16.15.199",
1311
- "anony": true,
1312
- "tstamp": 1715719527.69
1313
- },
1314
- {
1315
- "model_a": "Prompt2prompt",
1316
- "model_b": "MagicBrush",
1317
- "winner": "tie (bothbad)",
1318
- "judge": "arena_user_10.16.41.118",
1319
- "anony": true,
1320
- "tstamp": 1715719542.751
1321
- },
1322
- {
1323
- "model_a": "Pix2PixZero",
1324
- "model_b": "InfEdit",
1325
- "winner": "tie (bothbad)",
1326
- "judge": "arena_user_10.16.15.199",
1327
- "anony": true,
1328
- "tstamp": 1715719560.9912
1329
- },
1330
- {
1331
- "model_a": "PNP",
1332
- "model_b": "Pix2PixZero",
1333
- "winner": "tie (bothbad)",
1334
- "judge": "arena_user_10.16.2.201",
1335
- "anony": true,
1336
- "tstamp": 1715719575.3291
1337
- },
1338
- {
1339
- "model_a": "PNP",
1340
- "model_b": "CosXLEdit",
1341
- "winner": "model_b",
1342
- "judge": "arena_user_10.16.17.217",
1343
- "anony": true,
1344
- "tstamp": 1715719581.9552
1345
- },
1346
- {
1347
- "model_a": "Pix2PixZero",
1348
- "model_b": "Prompt2prompt",
1349
- "winner": "tie (bothbad)",
1350
- "judge": "arena_user_10.16.25.191",
1351
- "anony": true,
1352
- "tstamp": 1715719591.9907
1353
- },
1354
- {
1355
- "model_a": "CosXLEdit",
1356
- "model_b": "SDEdit",
1357
- "winner": "model_a",
1358
- "judge": "arena_user_10.16.2.201",
1359
- "anony": true,
1360
- "tstamp": 1715719601.8819
1361
- },
1362
- {
1363
- "model_a": "InfEdit",
1364
- "model_b": "MagicBrush",
1365
- "winner": "model_b",
1366
- "judge": "arena_user_10.16.41.118",
1367
- "anony": true,
1368
- "tstamp": 1715719612.1837
1369
- },
1370
- {
1371
- "model_a": "SDEdit",
1372
- "model_b": "InstructPix2Pix",
1373
- "winner": "tie (bothbad)",
1374
- "judge": "arena_user_10.16.2.201",
1375
- "anony": true,
1376
- "tstamp": 1715719620.469
1377
- },
1378
- {
1379
- "model_a": "InstructPix2Pix",
1380
- "model_b": "MagicBrush",
1381
- "winner": "tie (bothbad)",
1382
- "judge": "arena_user_10.16.41.118",
1383
- "anony": true,
1384
- "tstamp": 1715719627.34
1385
- },
1386
- {
1387
- "model_a": "MagicBrush",
1388
- "model_b": "Prompt2prompt",
1389
- "winner": "model_a",
1390
- "judge": "arena_user_10.16.2.201",
1391
- "anony": true,
1392
- "tstamp": 1715719632.694
1393
- },
1394
- {
1395
- "model_a": "Prompt2prompt",
1396
- "model_b": "SDEdit",
1397
- "winner": "tie (bothbad)",
1398
- "judge": "arena_user_10.16.41.118",
1399
- "anony": true,
1400
- "tstamp": 1715719652.2038
1401
- },
1402
- {
1403
- "model_a": "SDEdit",
1404
- "model_b": "Prompt2prompt",
1405
- "winner": "tie (bothbad)",
1406
- "judge": "arena_user_10.16.25.191",
1407
- "anony": true,
1408
- "tstamp": 1715719661.8855
1409
- },
1410
- {
1411
- "model_a": "CosXLEdit",
1412
- "model_b": "Prompt2prompt",
1413
- "winner": "tie (bothbad)",
1414
- "judge": "arena_user_10.16.2.201",
1415
- "anony": true,
1416
- "tstamp": 1715719677.2949
1417
- },
1418
- {
1419
- "model_a": "MagicBrush",
1420
- "model_b": "Prompt2prompt",
1421
- "winner": "model_a",
1422
- "judge": "arena_user_10.16.2.201",
1423
- "anony": true,
1424
- "tstamp": 1715719687.3022
1425
- },
1426
- {
1427
- "model_a": "SDEdit",
1428
- "model_b": "Prompt2prompt",
1429
- "winner": "model_b",
1430
- "judge": "arena_user_10.16.2.201",
1431
- "anony": true,
1432
- "tstamp": 1715719699.47
1433
- },
1434
- {
1435
- "model_a": "Pix2PixZero",
1436
- "model_b": "InfEdit",
1437
- "winner": "model_b",
1438
- "judge": "arena_user_10.16.2.201",
1439
- "anony": true,
1440
- "tstamp": 1715719706.2375
1441
- },
1442
- {
1443
- "model_a": "CosXLEdit",
1444
- "model_b": "Prompt2prompt",
1445
- "winner": "model_a",
1446
- "judge": "arena_user_10.16.17.217",
1447
- "anony": true,
1448
- "tstamp": 1715719717.3564
1449
- },
1450
- {
1451
- "model_a": "InstructPix2Pix",
1452
- "model_b": "CosXLEdit",
1453
- "winner": "tie (bothbad)",
1454
- "judge": "arena_user_10.16.25.191",
1455
- "anony": true,
1456
- "tstamp": 1715719722.5542
1457
- },
1458
- {
1459
- "model_a": "InfEdit",
1460
- "model_b": "InstructPix2Pix",
1461
- "winner": "tie (bothbad)",
1462
- "judge": "arena_user_10.16.41.118",
1463
- "anony": true,
1464
- "tstamp": 1715719728.5417
1465
- },
1466
- {
1467
- "model_a": "MagicBrush",
1468
- "model_b": "SDEdit",
1469
- "winner": "model_a",
1470
- "judge": "arena_user_10.16.2.201",
1471
- "anony": true,
1472
- "tstamp": 1715719737.2385
1473
- },
1474
- {
1475
- "model_a": "MagicBrush",
1476
- "model_b": "Pix2PixZero",
1477
- "winner": "tie (bothbad)",
1478
- "judge": "arena_user_10.16.15.199",
1479
- "anony": true,
1480
- "tstamp": 1715815138.5243
1481
- },
1482
- {
1483
- "model_a": "CosXLEdit",
1484
- "model_b": "Prompt2prompt",
1485
- "winner": "model_b",
1486
- "judge": "arena_user_10.16.17.217",
1487
- "anony": true,
1488
- "tstamp": 1715815152.0033
1489
- },
1490
- {
1491
- "model_a": "Pix2PixZero",
1492
- "model_b": "Prompt2prompt",
1493
- "winner": "tie (bothbad)",
1494
- "judge": "arena_user_10.16.41.118",
1495
- "anony": true,
1496
- "tstamp": 1715815169.0475
1497
- },
1498
- {
1499
- "model_a": "InstructPix2Pix",
1500
- "model_b": "SDEdit",
1501
- "winner": "model_b",
1502
- "judge": "arena_user_10.16.41.118",
1503
- "anony": true,
1504
- "tstamp": 1715815187.1917
1505
- },
1506
- {
1507
- "model_a": "InstructPix2Pix",
1508
- "model_b": "Pix2PixZero",
1509
- "winner": "tie (bothbad)",
1510
- "judge": "arena_user_10.16.2.201",
1511
- "anony": true,
1512
- "tstamp": 1715815197.5233
1513
- },
1514
- {
1515
- "model_a": "Pix2PixZero",
1516
- "model_b": "SDEdit",
1517
- "winner": "tie (bothbad)",
1518
- "judge": "arena_user_10.16.2.201",
1519
- "anony": true,
1520
- "tstamp": 1715815209.8285
1521
- },
1522
- {
1523
- "model_a": "CycleDiffusion",
1524
- "model_b": "MagicBrush",
1525
- "winner": "model_b",
1526
- "judge": "arena_user_10.16.2.201",
1527
- "anony": true,
1528
- "tstamp": 1715815228.6736
1529
- },
1530
- {
1531
- "model_a": "InfEdit",
1532
- "model_b": "Pix2PixZero",
1533
- "winner": "tie (bothbad)",
1534
- "judge": "arena_user_10.16.2.201",
1535
- "anony": true,
1536
- "tstamp": 1715815236.3935
1537
- },
1538
- {
1539
- "model_a": "SDEdit",
1540
- "model_b": "PNP",
1541
- "winner": "tie (bothbad)",
1542
- "judge": "arena_user_10.16.25.191",
1543
- "anony": true,
1544
- "tstamp": 1715815265.9705
1545
- },
1546
- {
1547
- "model_a": "MagicBrush",
1548
- "model_b": "SDEdit",
1549
- "winner": "tie (bothbad)",
1550
- "judge": "arena_user_10.16.15.199",
1551
- "anony": true,
1552
- "tstamp": 1715815278.5019
1553
- },
1554
- {
1555
- "model_a": "CycleDiffusion",
1556
- "model_b": "CosXLEdit",
1557
- "winner": "tie (bothbad)",
1558
- "judge": "arena_user_10.16.15.199",
1559
- "anony": true,
1560
- "tstamp": 1715815294.5978
1561
- },
1562
- {
1563
- "model_a": "MagicBrush",
1564
- "model_b": "InfEdit",
1565
- "winner": "model_a",
1566
- "judge": "arena_user_10.16.17.217",
1567
- "anony": true,
1568
- "tstamp": 1715815325.4468
1569
- },
1570
- {
1571
- "model_a": "MagicBrush",
1572
- "model_b": "Pix2PixZero",
1573
- "winner": "model_a",
1574
- "judge": "arena_user_10.16.41.118",
1575
- "anony": true,
1576
- "tstamp": 1715913098.6617
1577
- }
1578
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
arena_elo/results/latest/clean_battle_t2i_generation.json DELETED
The diff for this file is too large to render. See raw diff
 
arena_elo/results/latest/elo_results_image_editing.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:837f11fd6cda1fe2d6a5cc1c239a207725ad0157b16282303cb684427ddc7e9d
3
- size 62484
 
 
 
 
arena_elo/results/latest/elo_results_t2i_generation.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:93808a9ce2f497109d0fc708e4055b6463a692502ef541ff28352f52b612916d
3
- size 68172
 
 
 
 
arena_elo/results/latest/image_editing_leaderboard.csv DELETED
@@ -1,10 +0,0 @@
1
- key,Model,Arena Elo rating (anony),Arena Elo rating (full),License,Organization,Link
2
- CosXLEdit,CosXLEdit,1097.63559213644,1085.7285800995926,cosxl-nc-community,Stability AI,https://huggingface.co/spaces/multimodalart/cosxl
3
- MagicBrush,MagicBrush,1075.1489922450316,1086.8819832924794,CC-BY-4.0,"The Ohio State University, University of Waterloo",https://osu-nlp-group.github.io/MagicBrush
4
- InfEdit,InfEdit,1065.4719519196174,1090.684638162955,Apache-2.0,"University of Michigan, University of California, Berkeley",https://huggingface.co/spaces/sled-umich/InfEdit
5
- Prompt2prompt,Prompt2prompt,1063.1432047252297,1060.8146250689238,Apache-2.0,"Google, Tel Aviv University",https://prompt-to-prompt.github.io
6
- InstructPix2Pix,InstructPix2Pix,1043.9312648233226,1028.7932718869638,"Copyright 2023 Timothy Brooks, Aleksander Holynski, Alexei A. Efros","University of California, Berkeley",https://www.timothybrooks.com/instruct-pix2pix
7
- PNP,PNP,1022.4342554377677,1043.322342347598,-,Weizmann Institute of Science,https://github.com/MichalGeyer/plug-and-play
8
- Pix2PixZero,Pix2PixZero,891.2979039265506,886.7359371585381,MIT License,"Carnegie Mellon University, Adobe Research",https://pix2pixzero.github.io
9
- SDEdit,SDEdit,890.443823405714,880.5508125882768,MIT License,Stanford University,https://sde-image-editing.github.io
10
- CycleDiffusion,CycleDiffusion,850.4930113803264,836.4878093946726,X11,Carnegie Mellon University,https://github.com/ChenWu98/cycle-diffusion
 
 
 
 
 
 
 
 
 
 
 
arena_elo/results/latest/t2i_generation_leaderboard.csv DELETED
@@ -1,12 +0,0 @@
1
- key,Model,Arena Elo rating (anony),Arena Elo rating (full),License,Organization,Link
2
- SDXL,SDXL,0,0,openrail++,Stability AI,https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
3
- SDXLTurbo,SDXLTurbo,0,0,sai-nc-community (other),Stability AI,https://huggingface.co/stabilityai/sdxl-turbo
4
- LCM(v1.5/XL),LCM(v1.5/XL),0,0,openrail++,Latent Consistency,https://fal.ai/models/fal-ai/fast-lcm-diffusion/api
5
- OpenJourney,OpenJourney,0,0,creativeml-openrail-m,PromptHero,https://huggingface.co/prompthero/openjourney
6
- LCM,LCM,0,0,MIT License,Tsinghua University,https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7
7
- PixArtAlpha,PixArtAlpha,0,0,openrail++,PixArt-alpha,https://huggingface.co/PixArt-alpha/PixArt-XL-2-1024-MS
8
- PixArtSigma,PixArtSigma,0,0,openrail++,PixArt-alpha,https://fal.ai/models/fal-ai/pixart-sigma
9
- StableCascade,StableCascade,0,0,stable-cascade-nc-community (other),Stability AI,https://huggingface.co/stabilityai/stable-cascade
10
- PlayGround V2.5,PlayGround V2.5,0,0,Playground v2.5 Community License,Playground,https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic
11
- PlayGround V2,PlayGround V2,0,0,Playground v2 Community License,Playground,https://huggingface.co/playgroundai/playground-v2-1024px-aesthetic
12
- SDXLLightning,SDXLLightning,0,0,openrail++,ByteDance,https://huggingface.co/ByteDance/SDXL-Lightning
 
 
 
 
 
 
 
 
 
 
 
 
 
arena_elo/simple_test.py DELETED
@@ -1,16 +0,0 @@
1
- import pickle
2
- with open("./results/latest/elo_results.pkl",'rb') as f:
3
- data = pickle.load(f)
4
- print()
5
- df = data["anony"]["leaderboard_table_df"]
6
- # sort by rating
7
- df = df.sort_values(by=["rating"], ascending=False)
8
- print(df)
9
-
10
- print()
11
-
12
- df = data["full"]["leaderboard_table_df"]
13
- # sort by rating
14
- df = df.sort_values(by=["rating"], ascending=False)
15
- print(df)
16
- print('done')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
arena_elo/update_elo_rating.sh DELETED
@@ -1,49 +0,0 @@
1
- # set LOGDIR to default if not set before
2
- if [ -z "$LOGDIR" ]; then
3
- echo "LOGDIR is not set. Using default '../GenAI-Arena-hf-logs/vote_log'"
4
- export LOGDIR="../GenAI-Arena-hf-logs/vote_log"
5
- fi
6
-
7
- mkdir -p results
8
-
9
- # # for battle data
10
- python -m elo_rating.clean_battle_data --task_name "image_editing"
11
- edition_battle_cutoff_date=`cat cut_off_date.txt` && rm cut_off_date.txt && echo "Image editing battle data last updated on $edition_battle_cutoff_date"
12
-
13
- python -m elo_rating.clean_battle_data --task_name "t2i_generation"
14
- generation_battle_cutoff_date=`cat cut_off_date.txt` && rm cut_off_date.txt && echo "T2I image generation battle data last updated on $generation_battle_cutoff_date"
15
-
16
- mkdir -p ./results/$edition_battle_cutoff_date
17
- mkdir -p ./results/$generation_battle_cutoff_date
18
-
19
- cp clean_battle_image_editing_$edition_battle_cutoff_date.json ./results/latest/clean_battle_image_editing.json
20
- cp clean_battle_t2i_generation_$generation_battle_cutoff_date.json ./results/latest/clean_battle_t2i_generation.json
21
- mv clean_battle_image_editing_$edition_battle_cutoff_date.json ./results/$edition_battle_cutoff_date/clean_battle_image_editing.json
22
- mv clean_battle_t2i_generation_$generation_battle_cutoff_date.json ./results/$generation_battle_cutoff_date/clean_battle_t2i_generation.json
23
-
24
-
25
- python3 -m elo_rating.elo_analysis --clean-battle-file ./results/$edition_battle_cutoff_date/clean_battle_image_editing.json
26
- mv ./elo_results_$edition_battle_cutoff_date.pkl ./results/$edition_battle_cutoff_date/elo_results_image_editing.pkl
27
-
28
- python3 -m elo_rating.elo_analysis --clean-battle-file ./results/$generation_battle_cutoff_date/clean_battle_t2i_generation.json
29
- mv ./elo_results_$generation_battle_cutoff_date.pkl ./results/$generation_battle_cutoff_date/elo_results_t2i_generation.pkl
30
-
31
- # generat the leaderboard
32
-
33
- python -m elo_rating.generate_leaderboard \
34
- --model_info_file "./edition_model_info.json" \
35
- --elo_rating_pkl "./results/$edition_battle_cutoff_date/elo_results_image_editing.pkl" \
36
- --output_csv "./results/$edition_battle_cutoff_date/image_editing_leaderboard.csv"
37
-
38
- python -m elo_rating.generate_leaderboard \
39
- --model_info_file "./generation_model_info.json" \
40
- --elo_rating_pkl "./results/$generation_battle_cutoff_date/elo_results_t2i_generation.pkl" \
41
- --output_csv "./results/$generation_battle_cutoff_date/t2i_generation_leaderboard.csv"
42
-
43
- mkdir -p ./results/latest
44
- cp ./results/$edition_battle_cutoff_date/image_editing_leaderboard.csv ./results/latest/image_editing_leaderboard.csv
45
- cp ./results/$generation_battle_cutoff_date/t2i_generation_leaderboard.csv ./results/latest/t2i_generation_leaderboard.csv
46
- cp ./results/$edition_battle_cutoff_date/elo_results_image_editing.pkl ./results/latest/elo_results_image_editing.pkl
47
- cp ./results/$generation_battle_cutoff_date/elo_results_t2i_generation.pkl ./results/latest/elo_results_t2i_generation.pkl
48
-
49
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
serve/leaderboard.py CHANGED
@@ -16,10 +16,8 @@ import time
16
  import gradio as gr
17
  import numpy as np
18
  import pandas as pd
19
-
20
-
21
- basic_component_values = [None] * 6
22
- leader_component_values = [None] * 5
23
 
24
 
25
  # def make_leaderboard_md(elo_results):
@@ -36,111 +34,30 @@ leader_component_values = [None] * 5
36
  # """
37
  # return leaderboard_md
38
 
39
- def make_leaderboard_md(elo_results):
40
  leaderboard_md = f"""
41
  # 🏆 K-Sort-Arena Leaderboard
42
  """
43
 
44
  return leaderboard_md
45
 
46
-
47
- def make_leaderboard_md_live(elo_results):
48
- leaderboard_md = f"""
49
- # Leaderboard
50
- Last updated: {elo_results["last_updated_datetime"]}
51
- {elo_results["leaderboard_table"]}
52
- """
53
- return leaderboard_md
54
-
55
-
56
  def model_hyperlink(model_name, link):
57
  return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
58
 
59
 
60
- def load_leaderboard_table_csv(filename, add_hyperlink=True):
61
- df = pd.read_csv(filename)
62
- for col in df.columns:
63
- if "Arena Elo rating" in col:
64
- df[col] = df[col].apply(lambda x: int(x) if x != "-" else np.nan)
65
- elif col == "MMLU":
66
- df[col] = df[col].apply(lambda x: round(x * 100, 1) if x != "-" else np.nan)
67
- elif col == "MT-bench (win rate %)":
68
- df[col] = df[col].apply(lambda x: round(x, 1) if x != "-" else np.nan)
69
- elif col == "MT-bench (score)":
70
- df[col] = df[col].apply(lambda x: round(x, 2) if x != "-" else np.nan)
71
-
72
- if add_hyperlink and col == "Model":
73
- df[col] = df.apply(lambda row: model_hyperlink(row[col], row["Link"]), axis=1)
74
- return df
75
-
76
-
77
- def build_basic_stats_tab():
78
- empty = "Loading ..."
79
- basic_component_values[:] = [empty, None, empty, empty, empty, empty]
80
-
81
- md0 = gr.Markdown(empty)
82
- gr.Markdown("#### Figure 1: Number of model calls and votes")
83
- plot_1 = gr.Plot(show_label=False)
84
- with gr.Row():
85
- with gr.Column():
86
- md1 = gr.Markdown(empty)
87
- with gr.Column():
88
- md2 = gr.Markdown(empty)
89
- with gr.Row():
90
- with gr.Column():
91
- md3 = gr.Markdown(empty)
92
- with gr.Column():
93
- md4 = gr.Markdown(empty)
94
- return [md0, plot_1, md1, md2, md3, md4]
95
-
96
- def get_arena_table(arena_df, model_table_df):
97
- # sort by rating
98
- arena_df = arena_df.sort_values(by=["rating"], ascending=False)
99
- values = []
100
- for i in range(len(arena_df)):
101
- row = []
102
- model_key = arena_df.index[i]
103
- model_name = model_table_df[model_table_df["key"] == model_key]["Model"].values[
104
- 0
105
- ]
106
-
107
- # rank
108
- row.append(i + 1)
109
- # model display name
110
- row.append(model_name)
111
- # elo rating
112
- row.append(0), #round(arena_df.iloc[i]["rating"])
113
- upper_diff = round(arena_df.iloc[i]["rating_q975"] - arena_df.iloc[i]["rating"])
114
- lower_diff = round(arena_df.iloc[i]["rating"] - arena_df.iloc[i]["rating_q025"])
115
- row.append(0) #f"+{upper_diff}/-{lower_diff}"
116
- # num battles
117
- row.append(0) #round(arena_df.iloc[i]["num_battles"])
118
- # Organization
119
- row.append(
120
- model_table_df[model_table_df["key"] == model_key]["Organization"].values[0]
121
- )
122
- # license
123
- row.append(
124
- model_table_df[model_table_df["key"] == model_key]["License"].values[0]
125
- )
126
-
127
- values.append(row)
128
- return values
129
-
130
- def make_arena_leaderboard_md(elo_results):
131
- arena_df = elo_results["leaderboard_table_df"]
132
- last_updated = elo_results["last_updated_datetime"]
133
- total_votes = sum(arena_df["num_battles"]) // 2
134
- total_models = len(arena_df)
135
 
136
  leaderboard_md = f"""
137
  Total #models: **{total_models}**(anonymous). Total #votes: **{total_votes}** (Equivalent to **{total_votes*6}** votes for one-on-one games).
138
- Last updated: {last_updated}.
139
  """
140
 
141
  return leaderboard_md
142
 
143
 
 
144
  def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=False):
145
  if elo_results_file is None: # Do live update
146
  md = "Loading ..."
@@ -198,7 +115,7 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
198
 
199
  if not show_plot:
200
  gr.Markdown(
201
- """ ## We are still collecting more votes on more models. The ranking will be updated very fruquently. Please stay tuned!
202
  """,
203
  elem_id="leaderboard_markdown",
204
  )
@@ -213,4 +130,36 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
213
  gr.Markdown(acknowledgment_md)
214
 
215
  # return [md_1, plot_1, plot_2, plot_3, plot_4]
216
- return [md_1]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  import gradio as gr
17
  import numpy as np
18
  import pandas as pd
19
+ import json
20
+ from datetime import datetime
 
 
21
 
22
 
23
  # def make_leaderboard_md(elo_results):
 
34
  # """
35
  # return leaderboard_md
36
 
37
+ def make_leaderboard_md():
38
  leaderboard_md = f"""
39
  # 🏆 K-Sort-Arena Leaderboard
40
  """
41
 
42
  return leaderboard_md
43
 
 
 
 
 
 
 
 
 
 
 
44
  def model_hyperlink(model_name, link):
45
  return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
46
 
47
 
48
+ def make_arena_leaderboard_md(total_models, total_votes):
49
+ last_updated = datetime.now()
50
+ last_updated = last_updated.strftime("%Y-%m-%d")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
  leaderboard_md = f"""
53
  Total #models: **{total_models}**(anonymous). Total #votes: **{total_votes}** (Equivalent to **{total_votes*6}** votes for one-on-one games).
54
+ \n Last updated: {last_updated}.
55
  """
56
 
57
  return leaderboard_md
58
 
59
 
60
+ '''
61
  def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=False):
62
  if elo_results_file is None: # Do live update
63
  md = "Loading ..."
 
115
 
116
  if not show_plot:
117
  gr.Markdown(
118
+ """ ## The leaderboard is updated frequently and continues to incorporate new models.
119
  """,
120
  elem_id="leaderboard_markdown",
121
  )
 
130
  gr.Markdown(acknowledgment_md)
131
 
132
  # return [md_1, plot_1, plot_2, plot_3, plot_4]
133
+ return [md_1]
134
+ '''
135
+
136
+
137
+
138
+ def make_arena_leaderboard_data(results):
139
+ import pandas as pd
140
+ df = pd.DataFrame(results)
141
+ return df
142
+
143
+ def build_leaderboard_tab(score_result_file = 'sorted_score_list.json'):
144
+ with open(score_result_file, "r") as json_file:
145
+ data = json.load(json_file)
146
+ score_results = data["sorted_score_list"]
147
+ total_models = data["total_models"]
148
+ total_votes = data["total_votes"]
149
+
150
+ md = make_leaderboard_md()
151
+ md_1 = gr.Markdown(md, elem_id="leaderboard_markdown")
152
+
153
+ with gr.Tab("Arena Score", id=0):
154
+ md = make_arena_leaderboard_md(total_models, total_votes)
155
+ gr.Markdown(md, elem_id="leaderboard_markdown")
156
+ md = make_arena_leaderboard_data(score_results)
157
+ gr.Dataframe(md)
158
+
159
+ gr.Markdown(
160
+ """ ## The leaderboard is updated frequently and continues to incorporate new models.
161
+ """,
162
+ elem_id="leaderboard_markdown",
163
+ )
164
+ from .utils import acknowledgment_md
165
+ gr.Markdown(acknowledgment_md)