DongfuJiang committed
Commit a56205d · 2 Parent(s): ea5c481 c4c1eb9

Merge branch 'main' of https://huggingface.co/spaces/TIGER-Lab/GenAI-Arena

README.md CHANGED
@@ -4,7 +4,8 @@ emoji: 📈
 colorFrom: purple
 colorTo: pink
 sdk: gradio
-sdk_version: 4.21.0
+sdk_version: 4.41.0
+python_version: 3.12
 app_file: app.py
 pinned: false
 license: mit
app.py CHANGED
@@ -9,6 +9,13 @@ from pathlib import Path
 from serve.constants import SERVER_PORT, ROOT_PATH, ELO_RESULTS_DIR
 from model.pre_download import pre_download_all_models, pre_download_video_models
 
+def debug_packages():
+    import pkg_resources
+
+    installed_packages = pkg_resources.working_set
+    for package in installed_packages:
+        print(f"{package.key}=={package.version}")
+
 def build_combine_demo(models, elo_results_file, leaderboard_table_file):
 
     with gr.Blocks(
@@ -99,6 +106,8 @@ if __name__ == "__main__":
     elo_results_dir = ELO_RESULTS_DIR
     models = ModelManager()
 
+    debug_packages()
+
     pre_download_all_models()
 
     elo_results_file, leaderboard_table_file = load_elo_results(elo_results_dir)
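A note on the new debug_packages helper: pkg_resources is deprecated in recent setuptools releases, and with python_version: 3.12 now pinned in the README, the stdlib importlib.metadata produces the same pip-freeze-style listing. A minimal alternative sketch (an illustration, not part of this commit):

    from importlib.metadata import distributions

    def debug_packages():
        # Print each installed distribution as a "name==version" line,
        # sorted case-insensitively by distribution name.
        for dist in sorted(distributions(), key=lambda d: d.metadata["Name"].lower()):
            print(f"{dist.metadata['Name']}=={dist.version}")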
arena_elo/elo_rating/clean_battle_data.py CHANGED
@@ -21,42 +21,6 @@ from .basic_stats import get_log_files, NUM_SERVERS, LOG_ROOT_DIR
 from .utils import detect_language, get_time_stamp_from_date
 
 VOTES = ["tievote", "leftvote", "rightvote", "bothbad_vote"]
-IDENTITY_WORDS = [
-    "vicuna",
-    "lmsys",
-    "koala",
-    "uc berkeley",
-    "open assistant",
-    "laion",
-    "chatglm",
-    "chatgpt",
-    "gpt-4",
-    "openai",
-    "anthropic",
-    "claude",
-    "bard",
-    "palm",
-    "lamda",
-    "google",
-    "llama",
-    "qianwan",
-    "alibaba",
-    "mistral",
-    "zhipu",
-    "KEG lab",
-    "01.AI",
-    "AI2",
-    "Tülu",
-    "Tulu",
-    "NETWORK ERROR DUE TO HIGH TRAFFIC. PLEASE REGENERATE OR REFRESH THIS PAGE.",
-    "$MODERATION$ YOUR INPUT VIOLATES OUR CONTENT MODERATION GUIDELINES.",
-    "API REQUEST ERROR. Please increase the number of max tokens.",
-    "**API REQUEST ERROR** Reason: The response was blocked.",
-    "**API REQUEST ERROR**",
-]
-
-for i in range(len(IDENTITY_WORDS)):
-    IDENTITY_WORDS[i] = IDENTITY_WORDS[i].lower()
 
 def parse_model_name(model_name):
     return NotImplementedError()
@@ -79,23 +43,12 @@ def to_openai_format(messages):
 
 
 def replace_model_name(old_name, tstamp):
-
     replace_dict = {
-        "bard": "palm-2",
-        "claude-v1": "claude-1",
-        "claude-instant-v1": "claude-instant-1",
-        "oasst-sft-1-pythia-12b": "oasst-pythia-12b",
-        "claude-2": "claude-2.0",
         "PlayGroundV2": "PlayGround V2",
         "PlayGroundV2.5": "PlayGround V2.5",
         "FluxTimestep": "FLUX1schnell",
         "FluxGuidance": "FLUX1dev"
     }
-    if old_name in ["gpt-4", "gpt-3.5-turbo"]:
-        if tstamp > 1687849200:
-            old_name += "-0613"
-        else:
-            old_name += "-0314"
     if old_name in replace_dict:
         old_name = replace_dict[old_name]
     if "Flux" in old_name:
@@ -198,32 +151,6 @@ def clean_battle_data(
             print(f"Model names mismatch: {models_public} vs {models_hidden}")
             ct_invalid += 1
             continue
-
-        # # Detect langauge
-        # state = row["states"][0]
-        # if state["offset"] >= len(state["messages"]):
-        #     ct_invalid += 1
-        #     continue
-        # lang_code = detect_language(state["messages"][state["offset"]][1])
-
-        # # Drop conversations if the model names are leaked
-        # leaked_identity = False
-        # messages = ""
-        # for i in range(2):
-        #     state = row["states"][i]
-        #     for turn_idx, (role, msg) in enumerate(
-        #         state["messages"][state["offset"] :]
-        #     ):
-        #         if msg:
-        #             messages += msg.lower()
-        #     for word in IDENTITY_WORDS:
-        #         if word in messages:
-        #             leaked_identity = True
-        #             break
-
-        # if leaked_identity:
-        #     ct_leaked_identity += 1
-        #     continue
 
         def preprocess_model_name(m):
             if m == "Playground v2":
@@ -239,7 +166,6 @@ def clean_battle_data(
             for _model in models:
                 try:
                     platform, model_name, task = _model.split("_")
-                    #platform, model_name, task = parse_model_name(_model)
                 except ValueError:
                     valid = False
                     break
@@ -251,21 +177,13 @@ def clean_battle_data(
                 continue
             for i, _model in enumerate(models):
                 platform, model_name, task = _model.split("_")
-                #platform, model_name, task = parse_model_name(_model)
                 models[i] = model_name
-
-            # if not all(x.startswith("imagenhub_") and x.endswith("_edition") for x in models):
-            #     # print(f"Invalid model names: {models}")
-            #     ct_invalid += 1
-            #     continue
-
-            # models = [x[len("imagenhub_"):-len("_edition")] for x in models]
+
         elif task_name == "t2i_generation":
             valid = True
             for _model in models:
                 try:
                     platform, model_name, task = _model.split("_")
-                    #platform, model_name, task = parse_model_name(_model)
                 except ValueError:
                     valid = False
                     break
@@ -277,24 +195,13 @@ def clean_battle_data(
                 continue
             for i, _model in enumerate(models):
                 platform, model_name, task = _model.split("_")
-                #platform, model_name, task = parse_model_name(_model)
                 models[i] = model_name
-            # if not all("playground" in x.lower() or (x.startswith("imagenhub_") and x.endswith("_generation")) for x in models):
-            #     print(f"Invalid model names: {models}")
-            #     ct_invalid += 1
-            #     continue
-            # models = [x[len("imagenhub_"):-len("_generation")] for x in models]
-            # for i, model_name in enumerate(models):
-            #     mode
-            #     if model_name.startswith("imagenhub_"):
-            #         models[i] = model_name[len("imagenhub_"):-len("_generation")]
 
         elif task_name == "video_generation":
             valid = True
             for _model in models:
                 try:
                     platform, model_name, task = _model.split("_")
-                    #platform, model_name, task = parse_model_name(_model)
                 except ValueError:
                     valid = False
                     break
@@ -306,32 +213,17 @@ def clean_battle_data(
                 continue
             for i, _model in enumerate(models):
                 platform, model_name, task = _model.split("_")
-                #platform, model_name, task = parse_model_name(_model)
                 models[i] = model_name
 
         else:
             raise ValueError(f"Invalid task_name: {task_name}")
 
-        # if "Flux" in models[0] or "Flux" in models[1]:
-        #     print(f"Invalid model names: {models}")
-        #     exit(1)
         models = [replace_model_name(m, row["tstamp"]) for m in models]
 
         # Exclude certain models
         if exclude_model_names and any(x in exclude_model_names for x in models):
             ct_invalid += 1
             continue
-
-        # if models[0] not in model_infos or models[1] not in model_infos:
-        #     continue
-
-        # # Exclude votes before the starting date
-        # if model_infos and (model_infos[models[0]]["starting_from"] > row["tstamp"] or model_infos[models[1]]["starting_from"] > row["tstamp"]):
-        #     print(f"Invalid vote before the valid starting date for {models[0]} and {models[1]}")
-        #     ct_invalid += 1
-        #     continue
-
-
 
         if mode == "conv_release":
             # assert the two images are the same
@@ -357,12 +249,6 @@ def clean_battle_data(
 
 
         question_id = row["states"][0]["conv_id"]
-        # conversation_a = to_openai_format(
-        #     row["states"][0]["messages"][row["states"][0]["offset"] :]
-        # )
-        # conversation_b = to_openai_format(
-        #     row["states"][1]["messages"][row["states"][1]["offset"] :]
-        # )
 
         ip = row["ip"]
         if ip not in all_ips:
@@ -386,11 +272,7 @@ def clean_battle_data(
                 model_b=models[1],
                 winner=convert_type[row["type"]],
                 judge=f"arena_user_{user_id}",
-                # conversation_a=conversation_a,
-                # conversation_b=conversation_b,
-                # turn=len(conversation_a) // 2,
                 anony=anony,
-                # language=lang_code,
                 tstamp=row["tstamp"],
             )
         )
@@ -458,14 +340,6 @@ if __name__ == "__main__":
             print(battles[i])
         output = f"clean_battle_{args.task_name}_{cutoff_date}.json"
     elif args.mode == "conv_release":
-        # new_battles = []
-        # for x in battles:
-        #     if not x["anony"]:
-        #         continue
-        #     for key in []:
-        #         del x[key]
-        #     new_battles.append(x)
-        # battles = new_battles
        output = f"clean_battle_{args.task_name}_conv_{cutoff_date}.json"
 
     with open(output, "w") as fout:
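Context for the surviving _model.split("_") calls in every task branch above: arena model identifiers are assumed to follow a three-field platform_model_task convention, so unpacking the split into exactly three names either succeeds or raises ValueError, which each branch catches to mark the record invalid. A self-contained sketch of that contract (values are illustrative):

    def split_model_id(model_id: str) -> tuple[str, str, str]:
        # A well-formed id such as "fal_T2VTurbo_text2video" has exactly
        # three underscore-separated fields; anything else raises ValueError.
        platform, model_name, task = model_id.split("_")
        return platform, model_name, task

    print(split_model_id("fal_T2VTurbo_text2video"))  # ('fal', 'T2VTurbo', 'text2video')
    try:
        split_model_id("badname")
    except ValueError:
        print("invalid model id")  # the cleaning loop counts these via ct_invalid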
model/model_manager.py CHANGED
@@ -66,6 +66,7 @@ class ModelManager:
             pipe = self.load_model_pipe(model_name)
             result = pipe(prompt=prompt)
         else:
+            print(f'The prompt "{prompt}" is not safe')
             result = ''
         return result
 
@@ -75,6 +76,7 @@ class ModelManager:
             pipe = self.load_model_pipe(model_name)
             result = pipe(prompt=prompt)
         else:
+            print(f'The prompt "{prompt}" is not safe')
             result = ''
         return result
 
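On the new safety message in ModelManager: in a long-running Space, a logging call is often easier to route and filter than a bare print. A hypothetical variant of the added branch (an alternative sketch, not what the commit does):

    import logging

    logger = logging.getLogger(__name__)

    def report_unsafe_prompt(prompt: str) -> str:
        # Emit a warning-level record and return the same empty result
        # the ModelManager branches fall back to.
        logger.warning('The prompt "%s" is not safe', prompt)
        return ''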
model/model_registry.py CHANGED
@@ -258,6 +258,7 @@ register_model_info(
     "AnimateDiff Turbo is a lightning version of AnimateDiff.",
 )
 
+"""
 register_model_info(
     ["videogenhub_LaVie_generation"],
     "LaVie",
@@ -265,6 +266,7 @@ register_model_info(
     "LaVie is a video generation model with cascaded latent diffusion models.",
 )
 
+
 register_model_info(
     ["videogenhub_VideoCrafter2_generation"],
     "VideoCrafter2",
@@ -285,7 +287,7 @@ register_model_info(
     "https://github.com/hpcaitech/Open-Sora",
     "A community-driven opensource implementation of Sora.",
 )
-
+"""
 register_model_info(
     ["videogenhub_OpenSora12_generation"],
     "OpenSora v1.2",
@@ -301,7 +303,7 @@ register_model_info(
 )
 
 register_model_info(
-    ["videogenhub_T2VTurbo_generation"],
+    ["fal_T2VTurbo_generation"],
     "T2V-Turbo",
     "https://github.com/Ji4chenLi/t2v-turbo",
     "Video Consistency Model with Mixed Reward Feedback.",
model/models/__init__.py CHANGED
@@ -17,10 +17,14 @@ IMAGE_EDITION_MODELS = ['imagenhub_CycleDiffusion_edition', 'imagenhub_Pix2PixZe
                         'imagenhub_InfEdit_edition', 'imagenhub_CosXLEdit_edition', 'imagenhub_UltraEdit_edition']
 VIDEO_GENERATION_MODELS = ['fal_AnimateDiff_text2video',
                            'fal_AnimateDiffTurbo_text2video',
-                           'videogenhub_LaVie_generation',
-                           'videogenhub_VideoCrafter2_generation',
-                           'videogenhub_ModelScope_generation', 'videogenhub_CogVideoX_generation', 'videogenhub_OpenSora12_generation',
-                           'videogenhub_OpenSora_generation', 'videogenhub_T2VTurbo_generation','fal_StableVideoDiffusion_text2video']
+                           #'videogenhub_LaVie_generation',
+                           #'videogenhub_VideoCrafter2_generation',
+                           #'videogenhub_ModelScope_generation',
+                           'videogenhub_CogVideoX_generation', 'videogenhub_OpenSora12_generation',
+                           #'videogenhub_OpenSora_generation',
+                           #'videogenhub_T2VTurbo_generation',
+                           'fal_T2VTurbo_text2video',
+                           'fal_StableVideoDiffusion_text2video']
 MUSEUM_UNSUPPORTED_MODELS = ['videogenhub_OpenSoraPlan_generation']
 DESIRED_APPEAR_MODEL = ['videogenhub_T2VTurbo_generation','fal_StableVideoDiffusion_text2video']
 
model/models/fal_api_models.py CHANGED
@@ -7,7 +7,7 @@ import base64
 
 FAL_MODEl_NAME_MAP = {"SDXL": "fast-sdxl", "SDXLTurbo": "fast-turbo-diffusion", "SDXLLightning": "fast-lightning-sdxl",
                       "LCM(v1.5/XL)": "fast-lcm-diffusion", "PixArtSigma": "pixart-sigma", "StableCascade": "stable-cascade",
-                      "AuraFlow": "aura-flow", "FLUX1schnell": "flux/schnell", "FLUX1dev": "flux/dev"}
+                      "AuraFlow": "aura-flow", "FLUX1schnell": "flux/schnell", "FLUX1dev": "flux/dev", "T2VTurbo": "t2v-turbo"}
 
 class FalModel():
     def __init__(self, model_name, model_type):
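For context on the T2VTurbo addition: the map translates arena-side model names into fal endpoint slugs. A sketch of the kind of lookup the surrounding FalModel presumably performs (the fal-ai/ application prefix and the helper name are assumptions, not code from this repo):

    # Abbreviated, illustrative copy of FAL_MODEl_NAME_MAP.
    FAL_NAME_MAP = {"FLUX1schnell": "flux/schnell", "FLUX1dev": "flux/dev",
                    "T2VTurbo": "t2v-turbo"}

    def fal_endpoint(model_name: str) -> str:
        # Resolve an arena model name to a fal application id,
        # e.g. "T2VTurbo" -> "fal-ai/t2v-turbo".
        return f"fal-ai/{FAL_NAME_MAP[model_name]}"

    assert fal_endpoint("T2VTurbo") == "fal-ai/t2v-turbo"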
requirements.txt CHANGED
@@ -4,7 +4,7 @@ flask_cors
 faiss-cpu
 fire
 h5py
--e git+https://github.com/facebookresearch/xformers.git@main#egg=xformers
+xformers
 numpy>=1.23.5
 pandas<2.0.0
 peft>=0.12
@@ -27,8 +27,6 @@ torch-fidelity>=0.3.0
 setuptools>=59.5.0
 transformers
 torchmetrics>=0.6.0
-lpips
-image-reward
 kornia>=0.6
 diffusers>=0.18.0
 accelerate>=0.20.3