Spaces: Running on Zero
DongfuJiang committed
Merge branch 'main' of https://huggingface.co/spaces/TIGER-Lab/GenAI-Arena
Files changed:
- README.md +1 -1
- app.py +1 -1
- arena_elo/video_generation_model_info.json +1 -1
- model/model_registry.py +8 -10
- model/models/__init__.py +1 -1
- model/models/fal_api_models.py +0 -18
- requirements.txt +1 -1
- serve/leaderboard.py +0 -39
README.md
CHANGED
@@ -7,7 +7,7 @@ sdk: gradio
 sdk_version: 4.41.0
 python_version: 3.12
 app_file: app.py
-pinned:
+pinned: true
 license: mit
 tags:
 - arena
app.py
CHANGED
@@ -97,7 +97,7 @@ if __name__ == "__main__":
     root_path = ROOT_PATH
     elo_results_dir = ELO_RESULTS_DIR
     models = ModelManager(enable_nsfw=False, do_pre_download=True, do_debug_packages=True)
-    # models = ModelManager(enable_nsfw=False,
+    # models = ModelManager(enable_nsfw=False, do_pre_download=False, do_debug_packages=False)

     elo_results_file, leaderboard_table_file = load_elo_results(elo_results_dir)
     demo = build_combine_demo(models, elo_results_file, leaderboard_table_file)
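The live call pre-downloads weights and runs package diagnostics; the commented variant turns both off for quicker local runs. A minimal sketch of switching between the two with an environment variable (the GENAI_ARENA_DEBUG flag and the import path are assumptions, not part of this commit):

import os
from model.models import ModelManager  # import path assumed from the repo layout

# Hypothetical debug switch; app.py currently hard-codes the production kwargs.
debug = os.environ.get("GENAI_ARENA_DEBUG") == "1"
models = ModelManager(
    enable_nsfw=False,
    do_pre_download=not debug,    # skip weight downloads when debugging
    do_debug_packages=not debug,  # skip package diagnostics when debugging
)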
arena_elo/video_generation_model_info.json
CHANGED
@@ -31,7 +31,7 @@
     },
     "StableVideoDiffusion": {
         "Link": "https://fal.ai/models/fal-ai/fast-svd/text-to-video/api",
-        "License": "
+        "License": "SVD-nc-community",
         "Organization": "Stability AI"
     },
     "T2VTurbo": {
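This fills in the previously truncated license string for StableVideoDiffusion. For reference, reading the file back using only the keys visible in this diff:

import json

with open("arena_elo/video_generation_model_info.json") as f:
    model_info = json.load(f)

svd = model_info["StableVideoDiffusion"]
print(svd["License"])       # "SVD-nc-community"
print(svd["Organization"])  # "Stability AI"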
model/model_registry.py
CHANGED
@@ -258,15 +258,6 @@ register_model_info(
     "AnimateDiff Turbo is a lightning version of AnimateDiff.",
 )

-"""
-register_model_info(
-    ["videogenhub_LaVie_generation"],
-    "LaVie",
-    "https://github.com/Vchitect/LaVie",
-    "LaVie is a video generation model with cascaded latent diffusion models.",
-)
-
-
 register_model_info(
     ["videogenhub_VideoCrafter2_generation"],
     "VideoCrafter2",
@@ -274,6 +265,13 @@ register_model_info(
     "VideoCrafter2 is a T2V model that disentangling motion from appearance.",
 )

+"""
+register_model_info(
+    ["videogenhub_LaVie_generation"],
+    "LaVie",
+    "https://github.com/Vchitect/LaVie",
+    "LaVie is a video generation model with cascaded latent diffusion models.",
+)
 register_model_info(
     ["videogenhub_ModelScope_generation"],
     "ModelScope",
@@ -303,7 +301,7 @@ register_model_info(
 )

 register_model_info(
-    ["
+    ["fal_T2VTurbo_text2video"],
     "T2V-Turbo",
     "https://github.com/Ji4chenLi/t2v-turbo",
     "Video Consistency Model with Mixed Reward Feedback.",
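Net effect: the LaVie registration moves into the triple-quoted (disabled) block, and the T2V-Turbo entry gets its model list restored. The registry itself is not shown in this diff; a plausible minimal sketch, with the ModelInfo fields inferred from the call sites above:

from collections import namedtuple

# Assumed shape of the registry; field names mirror the register_model_info
# arguments used throughout this file, not confirmed implementation details.
ModelInfo = namedtuple("ModelInfo", ["simple_name", "link", "description"])
model_info = {}  # maps a backend-qualified key -> ModelInfo

def register_model_info(full_names, simple_name, link, description):
    # One entry per qualified name, e.g. "fal_T2VTurbo_text2video".
    info = ModelInfo(simple_name, link, description)
    for full_name in full_names:
        model_info[full_name] = info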
model/models/__init__.py
CHANGED
@@ -18,7 +18,7 @@ IMAGE_EDITION_MODELS = ['imagenhub_CycleDiffusion_edition', 'imagenhub_Pix2PixZe
 VIDEO_GENERATION_MODELS = ['fal_AnimateDiff_text2video',
                            'fal_AnimateDiffTurbo_text2video',
                            #'videogenhub_LaVie_generation',
-
+                           'videogenhub_VideoCrafter2_generation',
                            #'videogenhub_ModelScope_generation',
                            'videogenhub_CogVideoX_generation', 'videogenhub_OpenSora12_generation',
                            #'videogenhub_OpenSora_generation',
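This adds 'videogenhub_VideoCrafter2_generation' to the active list. The identifiers follow a "{backend}_{Model}_{task}" naming scheme, so a loader can dispatch on the prefix; a hedged sketch with illustrative stub helpers (the helper names are not from this repo):

def load_fal_model(name, task):           # illustrative stub
    raise NotImplementedError

def load_videogenhub_model(name, task):   # illustrative stub
    raise NotImplementedError

def load_model(model_id: str):
    # e.g. "fal_AnimateDiff_text2video" -> ("fal", "AnimateDiff", "text2video")
    backend, name, task = model_id.split("_", 2)
    if backend == "fal":
        return load_fal_model(name, task)
    if backend == "videogenhub":
        return load_videogenhub_model(name, task)
    raise ValueError(f"unknown backend: {backend}")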
model/models/fal_api_models.py
CHANGED
@@ -54,24 +54,6 @@ class FalModel():
             return result
         elif self.model_type == "image2image":
             raise NotImplementedError("image2image model is not implemented yet")
-            # assert "image" in kwargs or "image_url" in kwargs, "image or image_url is required for image2image model"
-            # if "image" in kwargs:
-            #     image_url = None
-            #     pass
-            # handler = fal_client.submit(
-            #     f"fal-ai/{self.model_name}",
-            #     arguments={
-            #         "image_url": image_url
-            #     },
-            # )
-            #
-            # for event in handler.iter_events():
-            #     if isinstance(event, fal_client.InProgress):
-            #         print('Request in progress')
-            #         print(event.logs)
-            #
-            # result = handler.get()
-            # return result
         elif self.model_type == "text2video":
             assert "prompt" in kwargs, "prompt is required for text2video model"
             if self.model_name == 'AnimateDiff':
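The deleted lines were a commented-out image2image draft. The submit/poll/get flow they followed, reassembled from those comments into a standalone helper (the endpoint string is built the same way the draft built it):

import fal_client

def submit_and_wait(model_name: str, arguments: dict) -> dict:
    # Same fal_client pattern as the removed comments: submit the job,
    # stream progress events, then fetch the final result.
    handler = fal_client.submit(f"fal-ai/{model_name}", arguments=arguments)
    for event in handler.iter_events():
        if isinstance(event, fal_client.InProgress):
            print('Request in progress')
            print(event.logs)
    return handler.get()

# e.g. submit_and_wait("fast-svd/text-to-video", {"prompt": "a red panda"})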
requirements.txt
CHANGED
@@ -68,4 +68,4 @@ tensorboard
 timm
 wandb
 pandarallel
-kaleido
+kaleido
serve/leaderboard.py
CHANGED
@@ -22,20 +22,6 @@ basic_component_values = [None] * 6
 leader_component_values = [None] * 5


-# def make_leaderboard_md(elo_results):
-#     leaderboard_md = f"""
-# # 🏆 Chatbot Arena Leaderboard
-# | [Blog](https://lmsys.org/blog/2023-05-03-arena/) | [GitHub](https://github.com/lm-sys/FastChat) | [Paper](https://arxiv.org/abs/2306.05685) | [Dataset](https://github.com/lm-sys/FastChat/blob/main/docs/dataset_release.md) | [Twitter](https://twitter.com/lmsysorg) | [Discord](https://discord.gg/HSWAKCrnFx) |
-#
-# This leaderboard is based on the following three benchmarks.
-# - [Chatbot Arena](https://lmsys.org/blog/2023-05-03-arena/) - a crowdsourced, randomized battle platform. We use 100K+ user votes to compute Elo ratings.
-# - [MT-Bench](https://arxiv.org/abs/2306.05685) - a set of challenging multi-turn questions. We use GPT-4 to grade the model responses.
-# - [MMLU](https://arxiv.org/abs/2009.03300) (5-shot) - a test to measure a model's multitask accuracy on 57 tasks.
-#
-# 💻 Code: The Arena Elo ratings are computed by this [notebook]({notebook_url}). The MT-bench scores (single-answer grading on a scale of 10) are computed by [fastchat.llm_judge](https://github.com/lm-sys/FastChat/tree/main/fastchat/llm_judge). The MMLU scores are mostly computed by [InstructEval](https://github.com/declare-lab/instruct-eval). Higher values are better for all benchmarks. Empty cells mean not available. Last updated: November, 2023.
-# """
-#     return leaderboard_md
-
 def make_leaderboard_md(elo_results):
     leaderboard_md = f"""
 # 🏆 GenAI-Arena Leaderboard
@@ -324,31 +310,6 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Tr

     leader_component_values[:] = [md, p1, p2, p3, p4]

-    """
-    with gr.Row():
-        with gr.Column():
-            gr.Markdown(
-                "#### Figure 1: Fraction of Model A Wins for All Non-tied A vs. B Battles"
-            )
-            plot_1 = gr.Plot(p1, show_label=False)
-        with gr.Column():
-            gr.Markdown(
-                "#### Figure 2: Battle Count for Each Combination of Models (without Ties)"
-            )
-            plot_2 = gr.Plot(p2, show_label=False)
-    with gr.Row():
-        with gr.Column():
-            gr.Markdown(
-                "#### Figure 3: Bootstrap of Elo Estimates (1000 Rounds of Random Sampling)"
-            )
-            plot_3 = gr.Plot(p3, show_label=False)
-        with gr.Column():
-            gr.Markdown(
-                "#### Figure 4: Average Win Rate Against All Other Models (Assuming Uniform Sampling and No Ties)"
-            )
-            plot_4 = gr.Plot(p4, show_label=False)
-    """
-
     from .utils import acknowledgment_md

     gr.Markdown(acknowledgment_md)
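Both deleted blocks were dead commented-out code: the old Chatbot Arena leaderboard text inherited from FastChat, and a disabled four-plot layout. The Elo ratings the leaderboard reports are computed offline under arena_elo; for reference, the standard K-factor Elo update that such ratings generalize (a generic sketch, not this repo's bootstrap implementation):

def elo_update(r_a: float, r_b: float, winner: str, k: float = 32.0):
    # Expected score of A under the logistic Elo model (400-point scale).
    e_a = 1.0 / (1.0 + 10 ** ((r_b - r_a) / 400.0))
    # Actual score of A: 1 for a win, 0 for a loss, 0.5 for a tie.
    s_a = {"model_a": 1.0, "model_b": 0.0, "tie": 0.5}[winner]
    return r_a + k * (s_a - e_a), r_b + k * ((1.0 - s_a) - (1.0 - e_a))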