Spaces:

TIGER-Lab
/

GenAI-Arena

Running on Zero

App Files Files Community

DongfuJiang committed on Aug 19, 2024

Commit

7e1bd0d

1 Parent(s): a56205d

update

Browse files

Files changed (9) hide show

app.py +2 -13
arena_elo/elo_rating/elo_analysis.py +1 -1
arena_elo/generation_model_info.json +20 -0
arena_elo/results/20240818/elo_results_t2i_generation.pkl +2 -2
arena_elo/results/20240818/t2i_generation_leaderboard.csv +17 -14
arena_elo/results/latest/elo_results_t2i_generation.pkl +2 -2
arena_elo/results/latest/t2i_generation_leaderboard.csv +17 -14
model/model_manager.py +13 -2
serve/leaderboard.py +21 -3

app.py CHANGED Viewed

@@ -7,14 +7,6 @@ from serve.leaderboard import build_leaderboard_tab
 from model.model_manager import ModelManager
 from pathlib import Path
 from serve.constants import SERVER_PORT, ROOT_PATH, ELO_RESULTS_DIR
-from model.pre_download import pre_download_all_models, pre_download_video_models
-def debug_packages():
-    import pkg_resources
-    installed_packages = pkg_resources.working_set
-    for package in installed_packages:
-        print(f"{package.key}=={package.version}")
 def build_combine_demo(models, elo_results_file, leaderboard_table_file):
@@ -104,11 +96,8 @@ if __name__ == "__main__":
     server_port = int(SERVER_PORT)
     root_path = ROOT_PATH
     elo_results_dir = ELO_RESULTS_DIR
-    models = ModelManager()
-    debug_packages()
-    pre_download_all_models()
     elo_results_file, leaderboard_table_file = load_elo_results(elo_results_dir)
     demo = build_combine_demo(models, elo_results_file, leaderboard_table_file)

 from model.model_manager import ModelManager
 from pathlib import Path
 from serve.constants import SERVER_PORT, ROOT_PATH, ELO_RESULTS_DIR
 def build_combine_demo(models, elo_results_file, leaderboard_table_file):
     server_port = int(SERVER_PORT)
     root_path = ROOT_PATH
     elo_results_dir = ELO_RESULTS_DIR
+    models = ModelManager(enable_nsfw=True, pre_download=True, debug_packages=True)
+    # models = ModelManager(enable_nsfw=False, pre_download=False, debug_packages=False)
     elo_results_file, leaderboard_table_file = load_elo_results(elo_results_dir)
     demo = build_combine_demo(models, elo_results_file, leaderboard_table_file)

arena_elo/elo_rating/elo_analysis.py CHANGED Viewed

@@ -381,7 +381,7 @@ if __name__ == "__main__":
         "--rating-system", type=str, choices=["bt", "elo"], default="bt"
     )
     parser.add_argument("--exclude-tie", action="store_true", default=False)
-    parser.add_argument("--min_num_battles_per_model", type=int, default=50)
     args = parser.parse_args()
     np.random.seed(42)

         "--rating-system", type=str, choices=["bt", "elo"], default="bt"
     )
     parser.add_argument("--exclude-tie", action="store_true", default=False)
+    parser.add_argument("--min_num_battles_per_model", type=int, default=25)
     args = parser.parse_args()
     np.random.seed(42)

arena_elo/generation_model_info.json CHANGED Viewed

@@ -63,5 +63,25 @@
         "Link": "https://fal.ai/models/fal-ai/pixart-sigma",
         "License": "openrail++",
         "Organization": "PixArt-alpha"
     }
 }

         "Link": "https://fal.ai/models/fal-ai/pixart-sigma",
         "License": "openrail++",
         "Organization": "PixArt-alpha"
+    },
+    "FLUX1schnell": {
+        "Link": "https://huggingface.co/docs/diffusers/main/en/api/pipelines/flux",
+        "License": "flux-1-dev-non-commercial-license (other)",
+        "Organization": "Black Forest Labs"
+    },
+    "FLUX1dev": {
+        "Link": "https://huggingface.co/docs/diffusers/main/en/api/pipelines/flux",
+        "License": "flux-1-dev-non-commercial-license (other)",
+        "Organization": "Black Forest Labs"
+    },
+    "AuraFlow": {
+        "Link": "https://huggingface.co/fal/AuraFlow",
+        "License": "Apache-2.0",
+        "Organization": "Fal.AI"
+    },
+    "Kolors": {
+        "Link": "https://huggingface.co/Kwai-Kolors/Kolors",
+        "License": "Apache-2.0",
+        "Organization": "Kwai Kolors"
     }
 }

arena_elo/results/20240818/elo_results_t2i_generation.pkl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fa69fb8e450bd988c80b346d4f102c5be8bd74605030f0d5a295a71b66230ccd
-size 76790

 version https://git-lfs.github.com/spec/v1
+oid sha256:4935474b3d38916a2a46738fa4a4e57a34c59abe3c61111a9e79a46187a24b38
+size 86085

arena_elo/results/20240818/t2i_generation_leaderboard.csv CHANGED Viewed

@@ -1,15 +1,18 @@
 key,Model,Arena Elo rating (anony),Arena Elo rating (full),License,Organization,Link
-PlayGround V2.5,PlayGround V2.5,1141.5188909673816,1141.7574769353046,Playground v2.5 Community License,Playground,https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic
-FLUX1schnell,FLUX1schnell,1107.0181424085022,1112.4579783184781,N/A,N/A,N/A
-PlayGround V2,PlayGround V2,1083.848194525069,1081.1558760457797,Playground v2 Community License,Playground,https://huggingface.co/playgroundai/playground-v2-1024px-aesthetic
-HunyuanDiT,HunyuanDiT,1061.8376119998818,1045.8437711625322,tencent-hunyuan-community,Tencent,https://huggingface.co/Tencent-Hunyuan/HunyuanDiT
-StableCascade,StableCascade,1048.1457195754278,1050.1691240248065,stable-cascade-nc-community (other),Stability AI,https://huggingface.co/stabilityai/stable-cascade
-SDXLLightning,SDXLLightning,1034.30472320188,1037.2932871766393,openrail++,ByteDance,https://huggingface.co/ByteDance/SDXL-Lightning
-PixArtAlpha,PixArtAlpha,1033.912116989071,1022.557442237083,openrail++,PixArt-alpha,https://huggingface.co/PixArt-alpha/PixArt-XL-2-1024-MS
-PixArtSigma,PixArtSigma,1029.1408183981687,1027.4953277391191,openrail++,PixArt-alpha,https://fal.ai/models/fal-ai/pixart-sigma
-SD3,SD3,1006.8458978216129,1000.2491511468914,stabilityai-nc-research-community,Stability AI,https://huggingface.co/stabilityai/stable-diffusion-3-medium
-SDXL,SDXL,975.3595050901268,974.4701361426086,openrail++,Stability AI,https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
-SDXLTurbo,SDXLTurbo,922.5038319935961,918.6139210049913,sai-nc-community (other),Stability AI,https://huggingface.co/stabilityai/sdxl-turbo
-LCM(v1.5/XL),LCM(v1.5/XL),916.2179215619182,908.6161501509581,openrail++,Latent Consistency,https://fal.ai/models/fal-ai/fast-lcm-diffusion/api
-OpenJourney,OpenJourney,838.9432903688682,832.1138397704597,creativeml-openrail-m,PromptHero,https://huggingface.co/prompthero/openjourney
-LCM,LCM,800.403335098498,812.6668778296877,MIT License,Tsinghua University,https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7

 key,Model,Arena Elo rating (anony),Arena Elo rating (full),License,Organization,Link
+FLUX1dev,FLUX1dev,1139.2171997788664,1154.9401991259183,flux-1-dev-non-commercial-license (other),Black Forest Labs,https://huggingface.co/docs/diffusers/main/en/api/pipelines/flux
+PlayGround V2.5,PlayGround V2.5,1132.1836548838864,1132.5651835501171,Playground v2.5 Community License,Playground,https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic
+PlayGround V2,PlayGround V2,1075.3571099849078,1072.7594472644441,Playground v2 Community License,Playground,https://huggingface.co/playgroundai/playground-v2-1024px-aesthetic
+FLUX1schnell,FLUX1schnell,1066.505710109784,1064.6512761628912,flux-1-dev-non-commercial-license (other),Black Forest Labs,https://huggingface.co/docs/diffusers/main/en/api/pipelines/flux
+HunyuanDiT,HunyuanDiT,1051.1795507568463,1036.2184409268364,tencent-hunyuan-community,Tencent,https://huggingface.co/Tencent-Hunyuan/HunyuanDiT
+StableCascade,StableCascade,1038.864872104377,1041.0470474695244,stable-cascade-nc-community (other),Stability AI,https://huggingface.co/stabilityai/stable-cascade
+AuraFlow,AuraFlow,1034.6965881363633,1028.364203196634,Apache-2.0,Fal.AI,https://huggingface.co/fal/AuraFlow
+PixArtAlpha,PixArtAlpha,1025.6380572404505,1014.320702980116,openrail++,PixArt-alpha,https://huggingface.co/PixArt-alpha/PixArt-XL-2-1024-MS
+SDXLLightning,SDXLLightning,1025.1762643276309,1028.287019099912,openrail++,ByteDance,https://huggingface.co/ByteDance/SDXL-Lightning
+PixArtSigma,PixArtSigma,1020.4950012337554,1019.0949741744585,openrail++,PixArt-alpha,https://fal.ai/models/fal-ai/pixart-sigma
+SD3,SD3,993.6426659727981,987.8044800091614,stabilityai-nc-research-community,Stability AI,https://huggingface.co/stabilityai/stable-diffusion-3-medium
+Kolors,Kolors,984.8923136492953,980.8110707842453,Apache-2.0,Kwai Kolors,https://huggingface.co/Kwai-Kolors/Kolors
+SDXL,SDXL,966.8046145579953,966.0264408253988,openrail++,Stability AI,https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
+SDXLTurbo,SDXLTurbo,915.0182076955812,911.0171286468213,sai-nc-community (other),Stability AI,https://huggingface.co/stabilityai/sdxl-turbo
+LCM(v1.5/XL),LCM(v1.5/XL),907.8353926442691,900.2703944909691,openrail++,Latent Consistency,https://fal.ai/models/fal-ai/fast-lcm-diffusion/api
+OpenJourney,OpenJourney,830.3596812818465,823.5680806695481,creativeml-openrail-m,PromptHero,https://huggingface.co/prompthero/openjourney
+LCM,LCM,792.1331156413471,804.405883013325,MIT License,Tsinghua University,https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7

arena_elo/results/latest/elo_results_t2i_generation.pkl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fa69fb8e450bd988c80b346d4f102c5be8bd74605030f0d5a295a71b66230ccd
-size 76790

 version https://git-lfs.github.com/spec/v1
+oid sha256:4935474b3d38916a2a46738fa4a4e57a34c59abe3c61111a9e79a46187a24b38
+size 86085

arena_elo/results/latest/t2i_generation_leaderboard.csv CHANGED Viewed

@@ -1,15 +1,18 @@
 key,Model,Arena Elo rating (anony),Arena Elo rating (full),License,Organization,Link
-PlayGround V2.5,PlayGround V2.5,1141.5188909673816,1141.7574769353046,Playground v2.5 Community License,Playground,https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic
-FLUX1schnell,FLUX1schnell,1107.0181424085022,1112.4579783184781,N/A,N/A,N/A
-PlayGround V2,PlayGround V2,1083.848194525069,1081.1558760457797,Playground v2 Community License,Playground,https://huggingface.co/playgroundai/playground-v2-1024px-aesthetic
-HunyuanDiT,HunyuanDiT,1061.8376119998818,1045.8437711625322,tencent-hunyuan-community,Tencent,https://huggingface.co/Tencent-Hunyuan/HunyuanDiT
-StableCascade,StableCascade,1048.1457195754278,1050.1691240248065,stable-cascade-nc-community (other),Stability AI,https://huggingface.co/stabilityai/stable-cascade
-SDXLLightning,SDXLLightning,1034.30472320188,1037.2932871766393,openrail++,ByteDance,https://huggingface.co/ByteDance/SDXL-Lightning
-PixArtAlpha,PixArtAlpha,1033.912116989071,1022.557442237083,openrail++,PixArt-alpha,https://huggingface.co/PixArt-alpha/PixArt-XL-2-1024-MS
-PixArtSigma,PixArtSigma,1029.1408183981687,1027.4953277391191,openrail++,PixArt-alpha,https://fal.ai/models/fal-ai/pixart-sigma
-SD3,SD3,1006.8458978216129,1000.2491511468914,stabilityai-nc-research-community,Stability AI,https://huggingface.co/stabilityai/stable-diffusion-3-medium
-SDXL,SDXL,975.3595050901268,974.4701361426086,openrail++,Stability AI,https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
-SDXLTurbo,SDXLTurbo,922.5038319935961,918.6139210049913,sai-nc-community (other),Stability AI,https://huggingface.co/stabilityai/sdxl-turbo
-LCM(v1.5/XL),LCM(v1.5/XL),916.2179215619182,908.6161501509581,openrail++,Latent Consistency,https://fal.ai/models/fal-ai/fast-lcm-diffusion/api
-OpenJourney,OpenJourney,838.9432903688682,832.1138397704597,creativeml-openrail-m,PromptHero,https://huggingface.co/prompthero/openjourney
-LCM,LCM,800.403335098498,812.6668778296877,MIT License,Tsinghua University,https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7

 key,Model,Arena Elo rating (anony),Arena Elo rating (full),License,Organization,Link
+FLUX1dev,FLUX1dev,1139.2171997788664,1154.9401991259183,flux-1-dev-non-commercial-license (other),Black Forest Labs,https://huggingface.co/docs/diffusers/main/en/api/pipelines/flux
+PlayGround V2.5,PlayGround V2.5,1132.1836548838864,1132.5651835501171,Playground v2.5 Community License,Playground,https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic
+PlayGround V2,PlayGround V2,1075.3571099849078,1072.7594472644441,Playground v2 Community License,Playground,https://huggingface.co/playgroundai/playground-v2-1024px-aesthetic
+FLUX1schnell,FLUX1schnell,1066.505710109784,1064.6512761628912,flux-1-dev-non-commercial-license (other),Black Forest Labs,https://huggingface.co/docs/diffusers/main/en/api/pipelines/flux
+HunyuanDiT,HunyuanDiT,1051.1795507568463,1036.2184409268364,tencent-hunyuan-community,Tencent,https://huggingface.co/Tencent-Hunyuan/HunyuanDiT
+StableCascade,StableCascade,1038.864872104377,1041.0470474695244,stable-cascade-nc-community (other),Stability AI,https://huggingface.co/stabilityai/stable-cascade
+AuraFlow,AuraFlow,1034.6965881363633,1028.364203196634,Apache-2.0,Fal.AI,https://huggingface.co/fal/AuraFlow
+PixArtAlpha,PixArtAlpha,1025.6380572404505,1014.320702980116,openrail++,PixArt-alpha,https://huggingface.co/PixArt-alpha/PixArt-XL-2-1024-MS
+SDXLLightning,SDXLLightning,1025.1762643276309,1028.287019099912,openrail++,ByteDance,https://huggingface.co/ByteDance/SDXL-Lightning
+PixArtSigma,PixArtSigma,1020.4950012337554,1019.0949741744585,openrail++,PixArt-alpha,https://fal.ai/models/fal-ai/pixart-sigma
+SD3,SD3,993.6426659727981,987.8044800091614,stabilityai-nc-research-community,Stability AI,https://huggingface.co/stabilityai/stable-diffusion-3-medium
+Kolors,Kolors,984.8923136492953,980.8110707842453,Apache-2.0,Kwai Kolors,https://huggingface.co/Kwai-Kolors/Kolors
+SDXL,SDXL,966.8046145579953,966.0264408253988,openrail++,Stability AI,https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
+SDXLTurbo,SDXLTurbo,915.0182076955812,911.0171286468213,sai-nc-community (other),Stability AI,https://huggingface.co/stabilityai/sdxl-turbo
+LCM(v1.5/XL),LCM(v1.5/XL),907.8353926442691,900.2703944909691,openrail++,Latent Consistency,https://fal.ai/models/fal-ai/fast-lcm-diffusion/api
+OpenJourney,OpenJourney,830.3596812818465,823.5680806695481,creativeml-openrail-m,PromptHero,https://huggingface.co/prompthero/openjourney
+LCM,LCM,792.1331156413471,804.405883013325,MIT License,Tsinghua University,https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7

model/model_manager.py CHANGED Viewed

@@ -7,12 +7,19 @@ import spaces
 from PIL import Image
 from .models import IMAGE_GENERATION_MODELS, IMAGE_EDITION_MODELS, VIDEO_GENERATION_MODELS, MUSEUM_UNSUPPORTED_MODELS, DESIRED_APPEAR_MODEL, load_pipeline
 from .fetch_museum_results import draw_from_imagen_museum, draw2_from_imagen_museum, draw_from_videogen_museum, draw2_from_videogen_museum
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 class ModelManager:
-    def __init__(self, enable_nsfw=True):
         self.model_ig_list = IMAGE_GENERATION_MODELS
         self.model_ie_list = IMAGE_EDITION_MODELS
         self.model_vg_list = VIDEO_GENERATION_MODELS
@@ -21,6 +28,10 @@ class ModelManager:
         self.enable_nsfw = enable_nsfw
         self.load_guard(enable_nsfw)
         self.loaded_models = {}
     def load_model_pipe(self, model_name):
         if not model_name in self.loaded_models:

 from PIL import Image
 from .models import IMAGE_GENERATION_MODELS, IMAGE_EDITION_MODELS, VIDEO_GENERATION_MODELS, MUSEUM_UNSUPPORTED_MODELS, DESIRED_APPEAR_MODEL, load_pipeline
 from .fetch_museum_results import draw_from_imagen_museum, draw2_from_imagen_museum, draw_from_videogen_museum, draw2_from_videogen_museum
+from .pre_download import pre_download_all_models, pre_download_video_models
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
+def debug_packages():
+    import pkg_resources
+    installed_packages = pkg_resources.working_set
+    for package in installed_packages:
+        print(f"{package.key}=={package.version}")
 class ModelManager:
+    def __init__(self, enable_nsfw=False, pre_download=False, debug_packages=False):
         self.model_ig_list = IMAGE_GENERATION_MODELS
         self.model_ie_list = IMAGE_EDITION_MODELS
         self.model_vg_list = VIDEO_GENERATION_MODELS
         self.enable_nsfw = enable_nsfw
         self.load_guard(enable_nsfw)
         self.loaded_models = {}
+        if pre_download:
+            pre_download_all_models()
+        if debug_packages:
+            debug_packages()
     def load_model_pipe(self, model_name):
         if not model_name in self.loaded_models:

serve/leaderboard.py CHANGED Viewed

@@ -107,13 +107,24 @@ def get_full_table(anony_arena_df, full_arena_df, model_table_df):
         if model_key in anony_arena_df.index:
             idx = anony_arena_df.index.get_loc(model_key)
             row.append(round(anony_arena_df.iloc[idx]["rating"]))
         else:
             row.append(np.nan)
         if model_key in full_arena_df.index:
             idx = full_arena_df.index.get_loc(model_key)
             row.append(round(full_arena_df.iloc[idx]["rating"]))
         else:
             row.append(np.nan)
         # row.append(model_table_df.iloc[i]["MT-bench (score)"])
         # row.append(model_table_df.iloc[i]["Num Battles"])
         # row.append(model_table_df.iloc[i]["MMLU"])
@@ -124,6 +135,9 @@ def get_full_table(anony_arena_df, full_arena_df, model_table_df):
         values.append(row)
     values.sort(key=lambda x: -x[1] if not np.isnan(x[1]) else 1e9)
     return values
@@ -244,7 +258,7 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Tr
                     value=arena_table_vals,
                     elem_id="arena_leaderboard_dataframe",
                     height=700,
-                    column_widths=[50, 200, 100, 100, 100, 150, 150],
                     wrap=True,
                 )
             with gr.Tab("Full Leaderboard", id=1):
@@ -253,16 +267,20 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Tr
                 full_table_vals = get_full_table(anony_arena_df, full_arena_df, model_table_df)
                 gr.Dataframe(
                     headers=[
                         "🤖 Model",
                         "⭐ Arena Elo (anony)",
                         "⭐ Arena Elo (full)",
                         "Organization",
                         "License",
                     ],
-                    datatype=["markdown", "number", "number", "str", "str"],
                     value=full_table_vals,
                     elem_id="full_leaderboard_dataframe",
-                    column_widths=[200, 100, 100, 100, 150, 150],
                     height=700,
                     wrap=True,
                 )

         if model_key in anony_arena_df.index:
             idx = anony_arena_df.index.get_loc(model_key)
             row.append(round(anony_arena_df.iloc[idx]["rating"]))
+            upper_diff = round(anony_arena_df.iloc[idx]["rating_q975"] - anony_arena_df.iloc[idx]["rating"])
+            lower_diff = round(anony_arena_df.iloc[idx]["rating"] - anony_arena_df.iloc[idx]["rating_q025"])
+            row.append(f"+{upper_diff}/-{lower_diff}")
         else:
             row.append(np.nan)
+            row.append("N/A")
         if model_key in full_arena_df.index:
             idx = full_arena_df.index.get_loc(model_key)
             row.append(round(full_arena_df.iloc[idx]["rating"]))
+            upper_diff = round(full_arena_df.iloc[idx]["rating_q975"] - full_arena_df.iloc[idx]["rating"])
+            lower_diff = round(full_arena_df.iloc[idx]["rating"] - full_arena_df.iloc[idx]["rating_q025"])
+            row.append(f"+{upper_diff}/-{lower_diff}")
+            row.append(round(full_arena_df.iloc[idx]["num_battles"]))
         else:
             row.append(np.nan)
+            row.append("N/A")
+            row.append(np.nan)
         # row.append(model_table_df.iloc[i]["MT-bench (score)"])
         # row.append(model_table_df.iloc[i]["Num Battles"])
         # row.append(model_table_df.iloc[i]["MMLU"])
         values.append(row)
     values.sort(key=lambda x: -x[1] if not np.isnan(x[1]) else 1e9)
+    # insert rank
+    for i, row in enumerate(values):
+        row.insert(0, i + 1)
     return values
                     value=arena_table_vals,
                     elem_id="arena_leaderboard_dataframe",
                     height=700,
+                    column_widths=[30, 50, 30, 30, 30, 70, 150],
                     wrap=True,
                 )
             with gr.Tab("Full Leaderboard", id=1):
                 full_table_vals = get_full_table(anony_arena_df, full_arena_df, model_table_df)
                 gr.Dataframe(
                     headers=[
+                        "Rank",
                         "🤖 Model",
                         "⭐ Arena Elo (anony)",
+                        "📊 95% CI",
                         "⭐ Arena Elo (full)",
+                        "📊 95% CI",
+                        "🗳️ Votes",
                         "Organization",
                         "License",
                     ],
+                    datatype=["str", "markdown", "number", "str", "number", "str", "number", "str", "str"],
                     value=full_table_vals,
                     elem_id="full_leaderboard_dataframe",
+                    column_widths=[30, 50, 30, 30, 30, 30, 30, 70, 150],
                     height=700,
                     wrap=True,
                 )