DongfuJiang committed
Commit a56205d · 2 Parent(s): ea5c481 c4c1eb9

Merge branch 'main' of https://huggingface.co/spaces/TIGER-Lab/GenAI-Arena

README.md CHANGED
@@ -4,7 +4,8 @@ emoji: 📈
 colorFrom: purple
 colorTo: pink
 sdk: gradio
-sdk_version: 4.21.0
+sdk_version: 4.41.0
+python_version: 3.12
 app_file: app.py
 pinned: false
 license: mit
app.py CHANGED
@@ -9,6 +9,13 @@ from pathlib import Path
 from serve.constants import SERVER_PORT, ROOT_PATH, ELO_RESULTS_DIR
 from model.pre_download import pre_download_all_models, pre_download_video_models
 
+def debug_packages():
+    import pkg_resources
+
+    installed_packages = pkg_resources.working_set
+    for package in installed_packages:
+        print(f"{package.key}=={package.version}")
+
 def build_combine_demo(models, elo_results_file, leaderboard_table_file):
 
     with gr.Blocks(
@@ -99,6 +106,8 @@ if __name__ == "__main__":
     elo_results_dir = ELO_RESULTS_DIR
     models = ModelManager()
 
+    debug_packages()
+
     pre_download_all_models()
 
     elo_results_file, leaderboard_table_file = load_elo_results(elo_results_dir)
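A note on the new debug_packages helper: pkg_resources is deprecated in recent setuptools releases, and with python_version: 3.12 now pinned in the README, the stdlib importlib.metadata produces the same pip-freeze-style listing. A minimal alternative sketch (an illustration, not part of this commit):

    from importlib.metadata import distributions

    def debug_packages():
        # Print each installed distribution as a "name==version" line,
        # sorted case-insensitively by distribution name.
        for dist in sorted(distributions(), key=lambda d: d.metadata["Name"].lower()):
            print(f"{dist.metadata['Name']}=={dist.version}")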
arena_elo/elo_rating/clean_battle_data.py CHANGED
@@ -21,42 +21,6 @@ from .basic_stats import get_log_files, NUM_SERVERS, LOG_ROOT_DIR
 from .utils import detect_language, get_time_stamp_from_date
 
 VOTES = ["tievote", "leftvote", "rightvote", "bothbad_vote"]
-IDENTITY_WORDS = [
-    "vicuna",
-    "lmsys",
-    "koala",
-    "uc berkeley",
-    "open assistant",
-    "laion",
-    "chatglm",
-    "chatgpt",
-    "gpt-4",
-    "openai",
-    "anthropic",
-    "claude",
-    "bard",
-    "palm",
-    "lamda",
-    "google",
-    "llama",
-    "qianwan",
-    "alibaba",
-    "mistral",
-    "zhipu",
-    "KEG lab",
-    "01.AI",
-    "AI2",
-    "Tülu",
-    "Tulu",
-    "NETWORK ERROR DUE TO HIGH TRAFFIC. PLEASE REGENERATE OR REFRESH THIS PAGE.",
-    "$MODERATION$ YOUR INPUT VIOLATES OUR CONTENT MODERATION GUIDELINES.",
-    "API REQUEST ERROR. Please increase the number of max tokens.",
-    "**API REQUEST ERROR** Reason: The response was blocked.",
-    "**API REQUEST ERROR**",
-]
-
-for i in range(len(IDENTITY_WORDS)):
-    IDENTITY_WORDS[i] = IDENTITY_WORDS[i].lower()
 
 def parse_model_name(model_name):
     return NotImplementedError()
@@ -79,23 +43,12 @@ def to_openai_format(messages):
 
 
 def replace_model_name(old_name, tstamp):
-
     replace_dict = {
-        "bard": "palm-2",
-        "claude-v1": "claude-1",
-        "claude-instant-v1": "claude-instant-1",
-        "oasst-sft-1-pythia-12b": "oasst-pythia-12b",
-        "claude-2": "claude-2.0",
         "PlayGroundV2": "PlayGround V2",
         "PlayGroundV2.5": "PlayGround V2.5",
         "FluxTimestep": "FLUX1schnell",
         "FluxGuidance": "FLUX1dev"
     }
-    if old_name in ["gpt-4", "gpt-3.5-turbo"]:
-        if tstamp > 1687849200:
-            old_name += "-0613"
-        else:
-            old_name += "-0314"
     if old_name in replace_dict:
         old_name = replace_dict[old_name]
     if "Flux" in old_name:
@@ -198,32 +151,6 @@ def clean_battle_data(
             print(f"Model names mismatch: {models_public} vs {models_hidden}")
             ct_invalid += 1
             continue
-
-        # # Detect langauge
-        # state = row["states"][0]
-        # if state["offset"] >= len(state["messages"]):
-        #     ct_invalid += 1
-        #     continue
-        # lang_code = detect_language(state["messages"][state["offset"]][1])
-
-        # # Drop conversations if the model names are leaked
-        # leaked_identity = False
-        # messages = ""
-        # for i in range(2):
-        #     state = row["states"][i]
-        #     for turn_idx, (role, msg) in enumerate(
-        #         state["messages"][state["offset"] :]
-        #     ):
-        #         if msg:
-        #             messages += msg.lower()
-        #     for word in IDENTITY_WORDS:
-        #         if word in messages:
-        #             leaked_identity = True
-        #             break
-
-        # if leaked_identity:
-        #     ct_leaked_identity += 1
-        #     continue
 
         def preprocess_model_name(m):
             if m == "Playground v2":
@@ -239,7 +166,6 @@ def clean_battle_data(
             for _model in models:
                 try:
                     platform, model_name, task = _model.split("_")
-                    #platform, model_name, task = parse_model_name(_model)
                 except ValueError:
                     valid = False
                     break
@@ -251,21 +177,13 @@ def clean_battle_data(
                 continue
             for i, _model in enumerate(models):
                 platform, model_name, task = _model.split("_")
-                #platform, model_name, task = parse_model_name(_model)
                 models[i] = model_name
-
-            # if not all(x.startswith("imagenhub_") and x.endswith("_edition") for x in models):
-            #     # print(f"Invalid model names: {models}")
-            #     ct_invalid += 1
-            #     continue
-
-            # models = [x[len("imagenhub_"):-len("_edition")] for x in models]
+
         elif task_name == "t2i_generation":
             valid = True
             for _model in models:
                 try:
                     platform, model_name, task = _model.split("_")
-                    #platform, model_name, task = parse_model_name(_model)
                 except ValueError:
                     valid = False
                     break
@@ -277,24 +195,13 @@ def clean_battle_data(
                 continue
             for i, _model in enumerate(models):
                 platform, model_name, task = _model.split("_")
-                #platform, model_name, task = parse_model_name(_model)
                 models[i] = model_name
-            # if not all("playground" in x.lower() or (x.startswith("imagenhub_") and x.endswith("_generation")) for x in models):
-            #     print(f"Invalid model names: {models}")
-            #     ct_invalid += 1
-            #     continue
-            # models = [x[len("imagenhub_"):-len("_generation")] for x in models]
-            # for i, model_name in enumerate(models):
-            #     mode
-            #     if model_name.startswith("imagenhub_"):
-            #         models[i] = model_name[len("imagenhub_"):-len("_generation")]
 
         elif task_name == "video_generation":
             valid = True
             for _model in models:
                 try:
                     platform, model_name, task = _model.split("_")
-                    #platform, model_name, task = parse_model_name(_model)
                 except ValueError:
                     valid = False
                     break
@@ -306,32 +213,17 @@ def clean_battle_data(
                 continue
             for i, _model in enumerate(models):
                 platform, model_name, task = _model.split("_")
-                #platform, model_name, task = parse_model_name(_model)
                 models[i] = model_name
 
         else:
             raise ValueError(f"Invalid task_name: {task_name}")
 
-        # if "Flux" in models[0] or "Flux" in models[1]:
-        #     print(f"Invalid model names: {models}")
-        #     exit(1)
         models = [replace_model_name(m, row["tstamp"]) for m in models]
 
         # Exclude certain models
         if exclude_model_names and any(x in exclude_model_names for x in models):
             ct_invalid += 1
             continue
-
-        # if models[0] not in model_infos or models[1] not in model_infos:
-        #     continue
-
-        # # Exclude votes before the starting date
-        # if model_infos and (model_infos[models[0]]["starting_from"] > row["tstamp"] or model_infos[models[1]]["starting_from"] > row["tstamp"]):
-        #     print(f"Invalid vote before the valid starting date for {models[0]} and {models[1]}")
-        #     ct_invalid += 1
-        #     continue
-
-
 
         if mode == "conv_release":
             # assert the two images are the same
@@ -357,12 +249,6 @@ def clean_battle_data(
 
 
         question_id = row["states"][0]["conv_id"]
-        # conversation_a = to_openai_format(
-        #     row["states"][0]["messages"][row["states"][0]["offset"] :]
-        # )
-        # conversation_b = to_openai_format(
-        #     row["states"][1]["messages"][row["states"][1]["offset"] :]
-        # )
 
         ip = row["ip"]
         if ip not in all_ips:
@@ -386,11 +272,7 @@ def clean_battle_data(
                 model_b=models[1],
                 winner=convert_type[row["type"]],
                 judge=f"arena_user_{user_id}",
-                # conversation_a=conversation_a,
-                # conversation_b=conversation_b,
-                # turn=len(conversation_a) // 2,
                 anony=anony,
-                # language=lang_code,
                 tstamp=row["tstamp"],
             )
         )
@@ -458,14 +340,6 @@ if __name__ == "__main__":
             print(battles[i])
         output = f"clean_battle_{args.task_name}_{cutoff_date}.json"
     elif args.mode == "conv_release":
-        # new_battles = []
-        # for x in battles:
-        #     if not x["anony"]:
-        #         continue
-        #     for key in []:
-        #         del x[key]
-        #     new_battles.append(x)
-        # battles = new_battles
        output = f"clean_battle_{args.task_name}_conv_{cutoff_date}.json"
 
     with open(output, "w") as fout:
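Context for the surviving _model.split("_") calls in every task branch above: arena model identifiers are assumed to follow a three-field platform_model_task convention, so unpacking the split into exactly three names either succeeds or raises ValueError, which each branch catches to mark the record invalid. A self-contained sketch of that contract (values are illustrative):

    def split_model_id(model_id: str) -> tuple[str, str, str]:
        # A well-formed id such as "fal_T2VTurbo_text2video" has exactly
        # three underscore-separated fields; anything else raises ValueError.
        platform, model_name, task = model_id.split("_")
        return platform, model_name, task

    print(split_model_id("fal_T2VTurbo_text2video"))  # ('fal', 'T2VTurbo', 'text2video')
    try:
        split_model_id("badname")
    except ValueError:
        print("invalid model id")  # the cleaning loop counts these via ct_invalid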
model/model_manager.py CHANGED
@@ -66,6 +66,7 @@ class ModelManager:
             pipe = self.load_model_pipe(model_name)
             result = pipe(prompt=prompt)
         else:
+            print(f'The prompt "{prompt}" is not safe')
             result = ''
         return result
 
@@ -75,6 +76,7 @@ class ModelManager:
             pipe = self.load_model_pipe(model_name)
             result = pipe(prompt=prompt)
         else:
+            print(f'The prompt "{prompt}" is not safe')
             result = ''
         return result
 
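On the new safety message in ModelManager: in a long-running Space, a logging call is often easier to route and filter than a bare print. A hypothetical variant of the added branch (an alternative sketch, not what the commit does):

    import logging

    logger = logging.getLogger(__name__)

    def report_unsafe_prompt(prompt: str) -> str:
        # Emit a warning-level record and return the same empty result
        # the ModelManager branches fall back to.
        logger.warning('The prompt "%s" is not safe', prompt)
        return ''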
model/model_registry.py CHANGED
@@ -258,6 +258,7 @@ register_model_info(
     "AnimateDiff Turbo is a lightning version of AnimateDiff.",
 )
 
+"""
 register_model_info(
     ["videogenhub_LaVie_generation"],
     "LaVie",
@@ -265,6 +266,7 @@ register_model_info(
     "LaVie is a video generation model with cascaded latent diffusion models.",
 )
 
+
 register_model_info(
     ["videogenhub_VideoCrafter2_generation"],
     "VideoCrafter2",
@@ -285,7 +287,7 @@ register_model_info(
     "https://github.com/hpcaitech/Open-Sora",
     "A community-driven opensource implementation of Sora.",
 )
-
+"""
 register_model_info(
     ["videogenhub_OpenSora12_generation"],
     "OpenSora v1.2",
@@ -301,7 +303,7 @@ register_model_info(
 )
 
 register_model_info(
-    ["videogenhub_T2VTurbo_generation"],
+    ["fal_T2VTurbo_generation"],
     "T2V-Turbo",
     "https://github.com/Ji4chenLi/t2v-turbo",
     "Video Consistency Model with Mixed Reward Feedback.",
model/models/__init__.py CHANGED
@@ -17,10 +17,14 @@ IMAGE_EDITION_MODELS = ['imagenhub_CycleDiffusion_edition', 'imagenhub_Pix2PixZe
                         'imagenhub_InfEdit_edition', 'imagenhub_CosXLEdit_edition', 'imagenhub_UltraEdit_edition']
 VIDEO_GENERATION_MODELS = ['fal_AnimateDiff_text2video',
                            'fal_AnimateDiffTurbo_text2video',
-                           'videogenhub_LaVie_generation',
-                           'videogenhub_VideoCrafter2_generation',
-                           'videogenhub_ModelScope_generation', 'videogenhub_CogVideoX_generation', 'videogenhub_OpenSora12_generation',
-                           'videogenhub_OpenSora_generation', 'videogenhub_T2VTurbo_generation','fal_StableVideoDiffusion_text2video']
+                           #'videogenhub_LaVie_generation',
+                           #'videogenhub_VideoCrafter2_generation',
+                           #'videogenhub_ModelScope_generation',
+                           'videogenhub_CogVideoX_generation', 'videogenhub_OpenSora12_generation',
+                           #'videogenhub_OpenSora_generation',
+                           #'videogenhub_T2VTurbo_generation',
+                           'fal_T2VTurbo_text2video',
+                           'fal_StableVideoDiffusion_text2video']
 MUSEUM_UNSUPPORTED_MODELS = ['videogenhub_OpenSoraPlan_generation']
 DESIRED_APPEAR_MODEL = ['videogenhub_T2VTurbo_generation','fal_StableVideoDiffusion_text2video']
 
model/models/fal_api_models.py CHANGED
@@ -7,7 +7,7 @@ import base64
 
 FAL_MODEl_NAME_MAP = {"SDXL": "fast-sdxl", "SDXLTurbo": "fast-turbo-diffusion", "SDXLLightning": "fast-lightning-sdxl",
                       "LCM(v1.5/XL)": "fast-lcm-diffusion", "PixArtSigma": "pixart-sigma", "StableCascade": "stable-cascade",
-                      "AuraFlow": "aura-flow", "FLUX1schnell": "flux/schnell", "FLUX1dev": "flux/dev"}
+                      "AuraFlow": "aura-flow", "FLUX1schnell": "flux/schnell", "FLUX1dev": "flux/dev", "T2VTurbo": "t2v-turbo"}
 
 class FalModel():
     def __init__(self, model_name, model_type):
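For context on the T2VTurbo addition: the map translates arena-side model names into fal endpoint slugs. A sketch of the kind of lookup the surrounding FalModel presumably performs (the fal-ai/ application prefix and the helper name are assumptions, not code from this repo):

    # Abbreviated, illustrative copy of FAL_MODEl_NAME_MAP.
    FAL_NAME_MAP = {"FLUX1schnell": "flux/schnell", "FLUX1dev": "flux/dev",
                    "T2VTurbo": "t2v-turbo"}

    def fal_endpoint(model_name: str) -> str:
        # Resolve an arena model name to a fal application id,
        # e.g. "T2VTurbo" -> "fal-ai/t2v-turbo".
        return f"fal-ai/{FAL_NAME_MAP[model_name]}"

    assert fal_endpoint("T2VTurbo") == "fal-ai/t2v-turbo"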
requirements.txt CHANGED
@@ -4,7 +4,7 @@ flask_cors
 faiss-cpu
 fire
 h5py
--e git+https://github.com/facebookresearch/xformers.git@main#egg=xformers
+xformers
 numpy>=1.23.5
 pandas<2.0.0
 peft>=0.12
@@ -27,8 +27,6 @@ torch-fidelity>=0.3.0
 setuptools>=59.5.0
 transformers
 torchmetrics>=0.6.0
-lpips
-image-reward
 kornia>=0.6
 diffusers>=0.18.0
 accelerate>=0.20.3