Spaces:

TIGER-Lab
/

GenAI-Arena

Running on Zero

File size: 14,542 Bytes

e368cec
 
 
0b4b1e4
e368cec
 
 
0b4b1e4
 
e368cec
0b4b1e4
e368cec
 
0b4b1e4
 
e368cec
 
 
 
 
 
 
0b4b1e4
 
e368cec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94bd22c
e368cec
 
 
0b4b1e4
 
 
e368cec
 
e049190
 
 
 
 
0b4b1e4
 
 
e049190
 
e368cec
94bd22c
0b4b1e4
e368cec
 
0b4b1e4
 
 
e368cec
 
 
94bd22c
0b4b1e4
e368cec
 
0b4b1e4
 
 
e368cec
 
 
 
0b4b1e4
e368cec
 
0b4b1e4
 
 
e368cec
 
 
94bd22c
e368cec
 
 
0b4b1e4
 
 
 
 
 
 
 
 
 
 
 
 
e368cec
 
 
94bd22c
e368cec
 
 
0b4b1e4
 
 
e368cec
 
8d30b3b
 
 
 
 
0b4b1e4
 
 
8d30b3b
 
e368cec
 
 
 
 
0b4b1e4
 
 
e368cec
 
86da3fc
94bd22c
86da3fc
 
 
0b4b1e4
 
 
86da3fc
 
e368cec
94bd22c
e368cec
 
 
0b4b1e4
 
 
e368cec
 
 
94bd22c
e368cec
 
 
0b4b1e4
 
 
e368cec
 
fa656b2
 
 
 
 
0b4b1e4
 
 
fa656b2
 
8340e5c
 
 
 
 
0b4b1e4
 
 
8340e5c
 
09a289b
 
 
 
 
0b4b1e4
 
 
09a289b
 
 
4d32483
09a289b
 
 
74fa6be
0b4b1e4
 
09a289b
 
 
4d32483
09a289b
 
 
0b4b1e4
 
 
09a289b
 
 
e368cec
 
 
 
 
 
0b4b1e4
 
 
e368cec
 
 
 
 
 
 
0b4b1e4
 
 
e368cec
 
 
 
 
 
 
0b4b1e4
 
 
e368cec
 
 
 
 
 
 
 
0b4b1e4
 
 
e368cec
 
 
 
 
 
 
0b4b1e4
 
 
e368cec
 
 
 
 
 
 
0b4b1e4
 
 
e368cec
 
623aaf3
 
 
 
 
0b4b1e4
 
 
623aaf3
 
 
 
 
 
 
0b4b1e4
 
 
623aaf3
 
172a089
 
 
 
 
0b4b1e4
 
 
172a089
 
b3212f3
 
 
 
 
 
 
 
 
 
e368cec
944dd2b
e368cec
 
 
0b4b1e4
 
 
e368cec
944dd2b
 
26dad4e
944dd2b
 
 
0b4b1e4
 
 
944dd2b
 
765fb5e
 
 
 
 
0b4b1e4
 
 
765fb5e
 
944dd2b
26dad4e
944dd2b
 
 
0b4b1e4
 
 
944dd2b
26dad4e
 
 
 
 
 
0b4b1e4
 
 
26dad4e
 
1599f4c
 
 
 
 
0b4b1e4
 
 
1599f4c
26dad4e
 
 
 
 
0b4b1e4
 
 
26dad4e
65622ab
e1b7db1
 
 
c3c53e2
e1b7db1
0b4b1e4
 
 
e1b7db1
0b4b1e4
4e37c8d
 
 
 
 
0b4b1e4
 
 
4e37c8d
 
e548ada
063f216
e548ada
 
 
 
 
 
 
 
bf853bd
 
 
 
 
 
 
 
 
4e37c8d
063f216
e548ada
4e37c8d
 
0b4b1e4
 
 
4e37c8d
e368cec
172a089
1599f4c
172a089
 
 
0b4b1e4
 
 
172a089
 
aa59622
 
 
 
 
 
 
 
 
 
530174e
 
 
 
 
 
 
 
 
 
fb2a667

from collections import namedtuple
from typing import List

# Immutable record holding the display metadata of one registered model.
ModelInfo = namedtuple(
    "ModelInfo",
    "simple_name link description license organization type",
)

# Registry mapping each lookup key (full name, short alias, simple name) to
# its ModelInfo record; populated by `register_model_info`.
model_info = {}

def register_model_info(
    full_names: List[str], simple_name: str, link: str, description: str,
    license: str, organization: str, model_type: str
):
    """Register one model's metadata under every key it can be looked up by.

    A single ``ModelInfo`` record is built and stored in the module-level
    ``model_info`` dict under:

    * each entry of ``full_names``,
    * the second underscore-separated field of each full name
      (e.g. ``"imagenhub_LCM_generation"`` -> ``"LCM"``), and
    * ``simple_name``.

    Keys that already exist are overwritten, so the most recent registration
    wins.

    Args:
        full_names: Fully qualified model identifiers.
        simple_name: Human-readable display name.
        link: URL of the model's home page or model card.
        description: Short description of the model.
        license: License name of the model.
        organization: Organization(s) that released the model.
        model_type: Task category stored in the record's ``type`` field.
    """
    info = ModelInfo(simple_name, link, description, license, organization, model_type)
    for full_name in full_names:
        model_info[full_name] = info
        name_fields = full_name.split("_")
        # A full name without "_" has no second field to use as an alias.
        # Skip the alias instead of raising IndexError after the full-name
        # key has already been written.
        if len(name_fields) > 1:
            model_info[name_fields[1]] = info
    model_info[simple_name] = info

def get_model_info(name: str) -> ModelInfo:
    """Return the registered ``ModelInfo`` for ``name``.

    When ``name`` is not a key of ``model_info``, a placeholder record is
    returned instead: it uses ``name`` as the simple name, "-" for the link,
    license and organization, a reminder text as the description, and
    ``None`` as the type.
    """
    if name not in model_info:
        # Unknown model: call `register_model_info` for it to replace this
        # placeholder with real metadata.
        return ModelInfo(
            simple_name=name,
            link="-",
            description="Register the description at fastchat/model/model_registry.py",
            license="-",
            organization="-",
            type=None,
        )
    return model_info[name]

def get_model_description_md(model_list):
    """Render a three-column Markdown table describing the given models.

    Each name in ``model_list`` is resolved with ``get_model_info``. Models
    are de-duplicated by their simple name, keeping the first occurrence.
    Every cell has the form ``[simple_name](link): description``. Cells are
    laid out three per row; a final row with fewer than three cells is left
    without a trailing newline.

    Returns:
        The Markdown source as a single string, starting with an empty
        header row and its separator row.
    """
    # Dicts keep insertion order, so this both de-duplicates and preserves
    # the order in which models first appear.
    cell_by_simple_name = {}
    for model_name in model_list:
        entry = get_model_info(model_name)
        if entry.simple_name not in cell_by_simple_name:
            cell_by_simple_name[entry.simple_name] = (
                f"[{entry.simple_name}]({entry.link}): {entry.description}"
            )

    pieces = ["""
| | | |
| ---- | ---- | ---- |
"""]
    for position, cell in enumerate(cell_by_simple_name.values()):
        column = position % 3
        row_open = "|" if column == 0 else ""
        row_close = "\n" if column == 2 else ""
        pieces.append(f"{row_open} {cell} |{row_close}")
    return "".join(pieces)

# Register image generation models

# Each call stores one metadata record under the listed full names, their
# short aliases, and the simple name. Order matters: a later call overwrites
# any key it shares with an earlier one.
register_model_info(
    full_names=["imagenhub_LCM_generation", "fal_LCM_text2image"],
    simple_name="LCM",
    link="https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7",
    description="Latent Consistency Models.",
    license="MIT License",
    organization="Tsinghua University",
    model_type="text2image_generation",
)

register_model_info(
    full_names=["fal_LCM(v1.5/XL)_text2image"],
    simple_name="LCM(v1.5/XL)",
    link="https://fal.ai/models/fast-lcm-diffusion-turbo",
    description="Latent Consistency Models (v1.5/XL)",
    license="openrail++",
    organization="Latent Consistency",
    model_type="text2image_generation",
)

register_model_info(
    full_names=["imagenhub_PlayGroundV2_generation", "playground_PlayGroundV2_generation"],
    simple_name="PlayGround V2",
    link="https://huggingface.co/playgroundai/playground-v2-1024px-aesthetic",
    description="Playground v2 – 1024px Aesthetic Model",
    license="Playground v2 Community License",
    organization="Playground",
    model_type="text2image_generation",
)

register_model_info(
    full_names=["imagenhub_PlayGroundV2.5_generation", "playground_PlayGroundV2.5_generation"],
    simple_name="PlayGround V2.5",
    link="https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic",
    description="Playground v2.5 is the state-of-the-art open-source model in aesthetic quality",
    license="Playground v2.5 Community License",
    organization="Playground",
    model_type="text2image_generation",
)

register_model_info(
    full_names=["imagenhub_OpenJourney_generation"],
    simple_name="OpenJourney",
    link="https://huggingface.co/prompthero/openjourney",
    description="Openjourney is an open source Stable Diffusion fine tuned model on Midjourney images, by PromptHero.",
    license="creativeml-openrail-m",
    organization="PromptHero",
    model_type="text2image_generation",
)

register_model_info(
    full_names=["imagenhub_SDXLTurbo_generation", "fal_SDXLTurbo_text2image"],
    simple_name="SDXLTurbo",
    link="https://huggingface.co/stabilityai/sdxl-turbo",
    description="SDXL-Turbo is a fast generative text-to-image model.",
    license="sai-nc-community (other)",
    organization="Stability AI",
    model_type="text2image_generation",
)

# SDEdit is the one entry in this group typed as an image-editing model.
register_model_info(
    full_names=["imagenhub_SDEdit_edition"],
    simple_name="SDEdit",
    link="https://sde-image-editing.github.io",
    description="SDEdit is an image synthesis and editing framework based on stochastic differential equations (SDEs) or diffusion models.",
    license="MIT License",
    organization="Stanford University",
    model_type="image_edition",
)

register_model_info(
    full_names=["imagenhub_SDXL_generation", "fal_SDXL_text2image"],
    simple_name="SDXL",
    link="https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0",
    description="SDXL is a Latent Diffusion Model that uses two fixed, pretrained text encoders.",
    license="openrail++",
    organization="Stability AI",
    model_type="text2image_generation",
)

register_model_info(
    full_names=["imagenhub_SD3_generation"],
    simple_name="SD3",
    link="https://huggingface.co/blog/sd3",
    description="SD3 is a novel Multimodal Diffusion Transformer (MMDiT) model.",
    license="stabilityai-nc-research-community",
    organization="Stability AI",
    model_type="text2image_generation",
)

register_model_info(
    full_names=["imagenhub_PixArtAlpha_generation"],
    simple_name="PixArtAlpha",
    link="https://huggingface.co/PixArt-alpha/PixArt-XL-2-1024-MS",
    description="Pixart-α consists of pure transformer blocks for latent diffusion.",
    license="openrail++",
    organization="PixArt-alpha",
    model_type="text2image_generation",
)

register_model_info(
    full_names=["imagenhub_PixArtSigma_generation", "fal_PixArtSigma_text2image"],
    simple_name="PixArtSigma",
    link="https://github.com/PixArt-alpha/PixArt-sigma",
    description="Improved version of Pixart-α.",
    license="openrail++",
    organization="PixArt-alpha",
    model_type="text2image_generation",
)

register_model_info(
    full_names=["imagenhub_SDXLLightning_generation", "fal_SDXLLightning_text2image"],
    simple_name="SDXL-Lightning",
    link="https://huggingface.co/ByteDance/SDXL-Lightning",
    description="SDXL-Lightning is a lightning-fast text-to-image generation model.",
    license="openrail++",
    organization="ByteDance",
    model_type="text2image_generation",
)

register_model_info(
    full_names=["imagenhub_StableCascade_generation", "fal_StableCascade_text2image"],
    simple_name="StableCascade",
    link="https://huggingface.co/stabilityai/stable-cascade",
    description="StableCascade is built upon the Würstchen architecture and working at a much smaller latent space.",
    license="stable-cascade-nc-community (other)",
    organization="Stability AI",
    model_type="text2image_generation",
)

register_model_info(
    full_names=["imagenhub_HunyuanDiT_generation"],
    simple_name="HunyuanDiT",
    link="https://github.com/Tencent/HunyuanDiT",
    description="HunyuanDiT is a Powerful Multi-Resolution Diffusion Transformer with Fine-Grained Chinese Understanding",
    license="tencent-hunyuan-community",
    organization="Tencent",
    model_type="text2image_generation",
)

register_model_info(
    full_names=["imagenhub_Kolors_generation"],
    simple_name="Kolors",
    link="https://huggingface.co/Kwai-Kolors/Kolors",
    description="Kolors is a large-scale text-to-image generation model based on latent diffusion",
    license="Apache-2.0",
    organization="Kwai Kolors",
    model_type="text2image_generation",
)

register_model_info(
    full_names=["fal_AuraFlow_text2image"],
    simple_name="AuraFlow",
    link="https://huggingface.co/fal/AuraFlow",
    description="Opensourced flow-based text-to-image generation model.",
    license="Apache-2.0",
    organization="Fal.AI",
    model_type="text2image_generation",
)

register_model_info(
    full_names=["fal_FLUX1schnell_text2image"],
    simple_name="FLUX.1-schnell",
    link="https://huggingface.co/docs/diffusers/main/en/api/pipelines/flux",
    description="Flux is a series of text-to-image generation models based on diffusion transformers. Timestep-distilled version.",
    license="Apache-2.0",
    organization="Black Forest Labs",
    model_type="text2image_generation",
)

register_model_info(
    full_names=["fal_FLUX1dev_text2image"],
    simple_name="FLUX.1-dev",
    link="https://huggingface.co/docs/diffusers/main/en/api/pipelines/flux",
    description="Flux is a series of text-to-image generation models based on diffusion transformers. Guidance-distilled version.",
    license="flux-1-dev-non-commercial-license (other)",
    organization="Black Forest Labs",
    model_type="text2image_generation",
)


# Register image editing models
# Every entry in this group is registered with the "image_edition" type.
register_model_info(
    full_names=["imagenhub_CycleDiffusion_edition"],
    simple_name="CycleDiffusion",
    link="https://github.com/ChenWu98/cycle-diffusion?tab=readme-ov-file",
    description="A latent space for stochastic diffusion models.",
    license="X11",
    organization="Carnegie Mellon University",
    model_type="image_edition",
)

register_model_info(
    full_names=["imagenhub_Pix2PixZero_edition"],
    simple_name="Pix2PixZero",
    link="https://pix2pixzero.github.io/",
    description="A zero-shot Image-to-Image translation model.",
    license="MIT License",
    organization="Carnegie Mellon University, Adobe Research",
    model_type="image_edition",
)

register_model_info(
    full_names=["imagenhub_Prompt2prompt_edition"],
    simple_name="Prompt2prompt",
    link="https://prompt-to-prompt.github.io/",
    description="Image Editing with Cross-Attention Control.",
    license="Apache-2.0",
    organization="Google, Tel Aviv University",
    model_type="image_edition",
)

register_model_info(
    full_names=["imagenhub_InstructPix2Pix_edition"],
    simple_name="InstructPix2Pix",
    link="https://www.timothybrooks.com/instruct-pix2pix",
    description="An instruction-based image editing model.",
    license="Copyright 2023 Timothy Brooks, Aleksander Holynski, Alexei A. Efros",
    organization="University of California, Berkeley",
    model_type="image_edition",
)

register_model_info(
    full_names=["imagenhub_MagicBrush_edition"],
    simple_name="MagicBrush",
    link="https://osu-nlp-group.github.io/MagicBrush/",
    description="Manually Annotated Dataset for Instruction-Guided Image Editing.",
    license="CC-BY-4.0",
    organization="The Ohio State University, University of Waterloo",
    model_type="image_edition",
)

register_model_info(
    full_names=["imagenhub_PNP_edition"],
    simple_name="PNP",
    link="https://github.com/MichalGeyer/plug-and-play",
    description="Plug-and-Play Diffusion Features for Text-Driven Image-to-Image Translation.",
    license="-",
    organization="Weizmann Institute of Science",
    model_type="image_edition",
)

register_model_info(
    full_names=["imagenhub_InfEdit_edition"],
    simple_name="InfEdit",
    link="https://sled-group.github.io/InfEdit/",
    description="Inversion-Free Image Editing with Natural Language.",
    license="CC BY-NC-ND 4.0",
    organization="University of Michigan, University of California, Berkeley",
    model_type="image_edition",
)

register_model_info(
    full_names=["imagenhub_CosXLEdit_edition"],
    simple_name="CosXLEdit",
    link="https://huggingface.co/stabilityai/cosxl",
    description="An instruction-based image editing model from SDXL.",
    license="cosxl-nc-community",
    organization="Stability AI",
    model_type="image_edition",
)

register_model_info(
    full_names=["imagenhub_UltraEdit_edition"],
    simple_name="UltraEdit",
    link="https://ultra-editing.github.io/",
    description="Instruction-based Fine-Grained Image Editing at Scale.",
    license="other",
    organization="Peking University; BIGAI",
    model_type="image_edition",
)

register_model_info(
    full_names=["imagenhub_AURORA_edition"],
    simple_name="AURORA",
    link="https://aurora-editing.github.io/",
    description="AURORA (Action Reasoning Object Attribute) enables training an instruction-guided image editing model that can perform action and reasoning-centric edits.",
    license="MIT",
    organization="McGill NLP",
    model_type="image_edition",
)

# fal.ai-hosted Stable Cascade endpoint. Its key ends in "_text2image" and its
# description says it generates images from text prompts, so it is typed
# "text2image_generation" (it was previously mislabeled "image_edition").
# NOTE: this call reuses the simple name "StableCascade", so it replaces
# whatever record was stored under that key before it.
register_model_info(
    ["fal_stable-cascade_text2image"],
    "StableCascade",
    "https://fal.ai/models/stable-cascade/api",
    "StableCascade is a generative model that can generate high-quality images from text prompts.",
    "stable-cascade-nc-community (other)",
    "Stability AI",
    "text2image_generation"
)

# Register text-to-video generation models; every entry in this group uses
# the "text2video_generation" type.
register_model_info(
    full_names=["fal_AnimateDiff_text2video"],
    simple_name="AnimateDiff",
    link="https://fal.ai/models/fast-animatediff-t2v",
    description="AnimateDiff is a text-driven models that produce diverse and personalized animated images.",
    license="creativeml-openrail-m",
    organization="The Chinese University of Hong Kong, Shanghai AI Lab, Stanford University",
    model_type="text2video_generation",
)

register_model_info(
    full_names=["fal_StableVideoDiffusion_text2video"],
    simple_name="StableVideoDiffusion",
    link="https://fal.ai/models/fal-ai/fast-svd/text-to-video/api",
    description="Stable Video Diffusion empowers individuals to transform text and image inputs into vivid scenes.",
    license="SVD-nc-community",
    organization="Stability AI",
    model_type="text2video_generation",
)

register_model_info(
    full_names=["fal_AnimateDiffTurbo_text2video"],
    simple_name="AnimateDiff Turbo",
    link="https://fal.ai/models/fast-animatediff-t2v-turbo",
    description="AnimateDiff Turbo is a lightning version of AnimateDiff.",
    license="creativeml-openrail-m",
    organization="The Chinese University of Hong Kong, Shanghai AI Lab, Stanford University",
    model_type="text2video_generation",
)

register_model_info(
    full_names=["videogenhub_VideoCrafter2_generation"],
    simple_name="VideoCrafter2",
    link="https://ailab-cvc.github.io/videocrafter2/",
    description="VideoCrafter2 is a T2V model that disentangling motion from appearance.",
    license="Apache 2.0",
    organization="Tencent AI Lab",
    model_type="text2video_generation",
)

register_model_info(
    full_names=["videogenhub_LaVie_generation"],
    simple_name="LaVie",
    link="https://github.com/Vchitect/LaVie",
    description="LaVie is a video generation model with cascaded latent diffusion models.",
    license="Apache 2.0",
    organization="Shanghai AI Lab",
    model_type="text2video_generation",
)

register_model_info(
    full_names=["videogenhub_ModelScope_generation"],
    simple_name="ModelScope",
    link="https://arxiv.org/abs/2308.06571",
    description="ModelScope is a a T2V synthesis model that evolves from a T2I synthesis model.",
    license="cc-by-nc-4.0",
    organization="Alibaba Group",
    model_type="text2video_generation",
)

register_model_info(
    full_names=["videogenhub_OpenSora_generation"],
    simple_name="OpenSora",
    link="https://github.com/hpcaitech/Open-Sora",
    description="A community-driven opensource implementation of Sora.",
    license="Apache 2.0",
    organization="HPC-AI Tech",
    model_type="text2video_generation",
)

register_model_info(
    full_names=["videogenhub_OpenSora12_generation"],
    simple_name="OpenSora v1.2",
    link="https://github.com/hpcaitech/Open-Sora",
    description="A community-driven opensource implementation of Sora. v1.2",
    license="Apache 2.0",
    organization="HPC-AI Tech",
    model_type="text2video_generation",
)

register_model_info(
    full_names=["videogenhub_CogVideoX-2B_generation"],
    simple_name="CogVideoX-2B",
    link="https://github.com/THUDM/CogVideo",
    description="Text-to-Video Diffusion Models with An Expert Transformer.",
    license="CogVideoX LICENSE",
    organization="THUDM",
    model_type="text2video_generation",
)

register_model_info(
    full_names=["videogenhub_PyramidFlow_text2video"],
    simple_name="Pyramid Flow",
    link="https://pyramid-flow.github.io/",
    description="Pyramidal Flow Matching for Efficient Video Generative Modeling.",
    license="MIT LICENSE",
    organization="Peking University",
    model_type="text2video_generation",
)

register_model_info(
    full_names=["fal_CogVideoX-5B_text2video"],
    simple_name="CogVideoX-5B",
    link="https://github.com/THUDM/CogVideo",
    description="Text-to-Video Diffusion Models with An Expert Transformer.",
    license="CogVideoX LICENSE",
    organization="THUDM",
    model_type="text2video_generation",
)

register_model_info(
    full_names=["fal_T2VTurbo_text2video"],
    simple_name="T2V-Turbo",
    link="https://github.com/Ji4chenLi/t2v-turbo",
    description="Video Consistency Model with Mixed Reward Feedback.",
    license="cc-by-nc-4.0",
    organization="University of California, Santa Barbara",
    model_type="text2video_generation",
)

register_model_info(
    full_names=["videogenhub_Allegro_text2video"],
    simple_name="Allegro",
    link="https://github.com/rhymes-ai/Allegro",
    description="DiT based Video Generation Model",
    license="Apache 2.0",
    organization="rhymes-ai",
    model_type="text2video_generation",
)

register_model_info(
    full_names=["videogenhub_LTXVideo_text2video"],
    simple_name="LTXVideo",
    link="https://github.com/Lightricks/LTX-Video",
    description="DiT based Video Generation Model",
    license="Apache 2.0",
    organization="Lightricks",
    model_type="text2video_generation",
)

register_model_info(
    full_names=["videogenhub_Mochi1_text2video"],
    simple_name="Mochi1",
    link="https://github.com/genmoai/mochi",
    description="Mochi 1 preview is an open state-of-the-art video generation model with high-fidelity motion and strong prompt adherence in preliminary evaluation.",
    license="Apache 2.0",
    organization="Genmo AI",
    model_type="text2video_generation",
)