# GenAI-Arena / model / model_registry.py
# vinesmsuic's picture
# Add AURORA
# b3212f3
# raw
# history blame
# 14.5 kB
from collections import namedtuple
from typing import List
# Immutable record describing one registered model. The "type" field holds the
# model_type string that was passed to register_model_info.
ModelInfo = namedtuple(
    "ModelInfo",
    "simple_name link description license organization type",
)

# Registry mapping every known model name or alias to its ModelInfo record.
model_info = dict()
def register_model_info(
    full_names: List[str], simple_name: str, link: str, description: str,
    license: str, organization: str, model_type: str
):
    """Register one model in the module-level ``model_info`` registry.

    A single ``ModelInfo`` record is built from the arguments and stored under
    several keys so that it can be looked up by any of them:

    * every entry of ``full_names`` as given;
    * the second ``_``-separated component of each full name, when there is
      one (e.g. ``"LCM"`` for ``"fal_LCM_text2image"``);
    * ``simple_name``.

    Registering a key that already exists replaces the earlier record.

    Args:
        full_names: Fully qualified model identifiers.
        simple_name: Short display name of the model.
        link: URL of the model page.
        description: One-line description of the model.
        license: License name of the model.
        organization: Organization that released the model.
        model_type: Task category string, stored in the record's ``type`` field.
    """
    info = ModelInfo(simple_name, link, description, license, organization, model_type)
    for full_name in full_names:
        model_info[full_name] = info
        # A full name without any "_" has no middle component; skip the alias
        # instead of failing with IndexError and aborting the registration.
        components = full_name.split("_")
        if len(components) > 1:
            model_info[components[1]] = info
    model_info[simple_name] = info
def get_model_info(name: str) -> ModelInfo:
    """Return the registry record stored under ``name``.

    If ``name`` is not a key of ``model_info``, no error is raised. Instead a
    placeholder ``ModelInfo`` is returned with ``name`` as its simple name,
    ``"-"`` for link, license and organization, a reminder text as the
    description, and ``None`` as the type.
    """
    try:
        return model_info[name]
    except KeyError:
        # Unknown model: registering it with `register_model_info` replaces
        # this placeholder with real metadata.
        reminder = "Register the description at fastchat/model/model_registry.py"
        return ModelInfo(name, "-", reminder, "-", "-", None)
def get_model_description_md(model_list):
    """Build a three-column Markdown table describing the given models.

    Each name in ``model_list`` is resolved with ``get_model_info``. Models are
    de-duplicated by their simple name, keeping the first occurrence. Every
    remaining model becomes one cell of the form
    ``[simple_name](link): description``, and cells are laid out three per
    row. A trailing row with fewer than three cells is left without a
    terminating newline.

    Returns:
        The Markdown text, starting with the empty table header.
    """
    # First pass: one cell per distinct model, in input order.
    seen_names = set()
    cells = []
    for model_name in model_list:
        record = get_model_info(model_name)
        if record.simple_name in seen_names:
            continue
        seen_names.add(record.simple_name)
        cells.append(f"[{record.simple_name}]({record.link}): {record.description}")

    # Second pass: emit the header, then the cells in rows of three.
    pieces = [
        """
| | | |
| ---- | ---- | ---- |
"""
    ]
    for start in range(0, len(cells), 3):
        row = cells[start:start + 3]
        pieces.append("|")
        pieces.extend(f" {cell} |" for cell in row)
        if len(row) == 3:
            # Only complete rows are closed with a newline.
            pieces.append("\n")
    return "".join(pieces)
# Register image generation models.
register_model_info(
    full_names=["imagenhub_LCM_generation", "fal_LCM_text2image"],
    simple_name="LCM",
    link="https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7",
    description="Latent Consistency Models.",
    license="MIT License",
    organization="Tsinghua University",
    model_type="text2image_generation",
)
register_model_info(
    full_names=["fal_LCM(v1.5/XL)_text2image"],
    simple_name="LCM(v1.5/XL)",
    link="https://fal.ai/models/fast-lcm-diffusion-turbo",
    description="Latent Consistency Models (v1.5/XL)",
    license="openrail++",
    organization="Latent Consistency",
    model_type="text2image_generation",
)
register_model_info(
    full_names=["imagenhub_PlayGroundV2_generation", "playground_PlayGroundV2_generation"],
    simple_name="PlayGround V2",
    link="https://huggingface.co/playgroundai/playground-v2-1024px-aesthetic",
    description="Playground v2 – 1024px Aesthetic Model",
    license="Playground v2 Community License",
    organization="Playground",
    model_type="text2image_generation",
)
register_model_info(
    full_names=["imagenhub_PlayGroundV2.5_generation", "playground_PlayGroundV2.5_generation"],
    simple_name="PlayGround V2.5",
    link="https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic",
    description="Playground v2.5 is the state-of-the-art open-source model in aesthetic quality",
    license="Playground v2.5 Community License",
    organization="Playground",
    model_type="text2image_generation",
)
register_model_info(
    full_names=["imagenhub_OpenJourney_generation"],
    simple_name="OpenJourney",
    link="https://huggingface.co/prompthero/openjourney",
    description="Openjourney is an open source Stable Diffusion fine tuned model on Midjourney images, by PromptHero.",
    license="creativeml-openrail-m",
    organization="PromptHero",
    model_type="text2image_generation",
)
register_model_info(
    full_names=["imagenhub_SDXLTurbo_generation", "fal_SDXLTurbo_text2image"],
    simple_name="SDXLTurbo",
    link="https://huggingface.co/stabilityai/sdxl-turbo",
    description="SDXL-Turbo is a fast generative text-to-image model.",
    license="sai-nc-community (other)",
    organization="Stability AI",
    model_type="text2image_generation",
)
# Registered with the "image_edition" type; kept at its original position.
register_model_info(
    full_names=["imagenhub_SDEdit_edition"],
    simple_name="SDEdit",
    link="https://sde-image-editing.github.io",
    description="SDEdit is an image synthesis and editing framework based on stochastic differential equations (SDEs) or diffusion models.",
    license="MIT License",
    organization="Stanford University",
    model_type="image_edition",
)
register_model_info(
    full_names=["imagenhub_SDXL_generation", "fal_SDXL_text2image"],
    simple_name="SDXL",
    link="https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0",
    description="SDXL is a Latent Diffusion Model that uses two fixed, pretrained text encoders.",
    license="openrail++",
    organization="Stability AI",
    model_type="text2image_generation",
)
register_model_info(
    full_names=["imagenhub_SD3_generation"],
    simple_name="SD3",
    link="https://huggingface.co/blog/sd3",
    description="SD3 is a novel Multimodal Diffusion Transformer (MMDiT) model.",
    license="stabilityai-nc-research-community",
    organization="Stability AI",
    model_type="text2image_generation",
)
register_model_info(
    full_names=["imagenhub_PixArtAlpha_generation"],
    simple_name="PixArtAlpha",
    link="https://huggingface.co/PixArt-alpha/PixArt-XL-2-1024-MS",
    description="Pixart-α consists of pure transformer blocks for latent diffusion.",
    license="openrail++",
    organization="PixArt-alpha",
    model_type="text2image_generation",
)
register_model_info(
    full_names=["imagenhub_PixArtSigma_generation", "fal_PixArtSigma_text2image"],
    simple_name="PixArtSigma",
    link="https://github.com/PixArt-alpha/PixArt-sigma",
    description="Improved version of Pixart-α.",
    license="openrail++",
    organization="PixArt-alpha",
    model_type="text2image_generation",
)
register_model_info(
    full_names=["imagenhub_SDXLLightning_generation", "fal_SDXLLightning_text2image"],
    simple_name="SDXL-Lightning",
    link="https://huggingface.co/ByteDance/SDXL-Lightning",
    description="SDXL-Lightning is a lightning-fast text-to-image generation model.",
    license="openrail++",
    organization="ByteDance",
    model_type="text2image_generation",
)
register_model_info(
    full_names=["imagenhub_StableCascade_generation", "fal_StableCascade_text2image"],
    simple_name="StableCascade",
    link="https://huggingface.co/stabilityai/stable-cascade",
    description="StableCascade is built upon the Würstchen architecture and working at a much smaller latent space.",
    license="stable-cascade-nc-community (other)",
    organization="Stability AI",
    model_type="text2image_generation",
)
register_model_info(
    full_names=["imagenhub_HunyuanDiT_generation"],
    simple_name="HunyuanDiT",
    link="https://github.com/Tencent/HunyuanDiT",
    description="HunyuanDiT is a Powerful Multi-Resolution Diffusion Transformer with Fine-Grained Chinese Understanding",
    license="tencent-hunyuan-community",
    organization="Tencent",
    model_type="text2image_generation",
)
register_model_info(
    full_names=["imagenhub_Kolors_generation"],
    simple_name="Kolors",
    link="https://huggingface.co/Kwai-Kolors/Kolors",
    description="Kolors is a large-scale text-to-image generation model based on latent diffusion",
    license="Apache-2.0",
    organization="Kwai Kolors",
    model_type="text2image_generation",
)
register_model_info(
    full_names=["fal_AuraFlow_text2image"],
    simple_name="AuraFlow",
    link="https://huggingface.co/fal/AuraFlow",
    description="Opensourced flow-based text-to-image generation model.",
    license="Apache-2.0",
    organization="Fal.AI",
    model_type="text2image_generation",
)
register_model_info(
    full_names=["fal_FLUX1schnell_text2image"],
    simple_name="FLUX.1-schnell",
    link="https://huggingface.co/docs/diffusers/main/en/api/pipelines/flux",
    description="Flux is a series of text-to-image generation models based on diffusion transformers. Timestep-distilled version.",
    license="Apache-2.0",
    organization="Black Forest Labs",
    model_type="text2image_generation",
)
register_model_info(
    full_names=["fal_FLUX1dev_text2image"],
    simple_name="FLUX.1-dev",
    link="https://huggingface.co/docs/diffusers/main/en/api/pipelines/flux",
    description="Flux is a series of text-to-image generation models based on diffusion transformers. Guidance-distilled version.",
    license="flux-1-dev-non-commercial-license (other)",
    organization="Black Forest Labs",
    model_type="text2image_generation",
)
# Register image edition models.
register_model_info(
    full_names=["imagenhub_CycleDiffusion_edition"],
    simple_name="CycleDiffusion",
    link="https://github.com/ChenWu98/cycle-diffusion?tab=readme-ov-file",
    description="A latent space for stochastic diffusion models.",
    license="X11",
    organization="Carnegie Mellon University",
    model_type="image_edition",
)
register_model_info(
    full_names=["imagenhub_Pix2PixZero_edition"],
    simple_name="Pix2PixZero",
    link="https://pix2pixzero.github.io/",
    description="A zero-shot Image-to-Image translation model.",
    license="MIT License",
    organization="Carnegie Mellon University, Adobe Research",
    model_type="image_edition",
)
register_model_info(
    full_names=["imagenhub_Prompt2prompt_edition"],
    simple_name="Prompt2prompt",
    link="https://prompt-to-prompt.github.io/",
    description="Image Editing with Cross-Attention Control.",
    license="Apache-2.0",
    organization="Google, Tel Aviv University",
    model_type="image_edition",
)
register_model_info(
    full_names=["imagenhub_InstructPix2Pix_edition"],
    simple_name="InstructPix2Pix",
    link="https://www.timothybrooks.com/instruct-pix2pix",
    description="An instruction-based image editing model.",
    license="Copyright 2023 Timothy Brooks, Aleksander Holynski, Alexei A. Efros",
    organization="University of California, Berkeley",
    model_type="image_edition",
)
register_model_info(
    full_names=["imagenhub_MagicBrush_edition"],
    simple_name="MagicBrush",
    link="https://osu-nlp-group.github.io/MagicBrush/",
    description="Manually Annotated Dataset for Instruction-Guided Image Editing.",
    license="CC-BY-4.0",
    organization="The Ohio State University, University of Waterloo",
    model_type="image_edition",
)
register_model_info(
    full_names=["imagenhub_PNP_edition"],
    simple_name="PNP",
    link="https://github.com/MichalGeyer/plug-and-play",
    description="Plug-and-Play Diffusion Features for Text-Driven Image-to-Image Translation.",
    license="-",
    organization="Weizmann Institute of Science",
    model_type="image_edition",
)
register_model_info(
    full_names=["imagenhub_InfEdit_edition"],
    simple_name="InfEdit",
    link="https://sled-group.github.io/InfEdit/",
    description="Inversion-Free Image Editing with Natural Language.",
    license="CC BY-NC-ND 4.0",
    organization="University of Michigan, University of California, Berkeley",
    model_type="image_edition",
)
register_model_info(
    full_names=["imagenhub_CosXLEdit_edition"],
    simple_name="CosXLEdit",
    link="https://huggingface.co/stabilityai/cosxl",
    description="An instruction-based image editing model from SDXL.",
    license="cosxl-nc-community",
    organization="Stability AI",
    model_type="image_edition",
)
register_model_info(
    full_names=["imagenhub_UltraEdit_edition"],
    simple_name="UltraEdit",
    link="https://ultra-editing.github.io/",
    description="Instruction-based Fine-Grained Image Editing at Scale.",
    license="other",
    organization="Peking University; BIGAI",
    model_type="image_edition",
)
register_model_info(
    full_names=["imagenhub_AURORA_edition"],
    simple_name="AURORA",
    link="https://aurora-editing.github.io/",
    description="AURORA (Action Reasoning Object Attribute) enables training an instruction-guided image editing model that can perform action and reasoning-centric edits.",
    license="MIT",
    organization="McGill NLP",
    model_type="image_edition",
)
# This fal.ai endpoint generates images from text prompts (see the
# "_text2image" suffix of its full name and its description), so it is
# registered as "text2image_generation" rather than "image_edition".
# NOTE: it uses the simple name "StableCascade"; register_model_info stores
# the record under that key, replacing any earlier entry with the same name.
register_model_info(
    ["fal_stable-cascade_text2image"],
    "StableCascade",
    "https://fal.ai/models/stable-cascade/api",
    "StableCascade is a generative model that can generate high-quality images from text prompts.",
    "stable-cascade-nc-community (other)",
    "Stability AI",
    "text2image_generation"
)
# Register text-to-video generation models.
register_model_info(
    full_names=["fal_AnimateDiff_text2video"],
    simple_name="AnimateDiff",
    link="https://fal.ai/models/fast-animatediff-t2v",
    description="AnimateDiff is a text-driven models that produce diverse and personalized animated images.",
    license="creativeml-openrail-m",
    organization="The Chinese University of Hong Kong, Shanghai AI Lab, Stanford University",
    model_type="text2video_generation",
)
register_model_info(
    full_names=["fal_StableVideoDiffusion_text2video"],
    simple_name="StableVideoDiffusion",
    link="https://fal.ai/models/fal-ai/fast-svd/text-to-video/api",
    description="Stable Video Diffusion empowers individuals to transform text and image inputs into vivid scenes.",
    license="SVD-nc-community",
    organization="Stability AI",
    model_type="text2video_generation",
)
register_model_info(
    full_names=["fal_AnimateDiffTurbo_text2video"],
    simple_name="AnimateDiff Turbo",
    link="https://fal.ai/models/fast-animatediff-t2v-turbo",
    description="AnimateDiff Turbo is a lightning version of AnimateDiff.",
    license="creativeml-openrail-m",
    organization="The Chinese University of Hong Kong, Shanghai AI Lab, Stanford University",
    model_type="text2video_generation",
)
register_model_info(
    full_names=["videogenhub_VideoCrafter2_generation"],
    simple_name="VideoCrafter2",
    link="https://ailab-cvc.github.io/videocrafter2/",
    description="VideoCrafter2 is a T2V model that disentangling motion from appearance.",
    license="Apache 2.0",
    organization="Tencent AI Lab",
    model_type="text2video_generation",
)
register_model_info(
    full_names=["videogenhub_LaVie_generation"],
    simple_name="LaVie",
    link="https://github.com/Vchitect/LaVie",
    description="LaVie is a video generation model with cascaded latent diffusion models.",
    license="Apache 2.0",
    organization="Shanghai AI Lab",
    model_type="text2video_generation",
)
register_model_info(
    full_names=["videogenhub_ModelScope_generation"],
    simple_name="ModelScope",
    link="https://arxiv.org/abs/2308.06571",
    description="ModelScope is a a T2V synthesis model that evolves from a T2I synthesis model.",
    license="cc-by-nc-4.0",
    organization="Alibaba Group",
    model_type="text2video_generation",
)
register_model_info(
    full_names=["videogenhub_OpenSora_generation"],
    simple_name="OpenSora",
    link="https://github.com/hpcaitech/Open-Sora",
    description="A community-driven opensource implementation of Sora.",
    license="Apache 2.0",
    organization="HPC-AI Tech",
    model_type="text2video_generation",
)
register_model_info(
    full_names=["videogenhub_OpenSora12_generation"],
    simple_name="OpenSora v1.2",
    link="https://github.com/hpcaitech/Open-Sora",
    description="A community-driven opensource implementation of Sora. v1.2",
    license="Apache 2.0",
    organization="HPC-AI Tech",
    model_type="text2video_generation",
)
register_model_info(
    full_names=["videogenhub_CogVideoX-2B_generation"],
    simple_name="CogVideoX-2B",
    link="https://github.com/THUDM/CogVideo",
    description="Text-to-Video Diffusion Models with An Expert Transformer.",
    license="CogVideoX LICENSE",
    organization="THUDM",
    model_type="text2video_generation",
)
register_model_info(
    full_names=["videogenhub_PyramidFlow_text2video"],
    simple_name="Pyramid Flow",
    link="https://pyramid-flow.github.io/",
    description="Pyramidal Flow Matching for Efficient Video Generative Modeling.",
    license="MIT LICENSE",
    organization="Peking University",
    model_type="text2video_generation",
)
register_model_info(
    full_names=["fal_CogVideoX-5B_text2video"],
    simple_name="CogVideoX-5B",
    link="https://github.com/THUDM/CogVideo",
    description="Text-to-Video Diffusion Models with An Expert Transformer.",
    license="CogVideoX LICENSE",
    organization="THUDM",
    model_type="text2video_generation",
)
register_model_info(
    full_names=["fal_T2VTurbo_text2video"],
    simple_name="T2V-Turbo",
    link="https://github.com/Ji4chenLi/t2v-turbo",
    description="Video Consistency Model with Mixed Reward Feedback.",
    license="cc-by-nc-4.0",
    organization="University of California, Santa Barbara",
    model_type="text2video_generation",
)
register_model_info(
    full_names=["videogenhub_Allegro_text2video"],
    simple_name="Allegro",
    link="https://github.com/rhymes-ai/Allegro",
    description="DiT based Video Generation Model",
    license="Apache 2.0",
    organization="rhymes-ai",
    model_type="text2video_generation",
)
register_model_info(
    full_names=["videogenhub_LTXVideo_text2video"],
    simple_name="LTXVideo",
    link="https://github.com/Lightricks/LTX-Video",
    description="DiT based Video Generation Model",
    license="Apache 2.0",
    organization="Lightricks",
    model_type="text2video_generation",
)
register_model_info(
    full_names=["videogenhub_Mochi1_text2video"],
    simple_name="Mochi1",
    link="https://github.com/genmoai/mochi",
    description="Mochi 1 preview is an open state-of-the-art video generation model with high-fidelity motion and strong prompt adherence in preliminary evaluation.",
    license="Apache 2.0",
    organization="Genmo AI",
    model_type="text2video_generation",
)