"""Registry of display metadata for image/video generation and editing models.

Models are registered at import time through ``register_model_info`` and
looked up by any of their known names through ``get_model_info``.
"""

from collections import namedtuple
from typing import List

# Display metadata attached to every registered model name.
ModelInfo = namedtuple(
    "ModelInfo",
    ["simple_name", "link", "description", "license", "organization", "type"],
)

# Maps every known name of a model (full name, middle segment of the full
# name, and simple display name) to its ModelInfo record.
model_info = {}


def register_model_info(
    full_names: List[str],
    simple_name: str,
    link: str,
    description: str,
    license: str,
    organization: str,
    model_type: str,
):
    """Register one model under all of its lookup names.

    The same ``ModelInfo`` record is stored in ``model_info`` under:
    each entry of ``full_names``, the second ``_``-separated segment of each
    full name (when the full name has one), and ``simple_name``.
    Later registrations overwrite earlier ones that share a key.

    Args:
        full_names: Full identifiers, e.g. ``"fal_SDXL_text2image"``.
        simple_name: Short display name.
        link: URL shown for the model.
        description: One-line description.
        license: License label.
        organization: Owning organization label.
        model_type: Stored in the ``type`` field of the record.
    """
    info = ModelInfo(simple_name, link, description, license, organization, model_type)
    for full_name in full_names:
        model_info[full_name] = info
        segments = full_name.split("_")
        # A name without "_" has no middle segment; skip the alias instead of
        # raising IndexError.
        if len(segments) > 1:
            model_info[segments[1]] = info
    model_info[simple_name] = info


def get_model_info(name: str) -> ModelInfo:
    """Return the registered ``ModelInfo`` for ``name``.

    Unregistered names get a placeholder record whose ``simple_name`` is
    ``name`` itself and whose ``type`` is ``None``.
    """
    if name in model_info:
        return model_info[name]
    # To fix this, please use `register_model_info` to register your model
    return ModelInfo(
        name,
        "-",
        "Register the description at fastchat/model/model_registry.py",
        "-",
        "-",
        None,
    )


def get_model_description_md(model_list: List[str]) -> str:
    """Build a three-column markdown table describing the given models.

    Each cell is ``[simple_name](link): description``. Names that resolve to
    an already listed ``simple_name`` are skipped. A row is terminated with a
    newline only once it holds three cells, so a trailing partial row is left
    without one.
    """
    parts = ["\n| | | |\n| ---- | ---- | ---- |\n"]
    visited = set()
    ct = 0
    for name in model_list:
        minfo = get_model_info(name)
        if minfo.simple_name in visited:
            continue
        visited.add(minfo.simple_name)
        one_model_md = f"[{minfo.simple_name}]({minfo.link}): {minfo.description}"

        if ct % 3 == 0:
            parts.append("|")
        parts.append(f" {one_model_md} |")
        if ct % 3 == 2:
            parts.append("\n")
        ct += 1
    return "".join(parts)


# Register image generation models
register_model_info(
    ["imagenhub_LCM_generation", "fal_LCM_text2image"],
    "LCM",
    "https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7",
    "Latent Consistency Models.",
    "MIT License",
    "Tsinghua University",
    "text2image_generation",
)

register_model_info(
    ["fal_LCM(v1.5/XL)_text2image"],
    "LCM(v1.5/XL)",
    "https://fal.ai/models/fast-lcm-diffusion-turbo",
    "Latent Consistency Models (v1.5/XL)",
    "openrail++",
    "Latent Consistency",
    "text2image_generation",
)

register_model_info(
    ["imagenhub_PlayGroundV2_generation", "playground_PlayGroundV2_generation"],
    "PlayGround V2",
    "https://huggingface.co/playgroundai/playground-v2-1024px-aesthetic",
    "Playground v2 – 1024px Aesthetic Model",
    "Playground v2 Community License",
    "Playground",
    "text2image_generation",
)

register_model_info(
    ["imagenhub_PlayGroundV2.5_generation", "playground_PlayGroundV2.5_generation"],
    "PlayGround V2.5",
    "https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic",
    "Playground v2.5 is the state-of-the-art open-source model in aesthetic quality",
    "Playground v2.5 Community License",
    "Playground",
    "text2image_generation",
)

register_model_info(
    ["imagenhub_OpenJourney_generation"],
    "OpenJourney",
    "https://huggingface.co/prompthero/openjourney",
    "Openjourney is an open source Stable Diffusion fine tuned model on Midjourney images, by PromptHero.",
    "creativeml-openrail-m",
    "PromptHero",
    "text2image_generation",
)

register_model_info(
    ["imagenhub_SDXLTurbo_generation", "fal_SDXLTurbo_text2image"],
    "SDXLTurbo",
    "https://huggingface.co/stabilityai/sdxl-turbo",
    "SDXL-Turbo is a fast generative text-to-image model.",
    "sai-nc-community (other)",
    "Stability AI",
    "text2image_generation",
)

register_model_info(
    ["imagenhub_SDEdit_edition"],
    "SDEdit",
    "https://sde-image-editing.github.io",
    "SDEdit is an image synthesis and editing framework based on stochastic differential equations (SDEs) or diffusion models.",
    "MIT License",
    "Stanford University",
    "image_edition",
)

register_model_info(
    ["imagenhub_SDXL_generation", "fal_SDXL_text2image"],
    "SDXL",
    "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0",
    "SDXL is a Latent Diffusion Model that uses two fixed, pretrained text encoders.",
    "openrail++",
    "Stability AI",
    "text2image_generation",
)

register_model_info(
    ["imagenhub_SD3_generation"],
    "SD3",
    "https://huggingface.co/blog/sd3",
    "SD3 is a novel Multimodal Diffusion Transformer (MMDiT) model.",
    "stabilityai-nc-research-community",
    "Stability AI",
    "text2image_generation",
)

register_model_info(
    ["imagenhub_PixArtAlpha_generation"],
    "PixArtAlpha",
    "https://huggingface.co/PixArt-alpha/PixArt-XL-2-1024-MS",
    "Pixart-α consists of pure transformer blocks for latent diffusion.",
    "openrail++",
    "PixArt-alpha",
    "text2image_generation",
)

register_model_info(
    ["imagenhub_PixArtSigma_generation", "fal_PixArtSigma_text2image"],
    "PixArtSigma",
    "https://github.com/PixArt-alpha/PixArt-sigma",
    "Improved version of Pixart-α.",
    "openrail++",
    "PixArt-alpha",
    "text2image_generation",
)

register_model_info(
    ["imagenhub_SDXLLightning_generation", "fal_SDXLLightning_text2image"],
    "SDXL-Lightning",
    "https://huggingface.co/ByteDance/SDXL-Lightning",
    "SDXL-Lightning is a lightning-fast text-to-image generation model.",
    "openrail++",
    "ByteDance",
    "text2image_generation",
)

register_model_info(
    ["imagenhub_StableCascade_generation", "fal_StableCascade_text2image"],
    "StableCascade",
    "https://huggingface.co/stabilityai/stable-cascade",
    "StableCascade is built upon the Würstchen architecture and working at a much smaller latent space.",
    "stable-cascade-nc-community (other)",
    "Stability AI",
    "text2image_generation",
)

register_model_info(
    ["imagenhub_HunyuanDiT_generation"],
    "HunyuanDiT",
    "https://github.com/Tencent/HunyuanDiT",
    "HunyuanDiT is a Powerful Multi-Resolution Diffusion Transformer with Fine-Grained Chinese Understanding",
    "tencent-hunyuan-community",
    "Tencent",
    "text2image_generation",
)

register_model_info(
    ["imagenhub_Kolors_generation"],
    "Kolors",
    "https://huggingface.co/Kwai-Kolors/Kolors",
    "Kolors is a large-scale text-to-image generation model based on latent diffusion",
    "Apache-2.0",
    "Kwai Kolors",
    "text2image_generation",
)

register_model_info(
    ["fal_AuraFlow_text2image"],
    "AuraFlow",
    "https://huggingface.co/fal/AuraFlow",
    "Opensourced flow-based text-to-image generation model.",
    "Apache-2.0",
    "Fal.AI",
    "text2image_generation",
)

register_model_info(
    ["fal_FLUX1schnell_text2image"],
    "FLUX.1-schnell",
    "https://huggingface.co/docs/diffusers/main/en/api/pipelines/flux",
    "Flux is a series of text-to-image generation models based on diffusion transformers. Timestep-distilled version.",
    "Apache-2.0",
    "Black Forest Labs",
    "text2image_generation",
)

register_model_info(
    ["fal_FLUX1dev_text2image"],
    "FLUX.1-dev",
    "https://huggingface.co/docs/diffusers/main/en/api/pipelines/flux",
    "Flux is a series of text-to-image generation models based on diffusion transformers. Guidance-distilled version.",
    "flux-1-dev-non-commercial-license (other)",
    "Black Forest Labs",
    "text2image_generation",
)


# Register image edition models
register_model_info(
    ["imagenhub_CycleDiffusion_edition"],
    "CycleDiffusion",
    "https://github.com/ChenWu98/cycle-diffusion?tab=readme-ov-file",
    "A latent space for stochastic diffusion models.",
    "X11",
    "Carnegie Mellon University",
    "image_edition",
)

register_model_info(
    ["imagenhub_Pix2PixZero_edition"],
    "Pix2PixZero",
    "https://pix2pixzero.github.io/",
    "A zero-shot Image-to-Image translation model.",
    "MIT License",
    "Carnegie Mellon University, Adobe Research",
    "image_edition",
)

register_model_info(
    ["imagenhub_Prompt2prompt_edition"],
    "Prompt2prompt",
    "https://prompt-to-prompt.github.io/",
    "Image Editing with Cross-Attention Control.",
    "Apache-2.0",
    "Google, Tel Aviv University",
    "image_edition",
)

register_model_info(
    ["imagenhub_InstructPix2Pix_edition"],
    "InstructPix2Pix",
    "https://www.timothybrooks.com/instruct-pix2pix",
    "An instruction-based image editing model.",
    "Copyright 2023 Timothy Brooks, Aleksander Holynski, Alexei A. Efros",
    "University of California, Berkeley",
    "image_edition",
)

register_model_info(
    ["imagenhub_MagicBrush_edition"],
    "MagicBrush",
    "https://osu-nlp-group.github.io/MagicBrush/",
    "Manually Annotated Dataset for Instruction-Guided Image Editing.",
    "CC-BY-4.0",
    "The Ohio State University, University of Waterloo",
    "image_edition",
)

register_model_info(
    ["imagenhub_PNP_edition"],
    "PNP",
    "https://github.com/MichalGeyer/plug-and-play",
    "Plug-and-Play Diffusion Features for Text-Driven Image-to-Image Translation.",
    "-",
    "Weizmann Institute of Science",
    "image_edition",
)

register_model_info(
    ["imagenhub_InfEdit_edition"],
    "InfEdit",
    "https://sled-group.github.io/InfEdit/",
    "Inversion-Free Image Editing with Natural Language.",
    "CC BY-NC-ND 4.0",
    "University of Michigan, University of California, Berkeley",
    "image_edition",
)

register_model_info(
    ["imagenhub_CosXLEdit_edition"],
    "CosXLEdit",
    "https://huggingface.co/stabilityai/cosxl",
    "An instruction-based image editing model from SDXL.",
    "cosxl-nc-community",
    "Stability AI",
    "image_edition",
)

register_model_info(
    ["imagenhub_UltraEdit_edition"],
    "UltraEdit",
    "https://ultra-editing.github.io/",
    "Instruction-based Fine-Grained Image Editing at Scale.",
    "other",
    "Peking University; BIGAI",
    "image_edition",
)

register_model_info(
    ["imagenhub_AURORA_edition"],
    "AURORA",
    "https://aurora-editing.github.io/",
    "AURORA (Action Reasoning Object Attribute) enables training an instruction-guided image editing model that can perform action and reasoning-centric edits.",
    "MIT",
    "McGill NLP",
    "image_edition",
)

# NOTE(review): this entry reuses the simple name "StableCascade", so it
# replaces the record stored under the "StableCascade" key by the earlier
# registration, and it is typed "image_edition" although its full name ends in
# "text2image" — confirm whether both are intended.
register_model_info(
    ["fal_stable-cascade_text2image"],
    "StableCascade",
    "https://fal.ai/models/stable-cascade/api",
    "StableCascade is a generative model that can generate high-quality images from text prompts.",
    "stable-cascade-nc-community (other)",
    "Stability AI",
    "image_edition",
)


# Register video generation models
register_model_info(
    ["fal_AnimateDiff_text2video"],
    "AnimateDiff",
    "https://fal.ai/models/fast-animatediff-t2v",
    "AnimateDiff is a text-driven models that produce diverse and personalized animated images.",
    "creativeml-openrail-m",
    "The Chinese University of Hong Kong, Shanghai AI Lab, Stanford University",
    "text2video_generation",
)

register_model_info(
    ["fal_StableVideoDiffusion_text2video"],
    "StableVideoDiffusion",
    "https://fal.ai/models/fal-ai/fast-svd/text-to-video/api",
    "Stable Video Diffusion empowers individuals to transform text and image inputs into vivid scenes.",
    "SVD-nc-community",
    "Stability AI",
    "text2video_generation",
)

register_model_info(
    ["fal_AnimateDiffTurbo_text2video"],
    "AnimateDiff Turbo",
    "https://fal.ai/models/fast-animatediff-t2v-turbo",
    "AnimateDiff Turbo is a lightning version of AnimateDiff.",
    "creativeml-openrail-m",
    "The Chinese University of Hong Kong, Shanghai AI Lab, Stanford University",
    "text2video_generation",
)

register_model_info(
    ["videogenhub_VideoCrafter2_generation"],
    "VideoCrafter2",
    "https://ailab-cvc.github.io/videocrafter2/",
    "VideoCrafter2 is a T2V model that disentangling motion from appearance.",
    "Apache 2.0",
    "Tencent AI Lab",
    "text2video_generation",
)

register_model_info(
    ["videogenhub_LaVie_generation"],
    "LaVie",
    "https://github.com/Vchitect/LaVie",
    "LaVie is a video generation model with cascaded latent diffusion models.",
    "Apache 2.0",
    "Shanghai AI Lab",
    "text2video_generation",
)

register_model_info(
    ["videogenhub_ModelScope_generation"],
    "ModelScope",
    "https://arxiv.org/abs/2308.06571",
    "ModelScope is a a T2V synthesis model that evolves from a T2I synthesis model.",
    "cc-by-nc-4.0",
    "Alibaba Group",
    "text2video_generation",
)

register_model_info(
    ["videogenhub_OpenSora_generation"],
    "OpenSora",
    "https://github.com/hpcaitech/Open-Sora",
    "A community-driven opensource implementation of Sora.",
    "Apache 2.0",
    "HPC-AI Tech",
    "text2video_generation",
)

register_model_info(
    ["videogenhub_OpenSora12_generation"],
    "OpenSora v1.2",
    "https://github.com/hpcaitech/Open-Sora",
    "A community-driven opensource implementation of Sora. v1.2",
    "Apache 2.0",
    "HPC-AI Tech",
    "text2video_generation",
)

register_model_info(
    ["videogenhub_CogVideoX-2B_generation"],
    "CogVideoX-2B",
    "https://github.com/THUDM/CogVideo",
    "Text-to-Video Diffusion Models with An Expert Transformer.",
    "CogVideoX LICENSE",
    "THUDM",
    "text2video_generation",
)

register_model_info(
    ["videogenhub_PyramidFlow_text2video"],
    "Pyramid Flow",
    "https://pyramid-flow.github.io/",
    "Pyramidal Flow Matching for Efficient Video Generative Modeling.",
    "MIT LICENSE",
    "Peking University",
    "text2video_generation",
)

register_model_info(
    ["fal_CogVideoX-5B_text2video"],
    "CogVideoX-5B",
    "https://github.com/THUDM/CogVideo",
    "Text-to-Video Diffusion Models with An Expert Transformer.",
    "CogVideoX LICENSE",
    "THUDM",
    "text2video_generation",
)

register_model_info(
    ["fal_T2VTurbo_text2video"],
    "T2V-Turbo",
    "https://github.com/Ji4chenLi/t2v-turbo",
    "Video Consistency Model with Mixed Reward Feedback.",
    "cc-by-nc-4.0",
    "University of California, Santa Barbara",
    "text2video_generation",
)

register_model_info(
    ["videogenhub_Allegro_text2video"],
    "Allegro",
    "https://github.com/rhymes-ai/Allegro",
    "DiT based Video Generation Model",
    "Apache 2.0",
    "rhymes-ai",
    "text2video_generation",
)

register_model_info(
    ["videogenhub_LTXVideo_text2video"],
    "LTXVideo",
    "https://github.com/Lightricks/LTX-Video",
    "DiT based Video Generation Model",
    "Apache 2.0",
    "Lightricks",
    "text2video_generation",
)

register_model_info(
    ["videogenhub_Mochi1_text2video"],
    "Mochi1",
    "https://github.com/genmoai/mochi",
    "Mochi 1 preview is an open state-of-the-art video generation model with high-fidelity motion and strong prompt adherence in preliminary evaluation.",
    "Apache 2.0",
    "Genmo AI",
    "text2video_generation",
)