Spaces:

yhzhai
/

mcm

Running on Zero

App Files Files Community

yhzhai committed on Jun 16

Commit

8b19867

•

1 Parent(s): ed9c4b4

add demo

Browse files

Files changed (4) hide show

.gitignore +180 -0
README.md +8 -1
app.py +349 -103
requirements.txt +87 -6

.gitignore ADDED Viewed

	@@ -0,0 +1,180 @@

+# Created by https://www.toptal.com/developers/gitignore/api/python
+# Edit at https://www.toptal.com/developers/gitignore?templates=python
+### Python ###
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+### Python Patch ###
+# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
+poetry.toml
+# ruff
+.ruff_cache/
+# LSP config files
+pyrightconfig.json
+# End of https://www.toptal.com/developers/gitignore/api/python
+gradio_cached_examples
+*.DS_Store
+samples

README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-title: Mcm
 emoji: 🖼
 colorFrom: purple
 colorTo: red
@@ -8,6 +8,13 @@ sdk_version: 4.26.0
 app_file: app.py
 pinned: false
 license: apache-2.0
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Motion Consistency Model - Accelerating Video Diffusion with Disentangled Motion-Appearance Distillation
 emoji: 🖼
 colorFrom: purple
 colorTo: red
 app_file: app.py
 pinned: false
 license: apache-2.0
+short_description: Few-step text-to-video generation with MCM.
+preload_from_hub:
+    - yhzhai/mcm
+    - ali-vilab/text-to-video-ms-1.7b
+    - runwayml/stable-diffusion-v1-5
+    - emilianJR/epiCRealism
+    - SG161222/Realistic_Vision_V6.0_B1_noVAE
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py CHANGED Viewed

@@ -1,70 +1,334 @@
 import gradio as gr
 import numpy as np
-import random
-from diffusers import DiffusionPipeline
 import torch
 device = "cuda" if torch.cuda.is_available() else "cpu"
-if torch.cuda.is_available():
-    torch.cuda.max_memory_allocated(device=device)
-    pipe = DiffusionPipeline.from_pretrained("stabilityai/sdxl-turbo", torch_dtype=torch.float16, variant="fp16", use_safetensors=True)
-    pipe.enable_xformers_memory_efficient_attention()
     pipe = pipe.to(device)
-else:
-    pipe = DiffusionPipeline.from_pretrained("stabilityai/sdxl-turbo", use_safetensors=True)
     pipe = pipe.to(device)
-MAX_SEED = np.iinfo(np.int32).max
-MAX_IMAGE_SIZE = 1024
-def infer(prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps):
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
     generator = torch.Generator().manual_seed(seed)
-    image = pipe(
-        prompt = prompt,
-        negative_prompt = negative_prompt,
-        guidance_scale = guidance_scale,
-        num_inference_steps = num_inference_steps,
-        width = width,
-        height = height,
-        generator = generator
-    ).images[0]
-    return image
 examples = [
-    "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
-    "An astronaut riding a green horse",
-    "A delicious ceviche cheesecake slice",
 ]
-css="""
 #col-container {
     margin: 0 auto;
-    max-width: 520px;
 }
 """
-if torch.cuda.is_available():
-    power_device = "GPU"
-else:
-    power_device = "CPU"
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
-        gr.Markdown(f"""
-        # Text-to-Image Gradio Template
-        Currently running on {power_device}.
-        """)
         with gr.Row():
             prompt = gr.Text(
                 label="Prompt",
                 show_label=False,
@@ -72,75 +336,57 @@ with gr.Blocks(css=css) as demo:
                 placeholder="Enter your prompt",
                 container=False,
             )
             run_button = gr.Button("Run", scale=0)
-        result = gr.Image(label="Result", show_label=False)
-        with gr.Accordion("Advanced Settings", open=False):
-            negative_prompt = gr.Text(
-                label="Negative prompt",
-                max_lines=1,
-                placeholder="Enter a negative prompt",
-                visible=False,
-            )
-            seed = gr.Slider(
-                label="Seed",
-                minimum=0,
-                maximum=MAX_SEED,
-                step=1,
-                value=0,
-            )
-            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
-            with gr.Row():
-                width = gr.Slider(
-                    label="Width",
-                    minimum=256,
-                    maximum=MAX_IMAGE_SIZE,
-                    step=32,
-                    value=512,
-                )
-                height = gr.Slider(
-                    label="Height",
-                    minimum=256,
-                    maximum=MAX_IMAGE_SIZE,
-                    step=32,
-                    value=512,
-                )
-            with gr.Row():
-                guidance_scale = gr.Slider(
-                    label="Guidance scale",
-                    minimum=0.0,
-                    maximum=10.0,
-                    step=0.1,
-                    value=0.0,
-                )
-                num_inference_steps = gr.Slider(
-                    label="Number of inference steps",
-                    minimum=1,
-                    maximum=12,
-                    step=1,
-                    value=2,
                 )
         gr.Examples(
-            examples = examples,
-            inputs = [prompt]
         )
     run_button.click(
-        fn = infer,
-        inputs = [prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
-        outputs = [result]
     )
-demo.queue().launch()

+import os
+import random
+from datetime import datetime
+from typing import Optional
 import gradio as gr
 import numpy as np
 import torch
+from diffusers import (
+    AnimateDiffPipeline,
+    DiffusionPipeline,
+    LCMScheduler,
+    MotionAdapter,
+)
+from diffusers.utils import export_to_video
+from peft import PeftModel
 device = "cuda" if torch.cuda.is_available() else "cpu"  # every pipeline is moved here after loading
+mcm_id = "yhzhai/mcm"  # repo the MCM LoRA weights are loaded from
+basedir = os.getcwd()  # working directory at import time
+savedir = os.path.join(  # <basedir>/samples/Gradio-<import timestamp>; infer() creates it on demand
+    basedir, "samples", datetime.now().strftime("Gradio-%Y-%m-%dT%H-%M-%S")
+)
+MAX_SEED = np.iinfo(np.int32).max  # largest int32; upper bound for the seed slider and for random seeds
+def get_modelscope_pipeline(
+    mcm_variant: Optional[str] = "WebVid",
+):
+    model_id = "ali-vilab/text-to-video-ms-1.7b"
+    pipe = DiffusionPipeline.from_pretrained(
+        model_id, torch_dtype=torch.float16, variant="fp16"
+    )
+    scheduler = LCMScheduler.from_pretrained(
+        model_id,
+        subfolder="scheduler",
+        timestep_scaling=4.0,
+    )
+    pipe.scheduler = scheduler
+    pipe.enable_vae_slicing()
+    if mcm_variant == "WebVid":
+        subfolder = "modelscopet2v-webvid"
+    elif mcm_variant == "LAION-aes":
+        subfolder = "modelscopet2v-laion"
+    elif mcm_variant == "Anime":
+        subfolder = "modelscopet2v-anime"
+    elif mcm_variant == "Realistic":
+        subfolder = "modelscopet2v-real"
+    elif mcm_variant == "3D Cartoon":
+        subfolder = "modelscopet2v-3d-cartoon"
+    else:
+        subfolder = "modelscopet2v-laion"
+    lora = PeftModel.from_pretrained(
+        pipe.unet,
+        model_id=mcm_id,
+        subfolder=subfolder,
+        adapter_name="lora",
+        torch_device="cpu",
+    )
+    lora.merge_and_unload()
+    pipe.unet = lora
     pipe = pipe.to(device)
+    return pipe
+def get_animatediff_pipeline(
+    real_variant: Optional[str] = "realvision",
+    motion_module_path: str = "guoyww/animatediff-motion-adapter-v1-5-2",
+    mcm_variant: Optional[str] = "WebVid",
+):
+    if real_variant is None:
+        model_id = "runwayml/stable-diffusion-v1-5"
+    elif real_variant == "epicrealism":
+        model_id = "emilianJR/epiCRealism"
+    elif real_variant == "realvision":
+        model_id = "SG161222/Realistic_Vision_V6.0_B1_noVAE"
+    else:
+        raise ValueError(f"Unknown real_variant {real_variant}")
+    adapter = MotionAdapter.from_pretrained(
+        motion_module_path, torch_dtype=torch.float16
+    )
+    pipe = AnimateDiffPipeline.from_pretrained(
+        model_id,
+        motion_adapter=adapter,
+        torch_dtype=torch.float16,
+    )
+    scheduler = LCMScheduler.from_pretrained(
+        model_id,
+        subfolder="scheduler",
+        timestep_scaling=4.0,
+        clip_sample=False,
+        timestep_spacing="linspace",
+        beta_schedule="linear",
+        beta_start=0.00085,
+        beta_end=0.012,
+        steps_offset=1,
+    )
+    pipe.scheduler = scheduler
+    pipe.enable_vae_slicing()
+    if mcm_variant == "WebVid":
+        subfolder = "animatediff-webvid"
+    elif mcm_variant == "LAION-aes":
+        subfolder = "animatediff-laion"
+    else:
+        subfolder = "animatediff-laion"
+    lora = PeftModel.from_pretrained(
+        pipe.unet,
+        model_id=mcm_id,
+        subfolder=subfolder,
+        adapter_name="lora",
+        torch_device="cpu",
+    )
+    lora.merge_and_unload()
+    pipe.unet = lora
     pipe = pipe.to(device)
+    return pipe
+# pipe_dict = {
+#     "ModelScope T2V": {"WebVid": None, "LAION-aes": None, "Anime": None, "Realistic": None, "3D Cartoon": None},
+#     "AnimateDiff (SD1.5)": {"WebVid": None, "LAION-aes": None},
+#     "AnimateDiff (RealisticVision)": {"WebVid": None, "LAION-aes": None},
+#     "AnimateDiff (epiCRealism)": {"WebVid": None, "LAION-aes": None},
+# }
+cache_pipeline = {  # single-slot cache filled by infer(): the last pipeline built and the selection it was built for
+    "base_model": None,  # base-model name the cached pipeline was built for
+    "variant": None,  # MCM variant the cached pipeline was built for
+    "pipeline": None,  # the pipeline object itself; None until infer() has built one
+}
+def infer(
+    base_model, variant, prompt, seed=0, randomize_seed=True, num_inference_steps=4
+):
+    # if pipe_dict[base_model][variant] is None:
+    #     if base_model == "ModelScope T2V":
+    #         pipe_dict[base_model][variant] = get_modelscope_pipeline(mcm_variant=variant)
+    #     elif base_model == "AnimateDiff (SD1.5)":
+    #         pipe_dict[base_model][variant] = get_animatediff_pipeline(
+    #             real_variant=None,
+    #             motion_module_path="guoyww/animatediff-motion-adapter-v1-5-2",
+    #             mcm_variant=variant,
+    #         )
+    #     elif base_model == "AnimateDiff (RealisticVision)":
+    #         pipe_dict[base_model][variant] = get_animatediff_pipeline(
+    #             real_variant="realvision",
+    #             motion_module_path="guoyww/animatediff-motion-adapter-v1-5-2",
+    #             mcm_variant=variant,
+    #         )
+    #     elif base_model == "AnimateDiff (epiCRealism)":
+    #         pipe_dict[base_model][variant] = get_animatediff_pipeline(
+    #             real_variant="epicrealism",
+    #             motion_module_path="guoyww/animatediff-motion-adapter-v1-5-2",
+    #             mcm_variant=variant,
+    #         )
+    #     else:
+    #         raise ValueError(f"Unknown base_model {base_model}")
+    if (
+        cache_pipeline["base_model"] == base_model
+        and cache_pipeline["variant"] == variant
+    ):
+        pass
+    else:
+        if base_model == "ModelScope T2V":
+            pipeline = get_modelscope_pipeline(mcm_variant=variant)
+        elif base_model == "AnimateDiff (SD1.5)":
+            pipeline = get_animatediff_pipeline(
+                real_variant=None,
+                motion_module_path="guoyww/animatediff-motion-adapter-v1-5-2",
+                mcm_variant=variant,
+            )
+        elif base_model == "AnimateDiff (RealisticVision)":
+            pipeline = get_animatediff_pipeline(
+                real_variant="realvision",
+                motion_module_path="guoyww/animatediff-motion-adapter-v1-5-2",
+                mcm_variant=variant,
+            )
+        elif base_model == "AnimateDiff (epiCRealism)":
+            pipeline = get_animatediff_pipeline(
+                real_variant="epicrealism",
+                motion_module_path="guoyww/animatediff-motion-adapter-v1-5-2",
+                mcm_variant=variant,
+            )
+        else:
+            raise ValueError(f"Unknown base_model {base_model}")
+        cache_pipeline["base_model"] = base_model
+        cache_pipeline["variant"] = variant
+        cache_pipeline["pipeline"] = pipeline
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
     generator = torch.Generator().manual_seed(seed)
+    output = cache_pipeline["pipeline"](
+        prompt=prompt,
+        num_frames=16,
+        guidance_scale=1.0,
+        num_inference_steps=num_inference_steps,
+        generator=generator,
+    ).frames
+    if not isinstance(output, list):
+        output = [output[i] for i in range(output.shape[0])]
+    os.makedirs(savedir, exist_ok=True)
+    save_path = os.path.join(
+        savedir, f"sample_{base_model}_{variant}_{seed}.mp4".replace(" ", "_")
+    )
+    export_to_video(
+        output[0],
+        save_path,
+        fps=7,
+    )
+    print(f"Saved to {save_path}")
+    return save_path
 examples = [  # each row is [base model, MCM variant, prompt], matching the inputs given to gr.Examples
+    [
+        "ModelScope T2V",
+        "LAION-aes",
+        "Aerial uhd 4k view. mid-air flight over fresh and clean mountain river at sunny summer morning. Green trees and sun rays on horizon. Direct on sun.",
+    ],
+    ["ModelScope T2V", "Anime", "Timelapse misty mountain landscape"],
+    [
+        "ModelScope T2V",
+        "WebVid",
+        "Back of woman in shorts going near pure creek in beautiful mountains.",
+    ],
+    [
+        "ModelScope T2V",
+        "3D Cartoon",
+        "A rotating pandoro (a traditional italian sweet yeast bread, most popular around christmas and new year) being eaten in time-lapse.",
+    ],
+    [
+        "ModelScope T2V",
+        "Realistic",
+        "Slow motion avocado with a stone falls and breaks into 2 parts with splashes",
+    ],
+    [
+        "AnimateDiff (SD1.5)",
+        "LAION-aes",
+        "Slow motion of delicious salmon sachimi set with green vegetables leaves served on wood plate. make homemade japanese food at home.-dan",
+    ],
+    [
+        "AnimateDiff (SD1.5)",
+        "WebVid",
+        "Blooming meadow panorama zoom-out shot heavenly clouds and upcoming thunderstorm in mountain range harz, germany.",
+    ],
+    [
+        "AnimateDiff (RealisticVision)",
+        "LAION-aes",
+        "A young woman in a yellow sweater uses vr glasses, sitting on the shore of a pond on a background of dark waves. a strong wind develops her hair, the sun's rays are reflected from the water.",
+    ],
+    [
+        "AnimateDiff (epiCRealism)",
+        "LAION-aes",
+        "Female running at sunset. healthy fitness concept",
+    ],
 ]
+css = """
 #col-container {
     margin: 0 auto;
 }
 """  # stylesheet passed to gr.Blocks; targets the column created with elem_id="col-container"
+variants = {  # MCM variants offered for each base model; keys are the base-model dropdown choices
+    "ModelScope T2V": ["WebVid", "LAION-aes", "Anime", "Realistic", "3D Cartoon"],
+    "AnimateDiff (SD1.5)": ["WebVid", "LAION-aes"],
+    "AnimateDiff (RealisticVision)": ["WebVid", "LAION-aes"],
+    "AnimateDiff (epiCRealism)": ["WebVid", "LAION-aes"],
+}
+def update_variant(rs):
+    return gr.update(choices=variants[rs], value=None)
 with gr.Blocks(css=css) as demo:  # whole UI is declared inside this context
     with gr.Column(elem_id="col-container"):  # id targeted by the `css` string
+        gr.HTML(
+            """
+        <div style="text-align: center; margin-bottom: 20px;">
+            <h1 align="center">
+              <a href="https://yhzhai.github.io/mcm/"><b>Motion Consistency Model: Accelerating Video Diffusion with Disentangled Motion-Appearance Distillation</b></a>
+            </h1>
+            <h4>Our motion consistency model not only accelerates text2video diffusion model sampling process, but also can benefit from an additional high-quality image dataset to improve the frame quality of generated videos.</h4>
+            <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
+                <a href='https://yhzhai.github.io/mcm/'><img src='https://img.shields.io/badge/Project-Page-Green'></a>
+                <a href='https://arxiv.org/abs/2406.06890'><img src='https://img.shields.io/badge/Paper-arXiv-red'></a>
+                <a href='https://huggingface.co/yhzhai/mcm'><img src='https://img.shields.io/badge/HF-checkpoint-yellow'></a>
+            </div>
+        </div>
+        """
+        )  # page header: title, one-line summary and project / paper / checkpoint badges
+        with gr.Row():  # model selection row
+            base_model = gr.Dropdown(
+                label="Base model",
+                choices=[
+                    "ModelScope T2V",
+                    "AnimateDiff (SD1.5)",
+                    "AnimateDiff (RealisticVision)",
+                    "AnimateDiff (epiCRealism)",
+                ],
+                value="ModelScope T2V",
+                interactive=True,
+            )
+            variant_dropdown = gr.Dropdown(
+                variants["ModelScope T2V"],  # initial choices match the default base model
+                label="MCM Variant",
+                interactive=True,
+                value=None,  # nothing pre-selected; infer() then receives variant=None
+            )
+            base_model.change(
+                update_variant, inputs=[base_model], outputs=[variant_dropdown]
+            )  # refresh the variant choices whenever the base model changes
         with gr.Row():  # prompt box and run button
             prompt = gr.Text(
                 label="Prompt",
                 show_label=False,
                 placeholder="Enter your prompt",
                 container=False,
             )
             run_button = gr.Button("Run", scale=0)
+        with gr.Row():  # settings on the left, generated video on the right
+            with gr.Column():
+                with gr.Accordion("Advanced Settings", open=True):
+                    seed = gr.Slider(
+                        label="Seed",
+                        minimum=0,
+                        maximum=MAX_SEED,
+                        step=1,
+                        value=0,
+                    )
+                    randomize_seed = gr.Checkbox(label="Randomize seed", value=True)  # when ticked, infer() ignores the seed slider
+                    with gr.Row():
+                        num_inference_steps = gr.Slider(
+                            label="Number of inference steps",
+                            minimum=1,
+                            maximum=16,
+                            step=1,
+                            value=4,
+                        )
+            with gr.Column():
+                # result = gr.Video(label="Result", show_label=False, interactive=False, height=512, width=512, autoplay=True)
+                result = gr.Video(
+                    label="Result", show_label=False, interactive=False, autoplay=True
                 )  # receives the MP4 path returned by infer()
         gr.Examples(
+            examples=examples,
+            inputs=[base_model, variant_dropdown, prompt],  # only three inputs, so infer() uses its defaults for seed, randomize_seed and steps
+            cache_examples=True,  # NOTE(review): presumably runs infer() on every example row ahead of time - confirm startup cost
+            fn=infer,
+            outputs=[result],
         )
     run_button.click(
+        fn=infer,
+        inputs=[  # order must match infer()'s positional parameters
+            base_model,
+            variant_dropdown,
+            prompt,
+            seed,
+            randomize_seed,
+            num_inference_steps,
+        ],
+        outputs=[result],
     )
+demo.queue().launch()  # runs at import time; there is no __main__ guard

requirements.txt CHANGED Viewed

@@ -1,6 +1,87 @@
-accelerate
-diffusers
-invisible_watermark
-torch
-transformers
-xformers

+# --extra-index-url https://download.pytorch.org/whl/cu118
+# torch==2.1.2
+torchvision==0.16.2
+git+https://github.com/yhZhai/diffusers.git
+transformers==4.36.2
+wandb
+matplotlib
+torchmetrics==1.3.1
+torch-fidelity==0.3.0
+einops
+azure-storage-blob==12.12.0
+tensorboard
+tensorboardX
+ffmpeg-python
+opencv-python
+timm
+ftfy
+rouge_score
+omegaconf
+decord
+colorlog
+deepdish
+configobj
+json_lines
+albumentations
+pudb
+imageio
+imageio-ffmpeg
+pytorch-lightning
+omegaconf
+test-tube
+streamlit
+setuptools
+kornia
+clean-fid
+pytorch-fid
+h5py
+lpips
+tabulate
+ninja
+matplotlib
+webdataset
+braceexpand
+Pillow
+accelerate==0.29.3
+compel==0.1.8
+datasets
+filelock
+flax>=0.4.1
+hf-doc-builder>=0.3.0
+huggingface-hub>=0.20.2
+requests-mock==1.10.0
+importlib_metadata
+invisible-watermark>=0.2.0
+isort>=5.5.4
+jax>=0.4.1
+jaxlib>=0.4.1
+Jinja2
+k-diffusion>=0.0.12
+torchsde
+note_seq
+librosa
+numpy
+parameterized
+git+https://github.com/yhZhai/peft.git
+protobuf==3.20.3
+pytest
+pytest-timeout
+pytest-xdist
+ruff==0.1.5
+safetensors>=0.3.1
+sentencepiece>=0.1.91,!=0.1.92
+GitPython<3.1.19
+# scipy==1.11.1
+onnx
+regex!=2019.12.17
+requests
+bitsandbytes
+git+https://github.com/microsoft/azfuse.git
+deepspeed==0.11.2
+# deepspeed==0.6.6
+albumentations
+mlflow
+moviepy
+git+https://github.com/openai/CLIP.git
+av
+git+https://github.com/yhZhai/open_clip.git