MimicMotion

Runtime error

File size: 11,991 Bytes

import gradio as gr
import os
import shutil
import yaml
import tempfile
import cv2
import huggingface_hub
import subprocess
import threading
import torch
from subprocess import getoutput

# Toggle for the "shared demo" mode, in which the UI only asks visitors to
# duplicate the Space. The commented line below derives it from SPACE_ID.
is_shared_ui = False
# is_shared_ui = True if "fffiloni/MimicMotion" in os.environ['SPACE_ID'] else False
# Widgets are interactive only when this is not the shared demo.
available_property = not is_shared_ui

# Give `which_gpu` a value up front: the UI interpolates it unconditionally,
# so it must exist even when no CUDA device is available and the nvidia-smi
# probe below is skipped.
which_gpu = "CPU"
is_gpu_associated = torch.cuda.is_available()
if is_gpu_associated:
    # Identify the GPU model by searching the nvidia-smi report for known names;
    # anything unrecognised keeps the "CPU" fallback label.
    gpu_info = getoutput('nvidia-smi')
    if "A10G" in gpu_info:
        which_gpu = "A10G"
    elif "T4" in gpu_info:
        which_gpu = "T4"

def stream_output(pipe):
    """Echo every line read from *pipe* to stdout, then close the pipe.

    Args:
        pipe: A text-mode file-like object exposing ``readline()`` and
            ``close()`` (used below with a subprocess stdout/stderr pipe).
            Reading stops at the first empty string returned by ``readline()``.

    The pipe is closed even if reading or printing raises, so a failing
    reader never leaves its end of the pipe open.
    """
    try:
        for line in iter(pipe.readline, ''):
            # Lines already carry their own newline.
            print(line, end='')
    finally:
        pipe.close()

# Access token for gated Hugging Face repositories, read from the environment.
HF_TKN = os.environ.get("GATED_HF_TOKEN")
if HF_TKN:
    huggingface_hub.login(token=HF_TKN)
else:
    # Calling login() without a token asks for one interactively, which a
    # headless app cannot answer, so skip it and continue anonymously.
    print("GATED_HF_TOKEN is not set; skipping Hugging Face login.")

# Model files fetched at start-up, as (repo_id, filename, local_dir).
_REQUIRED_CHECKPOINTS = (
    ('yzd-v/DWPose', 'yolox_l.onnx', './models/DWPose'),
    ('yzd-v/DWPose', 'dw-ll_ucoco_384.onnx', './models/DWPose'),
    ('ixaac/MimicMotion', 'MimicMotion_1.pth', './models'),
)

for _repo_id, _filename, _local_dir in _REQUIRED_CHECKPOINTS:
    huggingface_hub.hf_hub_download(
        repo_id=_repo_id,
        filename=_filename,
        local_dir=_local_dir,
    )

def print_directory_contents(path):
    """Print an indented tree of the directories and files found under *path*.

    Each directory is printed as ``name/`` and its files are listed one level
    deeper, using four spaces of indentation per nesting level.

    Args:
        path: Directory to walk. Nothing is printed if ``os.walk`` yields
            nothing for it (for example when it does not exist).
    """
    for root, _dirs, files in os.walk(path):
        # Derive the depth from the path relative to the starting directory.
        # Plain substring replacement miscounts when `path` ends with a
        # separator or happens to recur inside `root`.
        rel_root = os.path.relpath(root, path)
        level = 0 if rel_root == os.curdir else rel_root.count(os.sep) + 1
        indent = ' ' * 4 * level
        # normpath drops a trailing separator so basename is never empty.
        print(f"{indent}{os.path.basename(os.path.normpath(root))}/")
        subindent = ' ' * 4 * (level + 1)
        for f in files:
            print(f"{subindent}{f}")

def check_outputs_folder(folder_path):
    """Empty *folder_path* without removing the folder itself.

    Files and symbolic links are unlinked and real sub-directories are removed
    recursively. A failure on one entry is reported on stdout and the
    remaining entries are still processed. If *folder_path* is not an existing
    directory, a message is printed and nothing else happens.
    """
    # Guard clause: nothing to clean when the directory is missing.
    if not os.path.isdir(folder_path):
        print(f'The folder {folder_path} does not exist.')
        return

    for entry_name in os.listdir(folder_path):
        file_path = os.path.join(folder_path, entry_name)
        try:
            if os.path.isdir(file_path) and not os.path.islink(file_path):
                # Real directory: remove it with everything inside.
                shutil.rmtree(file_path)
            elif os.path.isfile(file_path) or os.path.islink(file_path):
                # Regular file or any symlink (including one to a directory).
                os.unlink(file_path)
        except Exception as e:
            print(f'Failed to delete {file_path}. Reason: {e}')

def check_for_mp4_in_outputs(outputs_folder='./outputs'):
    """Return the path of an ``.mp4`` file inside *outputs_folder*, if any.

    Args:
        outputs_folder: Directory to search (not recursive). Defaults to
            ``'./outputs'``.

    Returns:
        The folder joined with the alphabetically first ``.mp4`` file name,
        or ``None`` when the folder is not an existing directory or holds no
        ``.mp4`` file.
    """
    # isdir rather than exists: a plain file at this path would make
    # os.listdir raise.
    if not os.path.isdir(outputs_folder):
        return None

    # os.listdir order is arbitrary; sort so the choice is deterministic.
    mp4_files = sorted(f for f in os.listdir(outputs_folder) if f.endswith('.mp4'))
    if not mp4_files:
        return None
    return os.path.join(outputs_folder, mp4_files[0])

def get_video_fps(video_path):
    """Return the frame rate OpenCV reports for the video at *video_path*.

    Raises:
        ValueError: If OpenCV cannot open the file.
    """
    capture = cv2.VideoCapture(video_path)

    if capture.isOpened():
        # Read the FPS property, then free the capture before returning.
        frame_rate = capture.get(cv2.CAP_PROP_FPS)
        capture.release()
        return frame_rate

    raise ValueError("Error opening video file")

def load_examples(ref_image_in, ref_video_in):
    """Return the path of the pre-rendered result video for the example.

    Both arguments are accepted so the function matches the example inputs,
    but neither is used: the same fixed path is always returned.
    """
    example_result_path = "./examples/mimicmotion_result1_example.mp4"
    return example_result_path
    
def _build_inference_config(ref_image_path, ref_video_path, num_inference_steps,
                            guidance_scale, fps, seed):
    """Return the mapping that is dumped to YAML and handed to inference.py."""
    return {
        'base_model_path': 'stabilityai/stable-video-diffusion-img2vid-xt-1-1',
        'ckpt_path': 'models/MimicMotion_1.pth',
        'test_case': [
            {
                'ref_video_path': ref_video_path,
                'ref_image_path': ref_image_path,
                'num_frames': 16,
                'resolution': 576,
                'frames_overlap': 6,
                # UI widgets may deliver floats; coerce so the YAML holds the
                # same types as the original hard-coded values.
                'num_inference_steps': int(num_inference_steps),
                'noise_aug_strength': 0,
                'guidance_scale': float(guidance_scale),
                'sample_stride': 2,
                'fps': int(fps),
                'seed': int(seed),
            }
        ],
    }


def infer(ref_image_in, ref_video_in, num_inference_steps=25, guidance_scale=2.0,
          output_frames_per_second=16, seed=42):
    """Run ``inference.py`` on a reference image/video pair and return the result.

    The ``./outputs`` folder is emptied, a one-case YAML config is written to a
    temporary directory, and ``inference.py`` is executed as a subprocess whose
    stdout and stderr are echoed line by line.

    Args:
        ref_image_in: Path of the reference image.
        ref_video_in: Path of the reference video.
        num_inference_steps: Value written to the config (default 25).
        guidance_scale: Value written to the config (default 2.0).
        output_frames_per_second: Written to the config as ``fps`` (default 16).
        seed: Value written to the config (default 42).

    Returns:
        Path of an ``.mp4`` file found in ``./outputs`` after the run.

    Raises:
        gr.Error: If either input is missing, or if no ``.mp4`` file exists in
            ``./outputs`` once the subprocess has finished.
    """
    import sys  # local import: only needed to locate the running interpreter

    # Fail fast with a readable message instead of launching a run that
    # cannot succeed.
    if not ref_image_in or not ref_video_in:
        raise gr.Error("Please provide both a reference image and a reference video.")

    # Start from an empty outputs folder so any mp4 found later is from this run.
    check_outputs_folder('./outputs')

    data = _build_inference_config(
        ref_image_in,
        ref_video_in,
        num_inference_steps,
        guidance_scale,
        output_frames_per_second,
        seed,
    )

    # The temporary directory only needs to live as long as the subprocess
    # that reads the config from it.
    with tempfile.TemporaryDirectory() as temp_dir:
        print("Temporary directory created:", temp_dir)

        file_path = os.path.join(temp_dir, 'config.yaml')
        with open(file_path, 'w') as file:
            yaml.dump(data, file, default_flow_style=False)

        print("YAML file 'config.yaml' created successfully in", file_path)

        # Use the interpreter running this app so the child sees the same
        # environment; fall back to 'python' if it cannot be determined.
        command = [sys.executable or 'python', 'inference.py', '--inference_config', file_path]
        process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, bufsize=1)

        # Drain stdout and stderr concurrently so neither pipe fills up and
        # blocks the child process.
        stdout_thread = threading.Thread(target=stream_output, args=(process.stdout,))
        stderr_thread = threading.Thread(target=stream_output, args=(process.stderr,))
        stdout_thread.start()
        stderr_thread.start()

        process.wait()
        stdout_thread.join()
        stderr_thread.join()

        print("Inference script finished with return code:", process.returncode)

        print_directory_contents('./outputs')

    mp4_file_path = check_for_mp4_in_outputs()
    print(mp4_file_path)

    if mp4_file_path is None:
        # Report the failure instead of silently returning nothing.
        raise gr.Error(
            f"Inference produced no video (return code {process.returncode}). "
            "Check the application logs for details."
        )

    return mp4_file_path

# Result player, created at module level before the layout is built. It is
# referenced as an output by gr.Examples and submit_btn.click, and is placed
# in the page by the output_video.render() call inside the Blocks layout.
output_video = gr.Video(label="Output Video")

# Custom stylesheet passed to gr.Blocks(css=css). It styles the two banner
# variants selected by elem_id ("warning-duplicate" and "warning-ready") and
# defines the .custom-color text colour override used inside them.
css = """
div#warning-duplicate {
    background-color: #ebf5ff;
    padding: 0 16px 16px;
    margin: 20px 0;
    color: #030303!important;
}
div#warning-duplicate > .gr-prose > h2, div#warning-duplicate > .gr-prose > p {
    color: #0f4592!important;
}
div#warning-duplicate strong {
    color: #0f4592;
}
p.actions {
    display: flex;
    align-items: center;
    margin: 20px 0;
}
div#warning-duplicate .actions a {
    display: inline-block;
    margin-right: 10px;
}
div#warning-ready {
    background-color: #ecfdf5;
    padding: 0 16px 16px;
    margin: 20px 0;
    color: #030303!important;
}
div#warning-ready > .gr-prose > h2, div#warning-ready > .gr-prose > p {
    color: #057857!important;
}
.custom-color {
    color: #030303 !important;
}
"""
# Build the page: a title, then one row holding the input column (banner,
# reference inputs, settings, submit button, example) and the output video.
with gr.Blocks(css=css) as demo:
    with gr.Column():
        gr.Markdown("# MimicMotion")
        with gr.Row():
            with gr.Column():
                # Banner: in shared-UI mode ask the visitor to duplicate the
                # Space (the link is built from the SPACE_ID environment
                # variable); otherwise report the detected GPU (which_gpu).
                if is_shared_ui:
                    top_description = gr.HTML(f'''
                        <div class="gr-prose">
                            <h2 class="custom-color"><svg xmlns="http://www.w3.org/2000/svg" width="18px" height="18px" style="margin-right: 0px;display: inline-block;"fill="none"><path fill="#fff" d="M7 13.2a6.3 6.3 0 0 0 4.4-10.7A6.3 6.3 0 0 0 .6 6.9 6.3 6.3 0 0 0 7 13.2Z"/><path fill="#fff" fill-rule="evenodd" d="M7 0a6.9 6.9 0 0 1 4.8 11.8A6.9 6.9 0 0 1 0 7 6.9 6.9 0 0 1 7 0Zm0 0v.7V0ZM0 7h.6H0Zm7 6.8v-.6.6ZM13.7 7h-.6.6ZM9.1 1.7c-.7-.3-1.4-.4-2.2-.4a5.6 5.6 0 0 0-4 1.6 5.6 5.6 0 0 0-1.6 4 5.6 5.6 0 0 0 1.6 4 5.6 5.6 0 0 0 4 1.7 5.6 5.6 0 0 0 4-1.7 5.6 5.6 0 0 0 1.7-4 5.6 5.6 0 0 0-1.7-4c-.5-.5-1.1-.9-1.8-1.2Z" clip-rule="evenodd"/><path fill="#000" fill-rule="evenodd" d="M7 2.9a.8.8 0 1 1 0 1.5A.8.8 0 0 1 7 3ZM5.8 5.7c0-.4.3-.6.6-.6h.7c.3 0 .6.2.6.6v3.7h.5a.6.6 0 0 1 0 1.3H6a.6.6 0 0 1 0-1.3h.4v-3a.6.6 0 0 1-.6-.7Z" clip-rule="evenodd"/></svg>
                            Attention: this Space need to be duplicated to work</h2>
                            <p class="main-message custom-color">
                                To make it work, <strong>duplicate the Space</strong> and run it on your own profile using a <strong>private</strong> GPU (A10G-large recommended).<br />
                                A A10G-large costs <strong>US$1.50/h</strong>.
                            </p>
                            <p class="actions custom-color">
                                <a href="https://huggingface.co/spaces/{os.environ['SPACE_ID']}?duplicate=true">
                                    <img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-lg-dark.svg" alt="Duplicate this Space" />
                                </a>
                                to start experimenting with this demo
                            </p>
                        </div>
                    ''', elem_id="warning-duplicate")
                else:
                    top_description = gr.HTML(f'''
                            <div class="gr-prose">
                                <h2 class="custom-color"><svg xmlns="http://www.w3.org/2000/svg" width="18px" height="18px" style="margin-right: 0px;display: inline-block;"fill="none"><path fill="#fff" d="M7 13.2a6.3 6.3 0 0 0 4.4-10.7A6.3 6.3 0 0 0 .6 6.9 6.3 6.3 0 0 0 7 13.2Z"/><path fill="#fff" fill-rule="evenodd" d="M7 0a6.9 6.9 0 0 1 4.8 11.8A6.9 6.9 0 0 1 0 7 6.9 6.9 0 0 1 7 0Zm0 0v.7V0ZM0 7h.6H0Zm7 6.8v-.6.6ZM13.7 7h-.6.6ZM9.1 1.7c-.7-.3-1.4-.4-2.2-.4a5.6 5.6 0 0 0-4 1.6 5.6 5.6 0 0 0-1.6 4 5.6 5.6 0 0 0 1.6 4 5.6 5.6 0 0 0 4 1.7 5.6 5.6 0 0 0 4-1.7 5.6 5.6 0 0 0 1.7-4 5.6 5.6 0 0 0-1.7-4c-.5-.5-1.1-.9-1.8-1.2Z" clip-rule="evenodd"/><path fill="#000" fill-rule="evenodd" d="M7 2.9a.8.8 0 1 1 0 1.5A.8.8 0 0 1 7 3ZM5.8 5.7c0-.4.3-.6.6-.6h.7c.3 0 .6.2.6.6v3.7h.5a.6.6 0 0 1 0 1.3H6a.6.6 0 0 1 0-1.3h.4v-3a.6.6 0 0 1-.6-.7Z" clip-rule="evenodd"/></svg>
                                You have successfully associated a {which_gpu} GPU to the MimicMotion Space 🎉</h2>
                                <p class="custom-color">
                                    You will be billed by the minute from when you activated the GPU until when it is turned off.
                                </p> 
                            </div>
                    ''', elem_id="warning-ready")
                # Reference inputs; the image is delivered to callbacks as a
                # file path (type="filepath").
                with gr.Row():
                    ref_image_in = gr.Image(label="Person Image Reference", type="filepath")
                    ref_video_in = gr.Video(label="Person Video Reference")
                    # NOTE(review): these four controls are not listed in the
                    # inputs of submit_btn.click below, so changing them has
                    # no effect on the call to infer.
                    with gr.Accordion("Advanced Settings", open=False):
                        num_inference_steps = gr.Slider(label="num inference steps", minimum=12, maximum=50, value=25, step=1, interactive=available_property)
                        guidance_scale = gr.Slider(label="guidance scale", minimum=0.1, maximum=10, value=2, step=0.1, interactive=available_property)
                        output_frames_per_second = gr.Slider(label="fps", minimum=1, maximum=60, value=16, step=1, interactive=available_property)
                        seed = gr.Number(label="Seed", value=42, interactive=available_property)
                submit_btn = gr.Button("Submit", interactive=available_property)
                # Bundled example: with run_on_click=True, selecting it calls
                # load_examples, which returns a fixed result video path
                # rather than running inference.
                gr.Examples(
                    examples = [
                        ["./examples/demo1.jpg", "./examples/preview_1.mp4"]
                    ],
                    fn = load_examples,
                    inputs = [ref_image_in, ref_video_in],
                    outputs = [output_video],
                    run_on_click = True,
                    cache_examples = False
                )
            # Place the module-level output_video component in this row,
            # next to the input column.
            output_video.render()
    # Submit passes only the reference image and video to infer and shows the
    # returned file in output_video.
    submit_btn.click(
        fn = infer,
        inputs = [ref_image_in, ref_video_in],
        outputs = [output_video]
    )

# Start the app at import/run time with show_api and show_error both disabled.
demo.launch(show_api=False, show_error=False)