# Short text-to-video example.
#
# Install the dependencies before running this script. `pip install` is a
# shell command, not Python; in a Colab/Jupyter cell run it with a leading "!":
#   !pip install diffusers transformers accelerate torch

import torch
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
from diffusers.utils import export_to_video

# Load the pipeline with half-precision (fp16) weights.
pipe = DiffusionPipeline.from_pretrained(
    "damo-vilab/text-to-video-ms-1.7b",
    torch_dtype=torch.float16,
    variant="fp16",
)
# Replace the default scheduler with DPM-Solver multistep, reusing its config.
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
pipe.enable_model_cpu_offload()

prompt = "Spiderman is surfing"
video_frames = pipe(prompt, num_inference_steps=24).frames

# Check the shape of the video_frames array.
# NOTE(review): depending on the installed diffusers version, `.frames` is
# either a batched array (which has `.shape`) or a plain list of frames
# (which does not) -- confirm against the version in use.
frames_shape = getattr(video_frames, "shape", None)
if frames_shape is None:
    print(f"list of {len(video_frames)} frames")
else:
    print(frames_shape)

# Reshape the video_frames array if necessary: a 5-axis result carries a
# leading batch axis, so keep only the first (and only) video of the batch.
if frames_shape is not None and len(frames_shape) == 5:
    video_frames = video_frames[0]

# Export the video (no output path given, so export_to_video picks one).
video_path = export_to_video(video_frames)

# Verify that the video is successfully exported.
print(video_path)

# Long Video Generation

# Long text-to-video example.
#
# Install the dependencies before running this script. `pip install` is a
# shell command, not Python; in a Colab/Jupyter cell run it with a leading "!":
#   !pip install git+https://github.com/huggingface/diffusers transformers accelerate

import torch
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
from diffusers.utils import export_to_video
import cv2  # not referenced directly below; kept from the original script

# load pipeline
pipe = DiffusionPipeline.from_pretrained(
    "damo-vilab/text-to-video-ms-1.7b",
    torch_dtype=torch.float16,
    variant="fp16",
)
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)

# optimize for GPU memory
pipe.enable_model_cpu_offload()
pipe.enable_vae_slicing()

# generate
prompt = "monkey is eating apple and dancing"
num_frames = 200
video_frames = pipe(prompt, num_inference_steps=25, num_frames=num_frames).frames

# check dimensions
# Assumed layout is (batch, frames, height, width, channels) -- TODO confirm
# against the installed diffusers version. getattr() makes a result without
# `.shape` (e.g. a plain list) fail this check with the ValueError below
# instead of an unrelated AttributeError.
expected_shape = (1, num_frames, 256, 256, 3)
if getattr(video_frames, "shape", None) != expected_shape:
    raise ValueError("Unexpected video_frames shape")

# extract dimensions
# video_frames[0] has four axes; the first is the frame count, so skip it to
# get height, width and channels (the original unpacked the frame count as h).
_, h, w, c = video_frames[0].shape

# convert to video
video_path = export_to_video(video_frames[0], output_video_path="output.mp4", fps=24)

print(f"Video saved to: {video_path}")

# Model: ali-vilab/text-to-video-ms-1.7b

# Run it in Colab without any errors.