import sys
import os
import torch
import numpy as np
import matplotlib.pyplot as plt  # used for the root-trajectory plots below (may also be re-exported by utils.plot_script)
from os.path import join as pjoin

import utils.paramUtil as paramUtil
from utils.plot_script import *
from utils.utils import *
from utils.motion_process import recover_from_ric
from accelerate.utils import set_seed
from models.gaussian_diffusion import DiffusePipeline
from options.generate_options import GenerateOptions
from utils.model_load import load_model_weights
from motion_loader import get_dataset_loader
from models import build_models
import yaml
from box import Box
def yaml_to_box(yaml_file):
    with open(yaml_file, "r") as file:
        yaml_data = yaml.safe_load(file)
    return Box(yaml_data)
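
# Box (from python-box) wraps the parsed YAML dict so nested keys can be read
# with attribute access (e.g. cfg.section.flag) as well as normal dict indexing.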

if __name__ == "__main__":
    parser = GenerateOptions()
    opt = parser.parse()
    set_seed(opt.seed)
    device_id = opt.gpu_id
    device = torch.device("cuda:%d" % device_id if torch.cuda.is_available() else "cpu")
    opt.device = device
    assert opt.dataset_name in ("t2m", "kit")
    # Using a text prompt for generation
    if opt.text_prompt != "":
        texts = [opt.text_prompt]
        opt.num_samples = 1
        motion_lens = [opt.motion_length * opt.fps]
    # Or using texts (one per line in a .txt file) for generation
    elif opt.input_text != "":
        with open(opt.input_text, "r") as fr:
            texts = [line.strip() for line in fr.readlines()]
        opt.num_samples = len(texts)
        if opt.input_lens != "":
            with open(opt.input_lens, "r") as fr:
                motion_lens = [int(line.strip()) for line in fr.readlines()]
            assert len(texts) == len(
                motion_lens
            ), f"Please ensure that the motion lengths in {opt.input_lens} correspond to the texts in {opt.input_text}."
        else:
            motion_lens = [opt.motion_length * opt.fps for _ in range(opt.num_samples)]
    # Or using texts from the dataset
    else:
        gen_datasetloader = get_dataset_loader(
            opt, opt.num_samples, mode="hml_gt", split="test"
        )
        texts, _, motion_lens = next(iter(gen_datasetloader))
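
    # Whichever branch ran, `texts` and `motion_lens` are now parallel lists:
    # one prompt and one target length (in frames, i.e. seconds * fps) per sample.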
    # edit mode
    if opt.edit_mode:
        edit_config = yaml_to_box("options/edit.yaml")
    else:
        edit_config = yaml_to_box("options/noedit.yaml")
    print(edit_config)
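
    # The chosen config is forwarded to build_models below; edit.yaml enables the
    # editing behaviour, while noedit.yaml serves as the pass-through default.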
    ckpt_path = pjoin(opt.model_dir, opt.which_ckpt + ".tar")
    checkpoint = torch.load(ckpt_path, map_location={"cuda:0": str(device)})
    niter = checkpoint.get("total_it", 0)
    # make save dir
    out_path = opt.output_dir
    if out_path == "":
        out_path = pjoin(opt.save_root, "samples_iter{}_seed{}".format(niter, opt.seed))
        if opt.text_prompt != "":
            out_path += "_" + opt.text_prompt.replace(" ", "_").replace(".", "")
        elif opt.input_text != "":
            out_path += "_" + os.path.basename(opt.input_text).replace(
                ".txt", ""
            ).replace(" ", "_").replace(".", "")
    os.makedirs(out_path, exist_ok=True)
    # load model
    model = build_models(opt, edit_config=edit_config, out_path=out_path)
    niter = load_model_weights(model, ckpt_path, use_ema=not opt.no_ema)

    # Create a pipeline for generation in the diffusion model framework
    pipeline = DiffusePipeline(
        opt=opt,
        model=model,
        diffuser_name=opt.diffuser_name,
        device=device,
        num_inference_steps=opt.num_inference_steps,
        torch_dtype=torch.float16,
    )
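
    # Note: float16 inference assumes a CUDA device; if the script fell back to
    # CPU above, half precision may be unsupported or noticeably slow.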
    # generate
    pred_motions, _ = pipeline.generate(
        texts, torch.LongTensor([int(x) for x in motion_lens])
    )
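
    # pred_motions are normalized motion feature sequences; they are de-normalized
    # with the dataset mean/std below and then converted to 3D joint positions.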
    # Convert the generated motion representation into 3D joint coordinates and save as npy files
    npy_dir = pjoin(out_path, "joints_npy")
    root_dir = pjoin(out_path, "root_npy")
    os.makedirs(npy_dir, exist_ok=True)
    os.makedirs(root_dir, exist_ok=True)
    print(f"saving results npy file (3d joints) to [{npy_dir}]")
    mean = np.load(pjoin(opt.meta_dir, "mean.npy"))
    std = np.load(pjoin(opt.meta_dir, "std.npy"))
    samples = []
    root_list = []
    for i, motion in enumerate(pred_motions):
        motion = motion.cpu().numpy() * std + mean
        np.save(pjoin(npy_dir, f"raw_{i:02}.npy"), motion)
        npy_name = f"{i:02}.npy"
        # 1. recover the 3d joint representation by ik
        motion = recover_from_ric(torch.from_numpy(motion).float(), opt.joints_num)
        # 2. put on floor (Y axis)
        floor_height = motion.min(dim=0)[0].min(dim=0)[0][1]
        motion[:, :, 1] -= floor_height
        motion = motion.numpy()
        # 3. remove jitter
        motion = motion_temporal_filter(motion, sigma=1)
        # save the root trajectory (x, y, z of joint 0 per frame)
        root_trajectory = motion[:, 0, :]
        root_list.append(root_trajectory)
        np.save(pjoin(root_dir, f"root_{i:02}.npy"), root_trajectory)
        # plot the root height (Y axis) over time, one figure per sample
        y = root_trajectory[:, 1]
        plt.figure()
        plt.plot(y, label="root height (y)")
        plt.legend()
        plt.title("Root Joint Trajectory")
        plt.xlabel("Frame")
        plt.ylabel("Position")
        plt.savefig(pjoin(root_dir, f"root_trajectory_{i:02}.png"))
        plt.close()
        np.save(pjoin(npy_dir, npy_name), motion)
        samples.append(motion)
    # save all root trajectories, concatenated frame-wise, alongside the other outputs
    root_list_res = np.concatenate(root_list, axis=0)
    np.save(pjoin(out_path, "root_list.npy"), root_list_res)
    # save the text and length conditions used for this generation
    with open(pjoin(out_path, "results.txt"), "w") as fw:
        fw.write("\n".join(texts))
    with open(pjoin(out_path, "results_lens.txt"), "w") as fw:
        fw.write("\n".join([str(l) for l in motion_lens]))
    # skeletal animation visualization
    print(f"saving motion videos to [{out_path}]...")
    for i, title in enumerate(texts):
        motion = samples[i]
        fname = f"{i:02}.mp4"
        kinematic_tree = (
            paramUtil.t2m_kinematic_chain
            if (opt.dataset_name == "t2m")
            else paramUtil.kit_kinematic_chain
        )
        plot_3d_motion(
            pjoin(out_path, fname),
            kinematic_tree,
            motion,
            title=title,
            fps=opt.fps,
            radius=opt.radius,
        )
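
# Example invocation (flag names are assumed to match the attributes read from
# GenerateOptions above; check options/generate_options.py for the exact flags):
#   python generate.py --text_prompt "a person walks forward and waves" --motion_length 6
#   python generate.py --input_text prompts.txt --input_lens lens.txt --edit_mode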