wzhouxiff committed on
Commit
83719d4
1 Parent(s): 75aa986

add handcrafted camera poses and add motionctrl+videocrafter2 cmcm

Files changed (3)
  1. app.py +213 -480
  2. gradio_utils/camera_utils.py +14 -4
  3. gradio_utils/page_control.py +580 -0
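This commit does two things: it adds a handcrafted ("RAW") camera-pose input path and wires MotionCtrl + VideoCrafter2 (CMCM) in as a second base model next to LVDM/VideoCrafter. As a reading aid, the sketch below mirrors how the updated `model_run` in app.py routes between the two checkpoints; `select_model` is a hypothetical helper, not part of the commit.

```python
# Minimal sketch, not the app itself: mirrors the choose_model branch added to model_run.
# model_v1 / model_v2 stand for the LVDM/VideoCrafter and VideoCrafter2 CMCM checkpoints
# that app.py loads in this diff.
BASE_MODEL = ['LVDM/VideoCrafter', 'VideoCrafter2']

def select_model(choose_model, model_v1, model_v2):
    """Return the checkpoint and latent noise shape used for sampling."""
    if choose_model == BASE_MODEL[0]:
        return model_v1, [1, 4, 16, 32, 32]   # LVDM/VideoCrafter: 16 frames, 32x32 latents
    return model_v2, [1, 4, 16, 40, 64]       # VideoCrafter2 CMCM: 16 frames, 40x64 latents
```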
app.py CHANGED
@@ -13,7 +13,7 @@ from omegaconf import OmegaConf
13
  from PIL import Image
14
  from pytorch_lightning import seed_everything
15
 
16
- from gradio_utils.camera_utils import CAMERA_MOTION_MODE, process_camera
17
  from gradio_utils.traj_utils import (OBJECT_MOTION_MODE, get_provided_traj,
18
  process_points, process_traj)
19
  from gradio_utils.utils import vis_camera
@@ -23,17 +23,46 @@ from main.evaluation.motionctrl_inference import (DEFAULT_NEGATIVE_PROMPT,
23
  post_prompt)
24
  from utils.utils import instantiate_from_config
25
26
  os.environ['KMP_DUPLICATE_LIB_OK']='True'
27
 
28
 
29
  #### Description ####
30
  title = r"""<h1 align="center">MotionCtrl: A Unified and Flexible Motion Controller for Video Generation</h1>"""
31
 
32
  description = r"""
33
  <b>Official Gradio demo</b> for <a href='https://github.com/TencentARC/MotionCtrl' target='_blank'><b>MotionCtrl: A Unified and Flexible Motion Controller for Video Generation</b></a>.<br>
34
  🔥 MotionCtrl is capable of independently and flexibly controlling the camera motion and object motion of a generated video, with only a unified model.<br>
35
  🤗 Try to control the motion of the generated videos yourself!<br>
36
- ❗❗❗ Please note that current version of **MotionCtrl** is deployed on **LVDM/VideoCrafter**. The versions that depolyed on **AnimateDiff** and **SVD** will be released soon.<br>
37
  """
38
  article = r"""
39
  If MotionCtrl is helpful, please help to ⭐ the <a href='https://github.com/TencentARC/MotionCtrl' target='_blank'>Github Repo</a>. Thanks!
@@ -78,12 +107,12 @@ button {border-radius: 8px !important;}
78
 
79
 
80
  T_base = [
81
- [1.,0.,0.], ## W2C positive x direction: camera pans left
82
- [-1.,0.,0.], ## W2C negative x direction: camera pans right
83
- [0., 1., 0.], ## W2C positive y direction: camera pans up
84
- [0.,-1.,0.], ## W2C negative y direction: camera pans down
85
- [0.,0.,1.], ## W2C positive z direction: camera zoom out
86
- [0.,0.,-1.], ## W2C negative z direction: camera moves forward, zoom in
87
  ]
88
  radius = 1
89
  n = 16
@@ -99,6 +128,7 @@ res = []
99
  res_forsave = []
100
  T_range = 1.8
101
 
 
102
 
103
 
104
  for i in range(0, 16):
@@ -111,34 +141,14 @@ for i in range(0, 16):
111
 
112
  fig = vis_camera(res)
113
 
114
- # MODE = ["camera motion control", "object motion control", "camera + object motion control"]
115
- MODE = ["control camera poses", "control object trajectory", "control both camera and object motion"]
116
- BASE_MODEL = ['LVDM/VideoCrafter', 'AnimateDiff', 'SVD']
117
-
118
 
119
- traj_list = []
120
- camera_dict = {
121
- "motion":[],
122
- "mode": "Customized Mode 1: First A then B", # "First A then B", "Both A and B", "Custom"
123
- "speed": 1.0,
124
- "complex": None
125
- }
126
 
127
- def fn_vis_camera(info_mode):
128
- global camera_dict
129
- RT = process_camera(camera_dict) # [t, 3, 4]
130
- if camera_dict['complex'] is not None:
131
- # rescale T to [-2,2]
132
- for i in range(3):
133
- min_T = np.min(RT[:,i,-1])
134
- max_T = np.max(RT[:,i,-1])
135
- if min_T < -2 or max_T > 2:
136
- RT[:,i,-1] = RT[:,i,-1] - min_T
137
- RT[:,i,-1] = RT[:,i,-1] / (np.max(RT[:,:,-1]) + 1e-6)
138
- RT[:,i,-1] = RT[:,i,-1] * 4
139
- RT[:,i,-1] = RT[:,i,-1] - 2
140
-
141
- fig = vis_camera(RT)
142
 
143
  if info_mode == MODE[0]:
144
  vis_step3_prompt_generate = True
@@ -174,6 +184,7 @@ def fn_vis_camera(info_mode):
174
 
175
  def fn_vis_traj():
176
  global traj_list
 
177
  xy_range = 1024
178
  points = process_points(traj_list)
179
  imgs = []
@@ -194,7 +205,22 @@ def fn_vis_traj():
194
 
195
  # size = (512, 512)
196
  fps = 10
197
- path = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False).name
198
  writer = imageio.get_writer(path, format='mp4', mode='I', fps=fps)
199
  for img in imgs:
200
  writer.append_data(img)
@@ -214,114 +240,53 @@ def fn_vis_traj():
214
  gr.update(visible=vis_start), \
215
  gr.update(visible=vis_gen_video, value=None)
216
 
217
- def display_camera_info(camera_dict, camera_mode=None):
218
- if camera_dict['complex'] is not None:
219
- res = f"complex : {camera_dict['complex']}. "
220
- else:
221
- res = ""
222
- res += f"motion : {[_ for _ in camera_dict['motion']]}. "
223
- res += f"speed : {camera_dict['speed']}. "
224
- if camera_mode == CAMERA_MOTION_MODE[2]:
225
- res += f"mode : {camera_dict['mode']}. "
226
- return res
227
-
228
- def add_traj_point(evt: gr.SelectData, ):
229
- global traj_list
230
- traj_list.append(evt.index)
231
- traj_str = [f"{traj}" for traj in traj_list]
232
- return ", ".join(traj_str)
233
 
234
- def add_provided_traj(traj_name):
235
- global traj_list
236
- traj_list = get_provided_traj(traj_name)
237
- traj_str = [f"{traj}" for traj in traj_list]
238
- return ", ".join(traj_str)
239
 
240
- def add_camera_motion(camera_motion, camera_mode):
241
- global camera_dict
242
- if camera_dict['complex'] is not None:
243
- camera_dict['complex'] = None
244
- if camera_mode == CAMERA_MOTION_MODE[2] and len(camera_dict['motion']) <2:
245
- camera_dict['motion'].append(camera_motion)
246
- else:
247
- camera_dict['motion']=[camera_motion]
248
-
249
- return display_camera_info(camera_dict, camera_mode)
250
-
251
- def add_complex_camera_motion(camera_motion):
252
- global camera_dict
253
- camera_dict['complex']=camera_motion
254
- return display_camera_info(camera_dict)
255
-
256
- def change_camera_mode(combine_type, camera_mode):
257
- global camera_dict
258
- camera_dict['mode'] = combine_type
259
-
260
- return display_camera_info(camera_dict, camera_mode)
261
-
262
- def change_camera_speed(camera_speed):
263
- global camera_dict
264
- camera_dict['speed'] = camera_speed
265
- return display_camera_info(camera_dict)
266
-
267
- def reset_camera():
268
- global camera_dict
269
- camera_dict = {
270
- "motion":[],
271
- "mode": "Customized Mode 1: First A then B",
272
- "speed": 1.0,
273
- "complex": None
274
- }
275
- return display_camera_info(camera_dict)
276
-
277
-
278
- def fn_traj_droplast():
279
- global traj_list
280
-
281
- if traj_list:
282
- traj_list.pop()
283
-
284
- if traj_list:
285
- traj_str = [f"{traj}" for traj in traj_list]
286
- return ", ".join(traj_str)
287
- else:
288
- return "Click to specify trajectory"
289
-
290
- def fn_traj_reset():
291
- global traj_list
292
- traj_list = []
293
- return "Click to specify trajectory"
294
 
295
  ###########################################
296
- model_path='./motionctrl.pth?download=true'
 
297
  config_path='./configs/inference/config_both.yaml'
298
  if not os.path.exists(model_path):
299
  os.system(f'wget https://huggingface.co/TencentARC/MotionCtrl/resolve/main/motionctrl.pth?download=true -P .')
300
 
301
  config = OmegaConf.load(config_path)
302
  model_config = config.pop("model", OmegaConf.create())
303
- model = instantiate_from_config(model_config)
304
  if torch.cuda.is_available():
305
- model = model.cuda()
 
 
 
306
 
307
- model = load_model_checkpoint(model, model_path)
308
- model.eval()
 
309
310
 
311
- def model_run(prompts, infer_mode, seed, n_samples):
 
 
312
  global traj_list
313
  global camera_dict
314
 
315
- RT = process_camera(camera_dict).reshape(-1,12)
316
  traj_flow = process_traj(traj_list).transpose(3,0,1,2)
317
- print(prompts)
318
- print(RT.shape)
319
- print(traj_flow.shape)
320
 
321
- noise_shape = [1, 4, 16, 32, 32]
322
  unconditional_guidance_scale = 7.5
323
  unconditional_guidance_scale_temporal = None
324
- # n_samples = 1
325
  ddim_steps= 50
326
  ddim_eta=1.0
327
  cond_T=800
@@ -415,15 +380,13 @@ def model_run(prompts, infer_mode, seed, n_samples):
415
  batch_variants = torch.stack(batch_variants, dim=1)
416
  batch_variants = batch_variants[0]
417
 
418
- # file_path = save_results(batch_variants, "MotionCtrl", "gradio_temp", fps=10)
419
  file_path = save_results(batch_variants, fps=10)
420
- print(file_path)
421
 
422
  return gr.update(value=file_path, width=256*n_samples, height=256)
423
 
424
- # return file_path
425
 
426
- def save_results(video, fps=10):
427
 
428
  # b,c,t,h,w
429
  video = video.detach().cpu()
@@ -435,7 +398,10 @@ def save_results(video, fps=10):
435
  grid = (grid + 1.0) / 2.0
436
  grid = (grid * 255).to(torch.uint8).permute(0, 2, 3, 1) # [t, h, w*n, 3]
437
 
438
- path = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False).name
 
 
 
439
 
440
  writer = imageio.get_writer(path, format='mp4', mode='I', fps=fps)
441
  for i in range(grid.shape[0]):
@@ -446,357 +412,35 @@ def save_results(video, fps=10):
446
 
447
  return path
448
 
449
- def visualized_step2(infer_mode):
450
-
451
- # reset
452
- reset_camera()
453
- fn_traj_reset()
454
-
455
- # camera motion control
456
- vis_basic_camera_motion = False
457
- vis_basic_camera_motion_des = False
458
- vis_custom_camera_motion = False
459
- vis_custom_run_status = False
460
- vis_complex_camera_motion = False
461
- vis_complex_camera_motion_des = False
462
- vis_U = False
463
- vis_D = False
464
- vis_L = False
465
- vis_R = False
466
- vis_I = False
467
- vis_O = False
468
- vis_ACW = False
469
- vis_CW = False
470
- vis_combine1 = False
471
- vis_combine2 = False
472
- vis_speed = False
473
-
474
- vis_Pose_1, vis_Pose_2, vis_Pose_3, vis_Pose_4 = False, False, False, False
475
- vis_Pose_5, vis_Pose_6, vis_Pose_7, vis_Pose_8 = False, False, False, False
476
-
477
- vis_camera_args = False
478
- vis_camera_reset = False
479
- vis_camera_vis = False
480
- vis_vis_camera = False
481
-
482
- # object motion control
483
- vis_provided_traj = False
484
- vis_provided_traj_des = False
485
- vis_draw_yourself = False
486
- vis_draw_run_status = False
487
-
488
- vis_traj_1, vis_traj_2, vis_traj_3, vis_traj_4 = False, False, False, False
489
- vis_traj_5, vis_traj_6, vis_traj_7, vis_traj_8 = False, False, False, False
490
-
491
- traj_args = False
492
- traj_droplast, traj_reset = False, False
493
- traj_vis = False
494
- traj_input, vis_traj = False, False
495
-
496
-
497
- # generate video
498
- vis_step3_prompt_generate = False
499
- vis_prompt = False
500
- vis_num_samples = False
501
- vis_seed = False
502
- vis_start = False
503
- vis_gen_video = False
504
-
505
- if infer_mode == MODE[0]:
506
- vis_step2_camera_motion = True
507
- vis_step2_camera_motion_des = True
508
- vis_camera_mode = True
509
- vis_camera_info = True
510
-
511
- vis_step2_object_motion = False
512
- vis_step2_object_motion_des = False
513
- vis_traj_mode = False
514
- vis_traj_info = False
515
-
516
- step2_camera_object_motion = False
517
- step2_camera_object_motion_des = False
518
-
519
- elif infer_mode == MODE[1]:
520
- vis_step2_camera_motion = False
521
- vis_step2_camera_motion_des = False
522
- vis_camera_mode = False
523
- vis_camera_info = False
524
-
525
- vis_step2_object_motion = True
526
- vis_step2_object_motion_des = True
527
- vis_traj_mode = True
528
- vis_traj_info = True
529
-
530
- step2_camera_object_motion = False
531
- step2_camera_object_motion_des = False
532
- else: #infer_mode == MODE[2]:
533
- vis_step2_camera_motion = False
534
- vis_step2_camera_motion_des = False
535
- vis_camera_mode = False
536
- vis_camera_info = False
537
-
538
- vis_step2_object_motion = False
539
- vis_step2_object_motion_des = False
540
- vis_traj_mode = False
541
- vis_traj_info = False
542
-
543
- step2_camera_object_motion = True
544
- step2_camera_object_motion_des = True
545
-
546
- vis_basic_camera_motion = True
547
- vis_basic_camera_motion_des = True
548
- vis_U = True
549
- vis_D = True
550
- vis_L = True
551
- vis_R = True
552
- vis_I = True
553
- vis_O = True
554
- vis_ACW = True
555
- vis_CW = True
556
- vis_speed = True
557
-
558
- vis_camera_args = True
559
- vis_camera_reset = True
560
- vis_camera_vis = True
561
- vis_vis_camera = True
562
-
563
-
564
- return gr.update(visible=vis_step2_camera_motion), \
565
- gr.update(visible=vis_step2_camera_motion_des), \
566
- gr.update(visible=vis_camera_mode), \
567
- gr.update(visible=vis_camera_info), \
568
- gr.update(visible=vis_basic_camera_motion), \
569
- gr.update(visible=vis_basic_camera_motion_des), \
570
- gr.update(visible=vis_custom_camera_motion), \
571
- gr.update(visible=vis_custom_run_status), \
572
- gr.update(visible=vis_complex_camera_motion), \
573
- gr.update(visible=vis_complex_camera_motion_des), \
574
- gr.update(visible=vis_U), gr.update(visible=vis_D), gr.update(visible=vis_L), gr.update(visible=vis_R), \
575
- gr.update(visible=vis_I), gr.update(visible=vis_O), gr.update(visible=vis_ACW), gr.update(visible=vis_CW), \
576
- gr.update(visible=vis_combine1), gr.update(visible=vis_combine2), \
577
- gr.update(visible=vis_speed), \
578
- gr.update(visible=vis_Pose_1), gr.update(visible=vis_Pose_2), gr.update(visible=vis_Pose_3), gr.update(visible=vis_Pose_4), \
579
- gr.update(visible=vis_Pose_5), gr.update(visible=vis_Pose_6), gr.update(visible=vis_Pose_7), gr.update(visible=vis_Pose_8), \
580
- gr.update(visible=vis_camera_args, value=None), \
581
- gr.update(visible=vis_camera_reset), gr.update(visible=vis_camera_vis), \
582
- gr.update(visible=vis_vis_camera, value=None), \
583
- gr.update(visible=vis_step2_object_motion), \
584
- gr.update(visible=vis_step2_object_motion_des), \
585
- gr.update(visible=vis_traj_mode), \
586
- gr.update(visible=vis_traj_info), \
587
- gr.update(visible=vis_provided_traj), \
588
- gr.update(visible=vis_provided_traj_des), \
589
- gr.update(visible=vis_draw_yourself), \
590
- gr.update(visible=vis_draw_run_status), \
591
- gr.update(visible=vis_traj_1), gr.update(visible=vis_traj_2), gr.update(visible=vis_traj_3), gr.update(visible=vis_traj_4), \
592
- gr.update(visible=vis_traj_5), gr.update(visible=vis_traj_6), gr.update(visible=vis_traj_7), gr.update(visible=vis_traj_8), \
593
- gr.update(visible=traj_args), \
594
- gr.update(visible=traj_droplast), gr.update(visible=traj_reset), \
595
- gr.update(visible=traj_vis), \
596
- gr.update(visible=traj_input), gr.update(visible=vis_traj, value=None), \
597
- gr.update(visible=step2_camera_object_motion), \
598
- gr.update(visible=step2_camera_object_motion_des), \
599
- gr.update(visible=vis_step3_prompt_generate), \
600
- gr.update(visible=vis_prompt), \
601
- gr.update(visible=vis_num_samples), \
602
- gr.update(visible=vis_seed), \
603
- gr.update(visible=vis_start), \
604
- gr.update(visible=vis_gen_video)
605
-
606
- def visualized_camera_poses(step2_camera_motion):
607
- reset_camera()
608
-
609
- # generate video
610
- vis_step3_prompt_generate = False
611
- vis_prompt = False
612
- vis_num_samples = False
613
- vis_seed = False
614
- vis_start = False
615
- vis_gen_video = False
616
-
617
- if step2_camera_motion == CAMERA_MOTION_MODE[0]:
618
- vis_basic_camera_motion = True
619
- vis_basic_camera_motion_des = True
620
- vis_custom_camera_motion = False
621
- vis_custom_run_status = False
622
- vis_complex_camera_motion = False
623
- vis_complex_camera_motion_des = False
624
- vis_U = True
625
- vis_D = True
626
- vis_L = True
627
- vis_R = True
628
- vis_I = True
629
- vis_O = True
630
- vis_ACW = True
631
- vis_CW = True
632
- vis_combine1 = False
633
- vis_combine2 = False
634
- vis_speed = True
635
-
636
- vis_Pose_1, vis_Pose_2, vis_Pose_3, vis_Pose_4 = False, False, False, False
637
- vis_Pose_5, vis_Pose_6, vis_Pose_7, vis_Pose_8 = False, False, False, False
638
-
639
- elif step2_camera_motion == CAMERA_MOTION_MODE[1]:
640
- vis_basic_camera_motion = False
641
- vis_basic_camera_motion_des = False
642
- vis_custom_camera_motion = False
643
- vis_custom_run_status = False
644
- vis_complex_camera_motion = True
645
- vis_complex_camera_motion_des = True
646
- vis_U = False
647
- vis_D = False
648
- vis_L = False
649
- vis_R = False
650
- vis_I = False
651
- vis_O = False
652
- vis_ACW = False
653
- vis_CW = False
654
- vis_combine1 = False
655
- vis_combine2 = False
656
- vis_speed = False
657
-
658
- vis_Pose_1, vis_Pose_2, vis_Pose_3, vis_Pose_4 = True, True, True, True
659
- vis_Pose_5, vis_Pose_6, vis_Pose_7, vis_Pose_8 = True, True, True, True
660
-
661
- else: # step2_camera_motion = CAMERA_MOTION_MODE[2]:
662
- vis_basic_camera_motion = False
663
- vis_basic_camera_motion_des = False
664
- vis_custom_camera_motion = True
665
- vis_custom_run_status = True
666
- vis_complex_camera_motion = False
667
- vis_complex_camera_motion_des = False
668
- vis_U = True
669
- vis_D = True
670
- vis_L = True
671
- vis_R = True
672
- vis_I = True
673
- vis_O = True
674
- vis_ACW = True
675
- vis_CW = True
676
- vis_combine1 = True
677
- vis_combine2 = True
678
- vis_speed = True
679
-
680
- vis_Pose_1, vis_Pose_2, vis_Pose_3, vis_Pose_4 = False, False, False, False
681
- vis_Pose_5, vis_Pose_6, vis_Pose_7, vis_Pose_8 = False, False, False, False
682
-
683
- vis_camera_args = True
684
- vis_camera_reset = True
685
- vis_camera_vis = True
686
- vis_vis_camera = True
687
-
688
- return gr.update(visible=vis_basic_camera_motion), \
689
- gr.update(visible=vis_basic_camera_motion_des), \
690
- gr.update(visible=vis_custom_camera_motion), \
691
- gr.update(visible=vis_custom_run_status), \
692
- gr.update(visible=vis_complex_camera_motion), \
693
- gr.update(visible=vis_complex_camera_motion_des), \
694
- gr.update(visible=vis_U), gr.update(visible=vis_D), gr.update(visible=vis_L), gr.update(visible=vis_R), \
695
- gr.update(visible=vis_I), gr.update(visible=vis_O), gr.update(visible=vis_ACW), gr.update(visible=vis_CW), \
696
- gr.update(visible=vis_combine1), gr.update(visible=vis_combine2), \
697
- gr.update(visible=vis_speed), \
698
- gr.update(visible=vis_Pose_1), gr.update(visible=vis_Pose_2), gr.update(visible=vis_Pose_3), gr.update(visible=vis_Pose_4), \
699
- gr.update(visible=vis_Pose_5), gr.update(visible=vis_Pose_6), gr.update(visible=vis_Pose_7), gr.update(visible=vis_Pose_8), \
700
- gr.update(visible=vis_camera_args, value=None), \
701
- gr.update(visible=vis_camera_reset), gr.update(visible=vis_camera_vis), \
702
- gr.update(visible=vis_vis_camera, value=None), \
703
- gr.update(visible=vis_step3_prompt_generate), \
704
- gr.update(visible=vis_prompt), \
705
- gr.update(visible=vis_num_samples), \
706
- gr.update(visible=vis_seed), \
707
- gr.update(visible=vis_start), \
708
- gr.update(visible=vis_gen_video)
709
-
710
- def visualized_traj_poses(step2_object_motion):
711
-
712
- fn_traj_reset()
713
-
714
- # generate video
715
- vis_step3_prompt_generate = False
716
- vis_prompt = False
717
- vis_num_samples = False
718
- vis_seed = False
719
- vis_start = False
720
- vis_gen_video = False
721
-
722
- if step2_object_motion == "Provided Trajectory":
723
- vis_provided_traj = True
724
- vis_provided_traj_des = True
725
- vis_draw_yourself = False
726
- vis_draw_run_status = False
727
-
728
- vis_traj_1, vis_traj_2, vis_traj_3, vis_traj_4 = True, True, True, True
729
- vis_traj_5, vis_traj_6, vis_traj_7, vis_traj_8 = True, True, True, True
730
-
731
- traj_args = True
732
- traj_droplast, traj_reset = False, True
733
- traj_vis = True
734
- traj_input, vis_traj = False, True
735
-
736
-
737
- elif step2_object_motion == "Custom Trajectory":
738
- vis_provided_traj = False
739
- vis_provided_traj_des = False
740
- vis_draw_yourself = True
741
- vis_draw_run_status = True
742
-
743
- vis_traj_1, vis_traj_2, vis_traj_3, vis_traj_4 = False, False, False, False
744
- vis_traj_5, vis_traj_6, vis_traj_7, vis_traj_8 = False, False, False, False
745
-
746
- traj_args = True
747
- traj_droplast, traj_reset = True, True
748
- traj_vis = True
749
- traj_input, vis_traj = True, True
750
-
751
- return gr.update(visible=vis_provided_traj), \
752
- gr.update(visible=vis_provided_traj_des), \
753
- gr.update(visible=vis_draw_yourself), \
754
- gr.update(visible=vis_draw_run_status), \
755
- gr.update(visible=vis_traj_1), gr.update(visible=vis_traj_2), gr.update(visible=vis_traj_3), gr.update(visible=vis_traj_4), \
756
- gr.update(visible=vis_traj_5), gr.update(visible=vis_traj_6), gr.update(visible=vis_traj_7), gr.update(visible=vis_traj_8), \
757
- gr.update(visible=traj_args), \
758
- gr.update(visible=traj_droplast), gr.update(visible=traj_reset), \
759
- gr.update(visible=traj_vis), \
760
- gr.update(visible=traj_input), gr.update(visible=vis_traj, value=None), \
761
- gr.update(visible=vis_step3_prompt_generate), \
762
- gr.update(visible=vis_prompt), \
763
- gr.update(visible=vis_num_samples), \
764
- gr.update(visible=vis_seed), \
765
- gr.update(visible=vis_start), \
766
- gr.update(visible=vis_gen_video)
767
 
768
  def main(args):
769
  demo = gr.Blocks()
770
  with demo:
771
 
772
  gr.Markdown(title)
 
773
  gr.Markdown(description)
774
 
775
- # state = gr.State({
776
- # "mode": "camera_only",
777
- # "camera_input": [],
778
- # "traj_input": [],
779
- # })
780
 
781
  with gr.Column():
782
- '''
783
  # step 0: select based model.
784
  gr.Markdown("## Step0: Selecting the model", show_label=False)
785
  gr.Markdown( f'- {BASE_MODEL[0]}: **MotionCtrl** deployed on {BASE_MODEL[0]}', show_label=False)
786
  gr.Markdown( f'- {BASE_MODEL[1]}: **MotionCtrl** deployed on {BASE_MODEL[1]}', show_label=False)
787
- gr.Markdown( f'- {BASE_MODEL[2]}: **MotionCtrl** deployed on {BASE_MODEL[2]}', show_label=False)
788
- gr.Markdown( f'- **Only the model that deployed on {BASE_MODEL[0]} is avalible now. MotionCtrl models deployed on {BASE_MODEL[1]} and {BASE_MODEL[2]} are coming soon.**', show_label=False)
789
- gr.Radio(choices=BASE_MODEL, value=BASE_MODEL[0], label="Based Model", interactive=False)
790
- '''
791
 
792
  # step 1: select motion control mode
793
- gr.Markdown("## Step 1/3: Selecting the motion control mode", show_label=False)
794
- gr.Markdown( f'- {MODE[0]}: Control the camera motion only', show_label=False)
795
- gr.Markdown( f'- {MODE[1]}: Control the object motion only', show_label=False)
796
- gr.Markdown( f'- {MODE[2]}: Control both the camera and object motion', show_label=False)
797
- gr.Markdown( f'- Click `Proceed` to go into next step', show_label=False)
798
- infer_mode = gr.Radio(choices=MODE, value=MODE[0], label="Motion Control Mode", interactive=True)
799
- mode_info = gr.Button(value="Proceed")
 
800
 
801
  # step2 - camera + object motion control
802
  step2_camera_object_motion = gr.Markdown("---\n## Step 2/3: Select the camera poses and trajectory", show_label=False, visible=False)
@@ -834,18 +478,40 @@ def main(args):
834
 
835
  # step2.3 - camera motion control - custom
836
  custom_camera_motion = gr.Markdown(f"---\n### {CAMERA_MOTION_MODE[2]}", show_label=False, visible=False)
837
- custom_run_status = gr.Markdown(f"\n 1. Click two of the basic camera poses, such as `Pan Up` and `Pan Left`; \
838
- \n 2. Click `Customized Mode 1: First A then B` or `Customized Mode 1: First A then B` \
839
- \n - `Customized Mode 1: First A then B`: The camera first `Pan Up` and then `Pan Left`; \
840
- \n - `Customized Mode 2: Both A and B`: The camera move towards the upper left corner; \
841
- \n 3. Slide the `Motion speed` to get a speed value. The large the value, the fast the camera motion; \
842
- \n 4. Click `Visualize Camera and Proceed` to visualize the camera poses and go proceed; \
843
- \n 5. Click `Reset Camera` to reset the camera poses (If needed). ",
844
  show_label=False, visible=False)
845
 
 
 
 
846
  gr.HighlightedText(value=[("",""), ("1. Select two of the basic camera poses; 2. Select Customized Mode 1 OR Customized Mode 2. 3. Visualized Camera to show the customized camera poses", "Normal")],
847
  color_map={"Normal": "green", "Error": "red", "Clear clicks": "gray", "Add mask": "green", "Remove mask": "red"}, visible=False)
848
849
  with gr.Row():
850
  U = gr.Button(value="Pan Up", visible=False)
851
  D = gr.Button(value="Pan Down", visible=False)
@@ -857,9 +523,9 @@ def main(args):
857
  ACW = gr.Button(value="ACW", visible=False)
858
  CW = gr.Button(value="CW", visible=False)
859
 
860
- with gr.Row():
861
- combine1 = gr.Button(value="Customized Mode 1: First A then B", visible=False)
862
- combine2 = gr.Button(value="Customized Mode 2: Both A and B", visible=False)
863
 
864
  with gr.Row():
865
  speed = gr.Slider(minimum=0, maximum=2, step=0.2, label="Motion Speed", value=1.0, visible=False)
@@ -941,12 +607,62 @@ def main(args):
941
  with gr.Column():
942
  step3_prompt_generate = gr.Markdown("---\n## Step 3/3: Add prompt and Generate videos", show_label=False, visible=False)
943
  prompt = gr.Textbox(value="a dog sitting on grass", label="Prompt", interactive=True, visible=False)
944
- n_samples = gr.Number(value=3, precision=0, interactive=True, label="n_samples", visible=False)
945
  seed = gr.Number(value=1234, precision=0, interactive=True, label="Seed", visible=False)
946
  start = gr.Button(value="Start generation !", visible=False)
947
  with gr.Column():
948
  gen_video = gr.Video(value=None, label="Generate Video", visible=False)
949
950
  mode_info.click(
951
  fn=visualized_step2,
952
  inputs=[infer_mode],
@@ -963,7 +679,7 @@ def main(args):
963
  complex_camera_motion_des,
964
  U, D, L, R,
965
  I, O, ACW, CW,
966
- combine1, combine2,
967
  speed,
968
  Pose_1, Pose_2, Pose_3, Pose_4,
969
  Pose_5, Pose_6, Pose_7, Pose_8,
@@ -1006,7 +722,7 @@ def main(args):
1006
  complex_camera_motion_des,
1007
  U, D, L, R,
1008
  I, O, ACW, CW,
1009
- combine1, combine2,
1010
  speed,
1011
  Pose_1, Pose_2, Pose_3, Pose_4,
1012
  Pose_5, Pose_6, Pose_7, Pose_8,
@@ -1044,10 +760,27 @@ def main(args):
1044
  speed.change(fn=change_camera_speed, inputs=speed, outputs=camera_args)
1045
  camera_reset.click(fn=reset_camera, inputs=None, outputs=[camera_args])
1046
 
1047
- combine1.click(fn=change_camera_mode, inputs=[combine1, camera_mode], outputs=camera_args)
1048
- combine2.click(fn=change_camera_mode, inputs=[combine2, camera_mode], outputs=camera_args)
1049
-
1050
- camera_vis.click(fn=fn_vis_camera, inputs=[infer_mode], outputs=[vis_camera, object_mode, object_info, step3_prompt_generate, prompt, n_samples, seed, start, gen_video])
1051
 
1052
  Pose_1.click(fn=add_complex_camera_motion, inputs=Pose_1, outputs=camera_args)
1053
  Pose_2.click(fn=add_complex_camera_motion, inputs=Pose_2, outputs=camera_args)
@@ -1073,7 +806,7 @@ def main(args):
1073
  traj_reset.click(fn=fn_traj_reset, inputs=None, outputs=traj_args)
1074
 
1075
 
1076
- start.click(fn=model_run, inputs=[prompt, infer_mode, seed, n_samples], outputs=gen_video)
1077
 
1078
  gr.Markdown(article)
1079
 
 
13
  from PIL import Image
14
  from pytorch_lightning import seed_everything
15
 
16
+ from gradio_utils.camera_utils import CAMERA_MOTION_MODE, process_camera, create_relative
17
  from gradio_utils.traj_utils import (OBJECT_MOTION_MODE, get_provided_traj,
18
  process_points, process_traj)
19
  from gradio_utils.utils import vis_camera
 
23
  post_prompt)
24
  from utils.utils import instantiate_from_config
25
 
26
+ from gradio_utils.page_control import (MODE, BASE_MODEL, traj_list, camera_dict,
27
+ reset_camera,
28
+ visualized_step1, visualized_step2,
29
+ visualized_camera_poses, visualized_traj_poses,
30
+ add_camera_motion, add_complex_camera_motion,
31
+ input_raw_camera_pose,
32
+ change_camera_mode, change_camera_speed,
33
+ add_traj_point, add_provided_traj,
34
+ fn_traj_droplast, fn_traj_reset)
35
+
36
  os.environ['KMP_DUPLICATE_LIB_OK']='True'
37
+ SPACE_ID = os.environ.get('SPACE_ID', '')
38
+
39
+ DIY_MODE = ['Customized Mode 1: First A then B',
40
+ 'Customized Mode 2: Both A and B',
41
+ 'Customized Mode 3: RAW Camera Poses']
42
 
43
 
44
  #### Description ####
45
  title = r"""<h1 align="center">MotionCtrl: A Unified and Flexible Motion Controller for Video Generation</h1>"""
46
+ # subtitle = r"""<h2 align="center">Deployed on SVD Generation</h2>"""
47
+ important_link = r"""
48
+ <div align='center'>
49
+ <a href='https://huggingface.co/spaces/TencentARC/MotionCtrl_SVD'>[Demo MotionCtrl + SVD]</a>
50
+ &ensp; <a href='https://wzhouxiff.github.io/projects/MotionCtrl/assets/paper/MotionCtrl.pdf'>[Paper]</a>
51
+ &ensp; <a href='https://wzhouxiff.github.io/projects/MotionCtrl/'>[Project Page]</a>
52
+ &ensp; <a href='https://github.com/TencentARC/MotionCtrl'>[Code]</a>
53
+ &ensp; <a href='https://github.com/TencentARC/MotionCtrl/blob/svd/doc/showcase_svd.md'>[Showcases]</a>
54
+ &ensp; <a href='https://github.com/TencentARC/MotionCtrl/blob/svd/doc/tutorial.md'>[Tutorial]</a>
55
+ </div>
56
+ """
57
 
58
  description = r"""
59
  <b>Official Gradio demo</b> for <a href='https://github.com/TencentARC/MotionCtrl' target='_blank'><b>MotionCtrl: A Unified and Flexible Motion Controller for Video Generation</b></a>.<br>
60
  🔥 MotionCtrl is capable of independently and flexibly controlling the camera motion and object motion of a generated video, with only a unified model.<br>
61
  🤗 Try to control the motion of the generated videos yourself!<br>
62
+ ❗❗❗ This demo provides models of **MotionCtrl** deployed on **LVDM/VideoCrafter** and **VideoCrafter2**.
63
+ Deployments in **LVDM/VideoCrafter** include both Camera and Object Motion Control,
64
+ while deployments in **VideoCrafter2** only include Camera Motion Control.
65
+ <br>
66
  """
67
  article = r"""
68
  If MotionCtrl is helpful, please help to ⭐ the <a href='https://github.com/TencentARC/MotionCtrl' target='_blank'>Github Repo</a>. Thanks!
 
107
 
108
 
109
  T_base = [
110
+ [1.,0.,0.], ## W2C left
111
+ [-1.,0.,0.], ## W2C right
112
+ [0., 1., 0.], ## W2C up
113
+ [0.,-1.,0.], ## W2C down
114
+ [0.,0.,1.], ## W2C zoom out
115
+ [0.,0.,-1.], ## W2C zoom in
116
  ]
117
  radius = 1
118
  n = 16
 
128
  res_forsave = []
129
  T_range = 1.8
130
 
131
+ exp_no = 0
132
 
133
 
134
  for i in range(0, 16):
 
141
 
142
  fig = vis_camera(res)
143
 
144
+ def fn_vis_camera(info_mode, camera_args=None):
145
+ global camera_dict
146
+ RT = process_camera(camera_dict, camera_args) # [t, 3, 4]
 
147
 
148
+ rescale_T = 1.0
149
+ rescale_T = max(rescale_T, np.max(np.abs(RT[:,:,-1])) / 1.9)
150
 
151
+ fig = vis_camera(create_relative(RT), rescale_T=rescale_T)
152
 
153
  if info_mode == MODE[0]:
154
  vis_step3_prompt_generate = True
 
184
 
185
  def fn_vis_traj():
186
  global traj_list
187
+ global exp_no
188
  xy_range = 1024
189
  points = process_points(traj_list)
190
  imgs = []
 
205
 
206
  # size = (512, 512)
207
  fps = 10
208
+
209
+ out_dir = f'./results_trajs/{exp_no}'
210
+ os.makedirs(out_dir, exist_ok=True)
211
+ exp_no += 1
212
+
213
+ traj_flow = process_traj(traj_list).transpose(3,0,1,2)
214
+
215
+ np.save(f'{out_dir}/traj_flow.npy', traj_flow)
216
+ with open(f'{out_dir}/traj_list.txt', 'w') as f:
217
+ for item in traj_list:
218
+ f.write(f"{item[0]}, {item[1]}\n")
219
+
220
+ if out_dir is None:
221
+ path = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False).name
222
+ else:
223
+ path = os.path.join(out_dir, 'traj.mp4')
224
  writer = imageio.get_writer(path, format='mp4', mode='I', fps=fps)
225
  for img in imgs:
226
  writer.append_data(img)
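For reference, the per-experiment files written by the block above (`traj_flow.npy`, `traj_list.txt`, and `traj.mp4` under `./results_trajs/{exp_no}`) can be read back with a small sketch like this; the directory name is an assumption taken from this hunk.

```python
# Sketch only: load the trajectory artifacts saved by fn_vis_traj in this diff.
import numpy as np

def load_saved_traj(out_dir="./results_trajs/0"):
    traj_flow = np.load(f"{out_dir}/traj_flow.npy")                 # flow tensor saved above
    with open(f"{out_dir}/traj_list.txt") as f:
        points = [tuple(map(int, line.split(","))) for line in f]   # clicked (x, y) points, one per line
    return traj_flow, points
```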
 
240
  gr.update(visible=vis_start), \
241
  gr.update(visible=vis_gen_video, value=None)
242
  ###########################################
247
+
248
+ model_path='./checkpoints/motionctrl.pth'
249
  config_path='./configs/inference/config_both.yaml'
250
  if not os.path.exists(model_path):
251
  os.system(f'wget https://huggingface.co/TencentARC/MotionCtrl/resolve/main/motionctrl.pth?download=true -P .')
252
 
253
  config = OmegaConf.load(config_path)
254
  model_config = config.pop("model", OmegaConf.create())
255
+ model_v1 = instantiate_from_config(model_config)
256
  if torch.cuda.is_available():
257
+ model_v1 = model_v1.cuda()
258
+
259
+ model_v1 = load_model_checkpoint(model_v1, model_path)
260
+ model_v1.eval()
261
 
262
+ v2_model_path = './checkpoints/videocrafter2_motionctrl_cmcm.ckpt'
263
+ if not os.path.exists(v2_model_path):
264
+ os.system(f'wget https://huggingface.co/TencentARC/MotionCtrl/resolve/main/videocrafter2_motionctrl_cmcm.ckpt?download=true -P .')
265
 
266
+ model_v2 = instantiate_from_config(model_config)
267
+ model_v2 = load_model_checkpoint(model_v2, v2_model_path)
268
+
269
+ if torch.cuda.is_available():
270
+ model_v2 = model_v2.cuda()
271
 
272
+ model_v2.eval()
273
+
274
+ def model_run(prompts, choose_model, infer_mode, seed, n_samples, camera_args=None):
275
  global traj_list
276
  global camera_dict
277
 
278
+ RT = process_camera(camera_dict, camera_args).reshape(-1,12)
279
  traj_flow = process_traj(traj_list).transpose(3,0,1,2)
 
 
 
280
 
281
+ if choose_model == BASE_MODEL[0]:
282
+ model = model_v1
283
+ noise_shape = [1, 4, 16, 32, 32]
284
+ else:
285
+ model = model_v2
286
+ noise_shape = [1, 4, 16, 40, 64]
287
  unconditional_guidance_scale = 7.5
288
  unconditional_guidance_scale_temporal = None
289
+
290
  ddim_steps= 50
291
  ddim_eta=1.0
292
  cond_T=800
 
380
  batch_variants = torch.stack(batch_variants, dim=1)
381
  batch_variants = batch_variants[0]
382
 
 
383
  file_path = save_results(batch_variants, fps=10)
 
384
 
385
  return gr.update(value=file_path, width=256*n_samples, height=256)
386
 
387
+ # return
388
 
389
+ def save_results(video, fps=10, out_dir=None):
390
 
391
  # b,c,t,h,w
392
  video = video.detach().cpu()
 
398
  grid = (grid + 1.0) / 2.0
399
  grid = (grid * 255).to(torch.uint8).permute(0, 2, 3, 1) # [t, h, w*n, 3]
400
 
401
+ if out_dir is None:
402
+ path = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False).name
403
+ else:
404
+ path = os.path.join(out_dir, 'motionctrl.mp4')
405
 
406
  writer = imageio.get_writer(path, format='mp4', mode='I', fps=fps)
407
  for i in range(grid.shape[0]):
 
412
 
413
  return path
414
415
 
416
  def main(args):
417
  demo = gr.Blocks()
418
  with demo:
419
 
420
  gr.Markdown(title)
421
+ gr.Markdown(important_link)
422
  gr.Markdown(description)
423
424
 
425
  with gr.Column():
 
426
  # step 0: select based model.
427
  gr.Markdown("## Step0: Selecting the model", show_label=False)
428
  gr.Markdown( f'- {BASE_MODEL[0]}: **MotionCtrl** deployed on {BASE_MODEL[0]}', show_label=False)
429
  gr.Markdown( f'- {BASE_MODEL[1]}: **MotionCtrl** deployed on {BASE_MODEL[1]}', show_label=False)
430
+ # gr.HighlightedText(value=[("",""), (f'Choosing {BASE_MODEL[1]} requires time for loading new model. Please be patient.', "Normal")],
431
+ # color_map={"Normal": "green", "Error": "red", "Clear clicks": "gray", "Add mask": "green", "Remove mask": "red"}, visible=True)
432
+ choose_model = gr.Radio(choices=BASE_MODEL, value=BASE_MODEL[0], label="Based Model", interactive=True)
433
+ choose_model_button = gr.Button(value="Proceed")
434
 
435
  # step 1: select motion control mode
436
+ step1 = gr.Markdown("## Step 1/3: Selecting the motion control mode", show_label=False, visible=False)
437
+ setp1_dec = gr.Markdown( f'\n - {MODE[0]}: Control the camera motion only \
438
+ \n- {MODE[1]}: Control the object motion only \
439
+ \n- {MODE[2]}: Control both the camera and object motion \
440
+ \n- Click `Proceed` to go into next step',
441
+ show_label=False, visible=False)
442
+ infer_mode = gr.Radio(choices=MODE, value=MODE[0], label="Motion Control Mode", interactive=True, visible=False)
443
+ mode_info = gr.Button(value="Proceed", visible=False)
444
 
445
  # step2 - camera + object motion control
446
  step2_camera_object_motion = gr.Markdown("---\n## Step 2/3: Select the camera poses and trajectory", show_label=False, visible=False)
 
478
 
479
  # step2.3 - camera motion control - custom
480
  custom_camera_motion = gr.Markdown(f"---\n### {CAMERA_MOTION_MODE[2]}", show_label=False, visible=False)
481
+ # custom_run_status = gr.Markdown(f"\n 1. Click two of the basic camera poses, such as `Pan Up` and `Pan Left`; \
482
+ # \n 2. Click `Customized Mode 1: First A then B` or `Customized Mode 1: First A then B` \
483
+ # \n - `Customized Mode 1: First A then B`: The camera first `Pan Up` and then `Pan Left`; \
484
+ # \n - `Customized Mode 2: Both A and B`: The camera move towards the upper left corner; \
485
+ # \n 3. Slide the `Motion speed` to get a speed value. The large the value, the fast the camera motion; \
486
+ # \n 4. Click `Visualize Camera and Proceed` to visualize the camera poses and go proceed; \
487
+ # \n 5. Click `Reset Camera` to reset the camera poses (If needed). ",
488
+ # show_label=False, visible=False)
489
+ custom_run_status = gr.Markdown(f"\n 1. Click `{DIY_MODE[0]}`, `{DIY_MODE[1]}`, or `{DIY_MODE[2]}` \
490
+ \n - `Customized Mode 1: First A then B`: For example, click `Pan Up` and `Pan Left`, the camera will first `Pan Up` and then `Pan Left`; \
491
+ \n - `Customized Mode 2: Both A and B`: For example, click `Pan Up` and `Pan Left`, the camera will move towards the upper left corner; \
492
+ \n - `{DIY_MODE[2]}`: Input the RAW RT matrix yourself. \
493
+ \n 2. Slide the `Motion speed` to get a speed value. The larger the value, the faster the camera motion; \
494
+ \n 3. Click `Visualize Camera and Proceed` to visualize the camera poses and proceed; \
495
+ \n 4. Click `Reset Camera` to reset the camera poses (If needed). ",
496
  show_label=False, visible=False)
497
 
498
+ # gr.HighlightedText(value=[("",""), ("1. Select two of the basic camera poses; 2. Select Customized Mode 1 OR Customized Mode 2. 3. Visualized Camera to show the customized camera poses", "Normal")],
499
+ # color_map={"Normal": "green", "Error": "red", "Clear clicks": "gray", "Add mask": "green", "Remove mask": "red"}, visible=False)
500
+
501
  gr.HighlightedText(value=[("",""), ("1. Select two of the basic camera poses; 2. Select Customized Mode 1 OR Customized Mode 2. 3. Visualized Camera to show the customized camera poses", "Normal")],
502
  color_map={"Normal": "green", "Error": "red", "Clear clicks": "gray", "Add mask": "green", "Remove mask": "red"}, visible=False)
503
 
504
+ with gr.Row():
505
+ combine1 = gr.Button(value=DIY_MODE[0], visible=False)
506
+ combine2 = gr.Button(value=DIY_MODE[1], visible=False)
507
+ combine3 = gr.Button(value=DIY_MODE[2], visible=False)
508
+ with gr.Row():
509
+ combine3_des = gr.Markdown(f"---\n#### Input your camera pose in the following textbox. \
510
+ A total of 14 lines and each line contains 12 float numbers, indicating \
511
+ the RT matrix in the shape of 1x12. \
512
+ The example is the RT matrix of ZOOM IN.", show_label=False, visible=False)
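The textbox's default value is the commit's own ZOOM IN example. As an illustration of the expected format, the hedged sketch below builds a compatible string: identity rotation plus a growing translation along -z, the zoom-in direction in `T_base` above. `zoom_in_raw_poses` is a hypothetical helper, and the frame count follows the `num_frames=16` default that `process_camera` asserts against in this diff.

```python
# Sketch: compose a "Customized Mode 3: RAW Camera Poses" textbox value.
# Each frame contributes one row-major 3x4 [R|T] block, i.e. 12 floats per line.
import numpy as np

def zoom_in_raw_poses(num_frames=16, step=0.1):
    lines = []
    for i in range(num_frames):
        RT = np.hstack([np.eye(3), [[0.0], [0.0], [-i * step]]])  # R = I, T moves along -z (zoom in)
        lines.append(" ".join(f"{v:.4f}" for v in RT.reshape(-1)))
    return "\n".join(lines)

print(zoom_in_raw_poses())
```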
513
+
514
+
515
  with gr.Row():
516
  U = gr.Button(value="Pan Up", visible=False)
517
  D = gr.Button(value="Pan Down", visible=False)
 
523
  ACW = gr.Button(value="ACW", visible=False)
524
  CW = gr.Button(value="CW", visible=False)
525
 
526
+ # with gr.Row():
527
+ # combine1 = gr.Button(value="Customized Mode 1: First A then B", visible=False)
528
+ # combine2 = gr.Button(value="Customized Mode 2: Both A and B", visible=False)
529
 
530
  with gr.Row():
531
  speed = gr.Slider(minimum=0, maximum=2, step=0.2, label="Motion Speed", value=1.0, visible=False)
 
607
  with gr.Column():
608
  step3_prompt_generate = gr.Markdown("---\n## Step 3/3: Add prompt and Generate videos", show_label=False, visible=False)
609
  prompt = gr.Textbox(value="a dog sitting on grass", label="Prompt", interactive=True, visible=False)
610
+ n_samples = gr.Number(value=2, precision=0, interactive=True, label="n_samples", visible=False)
611
  seed = gr.Number(value=1234, precision=0, interactive=True, label="Seed", visible=False)
612
  start = gr.Button(value="Start generation !", visible=False)
613
  with gr.Column():
614
  gen_video = gr.Video(value=None, label="Generate Video", visible=False)
615
 
616
+ choose_model_button.click(
617
+ fn=visualized_step1,
618
+ inputs=[choose_model],
619
+ outputs=[
620
+ step1, setp1_dec, infer_mode, mode_info,
621
+ step2_camera_motion,
622
+ step2_camera_motion_des,
623
+ camera_mode,
624
+ camera_info,
625
+
626
+ basic_camera_motion,
627
+ basic_camera_motion_des,
628
+ custom_camera_motion,
629
+ custom_run_status,
630
+ complex_camera_motion,
631
+ complex_camera_motion_des,
632
+ U, D, L, R,
633
+ I, O, ACW, CW,
634
+ combine1, combine2, combine3, combine3_des,
635
+ speed,
636
+ Pose_1, Pose_2, Pose_3, Pose_4,
637
+ Pose_5, Pose_6, Pose_7, Pose_8,
638
+ camera_args,
639
+ camera_reset, camera_vis,
640
+ vis_camera,
641
+
642
+ step2_object_motion,
643
+ step2_object_motion_des,
644
+ object_mode,
645
+ object_info,
646
+
647
+ provided_traj,
648
+ provided_traj_des,
649
+ draw_traj,
650
+ draw_run_status,
651
+ traj_1, traj_2, traj_3, traj_4,
652
+ traj_5, traj_6, traj_7, traj_8,
653
+ traj_args,
654
+ traj_droplast, traj_reset,
655
+ traj_vis,
656
+ traj_input, vis_traj,
657
+
658
+ step2_camera_object_motion,
659
+ step2_camera_object_motion_des,
660
+
661
+ step3_prompt_generate, prompt, n_samples, seed, start, gen_video,
662
+
663
+ ],
664
+ )
665
+
666
  mode_info.click(
667
  fn=visualized_step2,
668
  inputs=[infer_mode],
 
679
  complex_camera_motion_des,
680
  U, D, L, R,
681
  I, O, ACW, CW,
682
+ combine1, combine2, combine3, combine3_des,
683
  speed,
684
  Pose_1, Pose_2, Pose_3, Pose_4,
685
  Pose_5, Pose_6, Pose_7, Pose_8,
 
722
  complex_camera_motion_des,
723
  U, D, L, R,
724
  I, O, ACW, CW,
725
+ combine1, combine2, combine3, combine3_des,
726
  speed,
727
  Pose_1, Pose_2, Pose_3, Pose_4,
728
  Pose_5, Pose_6, Pose_7, Pose_8,
 
760
  speed.change(fn=change_camera_speed, inputs=speed, outputs=camera_args)
761
  camera_reset.click(fn=reset_camera, inputs=None, outputs=[camera_args])
762
 
763
+ combine1.click(fn=change_camera_mode,
764
+ inputs=[combine1, camera_mode],
765
+ outputs=[camera_args,
766
+ U, D, L, R,
767
+ I, O, ACW, CW, speed,
768
+ combine3_des])
769
+ combine2.click(fn=change_camera_mode,
770
+ inputs=[combine2, camera_mode],
771
+ outputs=[camera_args,
772
+ U, D, L, R,
773
+ I, O, ACW, CW, speed,
774
+ combine3_des])
775
+ combine3.click(fn=input_raw_camera_pose,
776
+ inputs=[combine3, camera_mode],
777
+ outputs=[camera_args,
778
+ U, D, L, R,
779
+ I, O, ACW, CW,
780
+ speed,
781
+ combine3_des])
782
+
783
+ camera_vis.click(fn=fn_vis_camera, inputs=[infer_mode, camera_args], outputs=[vis_camera, object_mode, object_info, step3_prompt_generate, prompt, n_samples, seed, start, gen_video])
784
 
785
  Pose_1.click(fn=add_complex_camera_motion, inputs=Pose_1, outputs=camera_args)
786
  Pose_2.click(fn=add_complex_camera_motion, inputs=Pose_2, outputs=camera_args)
 
806
  traj_reset.click(fn=fn_traj_reset, inputs=None, outputs=traj_args)
807
 
808
 
809
+ start.click(fn=model_run, inputs=[prompt, choose_model, infer_mode, seed, n_samples, camera_args], outputs=gen_video)
810
 
811
  gr.Markdown(article)
812
 
gradio_utils/camera_utils.py CHANGED
@@ -95,7 +95,20 @@ def combine_camera_motion(RT_0, RT_1):
95
 
96
  return np.concatenate([RT_0, RT_1], axis=0)
97
 
98
- def process_camera(camera_dict):
99
  # "First A then B", "Both A and B", "Custom"
100
  if camera_dict['complex'] is not None:
101
  with open(COMPLEX_CAMERA[camera_dict['complex']]) as f:
@@ -105,9 +118,6 @@ def process_camera(camera_dict):
105
  return RT
106
 
107
 
108
- motion_list = camera_dict['motion']
109
- mode = camera_dict['mode']
110
- speed = camera_dict['speed']
111
  print(len(motion_list))
112
  if len(motion_list) == 0:
113
  angle = np.array([0,0,0])
 
95
 
96
  return np.concatenate([RT_0, RT_1], axis=0)
97
 
98
+ def process_camera(camera_dict, camera_args=None, num_frames=16):
99
+ speed = camera_dict['speed']
100
+ motion_list = camera_dict['motion']
101
+ mode = camera_dict['mode']
102
+
103
+ if mode == 'Customized Mode 3: RAW Camera Poses':
104
+ print(camera_args)
105
+ RT = camera_args.strip().split()
106
+ assert(len(RT) == num_frames*12), "The number of camera poses should be equal to the number of frames"
107
+ RT = [float(x) for x in RT]
108
+ RT = np.array(RT).reshape(-1, 3, 4)
109
+ RT[:, :, -1] = RT[:, :, -1] * np.array([1.5, 1, 1.3]) * speed
110
+ return RT
111
+
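A quick usage sketch of the branch just added (an assumption-laden call, not repo code): it feeds `process_camera` a whitespace-separated string of 16 x 12 floats and checks the resulting pose shape.

```python
# Hypothetical call into the new RAW-pose branch of process_camera (this diff).
from gradio_utils.camera_utils import process_camera

camera_dict = {"motion": [], "mode": "Customized Mode 3: RAW Camera Poses",
               "speed": 1.0, "complex": None}
raw = " ".join(["1 0 0 0  0 1 0 0  0 0 1 -0.1"] * 16)   # 16 frames of identity R, small -z translation
RT = process_camera(camera_dict, camera_args=raw)
print(RT.shape)   # expected (16, 3, 4); translations get scaled by [1.5, 1, 1.3] * speed
```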
112
  # "First A then B", "Both A and B", "Custom"
113
  if camera_dict['complex'] is not None:
114
  with open(COMPLEX_CAMERA[camera_dict['complex']]) as f:
 
118
  return RT
119
 
120
121
  print(len(motion_list))
122
  if len(motion_list) == 0:
123
  angle = np.array([0,0,0])
gradio_utils/page_control.py ADDED
@@ -0,0 +1,580 @@
1
+ import gradio as gr
2
+ from gradio_utils.camera_utils import CAMERA_MOTION_MODE
3
+ from gradio_utils.traj_utils import get_provided_traj
4
+
5
+ MODE = ["control camera poses", "control object trajectory", "control both camera and object motion"]
6
+
7
+ BASE_MODEL = ['LVDM/VideoCrafter', 'VideoCrafter2']
8
+
9
+ def display_camera_info(camera_dict, camera_mode=None):
10
+ if camera_dict['complex'] is not None:
11
+ res = f"complex : {camera_dict['complex']}. "
12
+ else:
13
+ res = ""
14
+ res += f"motion : {[_ for _ in camera_dict['motion']]}. "
15
+ res += f"speed : {camera_dict['speed']}. "
16
+ if camera_mode == CAMERA_MOTION_MODE[2]:
17
+ res += f"mode : {camera_dict['mode']}. "
18
+ return res
19
+
20
+ traj_list = []
21
+ camera_dict = {
22
+ "motion":[],
23
+ "mode": "Customized Mode 1: First A then B", # "First A then B", "Both A and B", "Custom"
24
+ "speed": 1.0,
25
+ "complex": None
26
+ }
27
+
28
+ def reset_camera():
29
+ # global camera_dict
30
+ camera_dict = {
31
+ "motion":[],
32
+ "mode": "Customized Mode 1: First A then B",
33
+ "speed": 1.0,
34
+ "complex": None
35
+ }
36
+ return display_camera_info(camera_dict)
37
+
38
+ def fn_traj_reset():
39
+ # global traj_list
40
+ traj_list = []
41
+ return "Click to specify trajectory"
42
+
43
+ def visualized_step1(model_name):
44
+
45
+ # reset
46
+ reset_camera()
47
+ fn_traj_reset()
48
+
49
+ # camera motion control
50
+ vis_basic_camera_motion = False
51
+ vis_basic_camera_motion_des = False
52
+ vis_custom_camera_motion = False
53
+ vis_custom_run_status = False
54
+ vis_complex_camera_motion = False
55
+ vis_complex_camera_motion_des = False
56
+ vis_U = False
57
+ vis_D = False
58
+ vis_L = False
59
+ vis_R = False
60
+ vis_I = False
61
+ vis_O = False
62
+ vis_ACW = False
63
+ vis_CW = False
64
+ vis_combine1 = False
65
+ vis_combine2 = False
66
+ vis_combine3 = False
67
+ vis_combine3_des = False
68
+ vis_speed = False
69
+
70
+ vis_Pose_1, vis_Pose_2, vis_Pose_3, vis_Pose_4 = False, False, False, False
71
+ vis_Pose_5, vis_Pose_6, vis_Pose_7, vis_Pose_8 = False, False, False, False
72
+
73
+ vis_camera_args = False
74
+ vis_camera_reset = False
75
+ vis_camera_vis = False
76
+ vis_vis_camera = False
77
+
78
+ # object motion control
79
+ vis_provided_traj = False
80
+ vis_provided_traj_des = False
81
+ vis_draw_yourself = False
82
+ vis_draw_run_status = False
83
+
84
+ vis_traj_1, vis_traj_2, vis_traj_3, vis_traj_4 = False, False, False, False
85
+ vis_traj_5, vis_traj_6, vis_traj_7, vis_traj_8 = False, False, False, False
86
+
87
+ traj_args = False
88
+ traj_droplast, traj_reset = False, False
89
+ traj_vis = False
90
+ traj_input, vis_traj = False, False
91
+
92
+
93
+ # generate video
94
+ vis_step3_prompt_generate = False
95
+ vis_prompt = False
96
+ vis_num_samples = False
97
+ vis_seed = False
98
+ vis_start = False
99
+ vis_gen_video = False
100
+
101
+ vis_step2_camera_motion = False
102
+ vis_step2_camera_motion_des = False
103
+ vis_camera_mode = False
104
+ vis_camera_info = False
105
+
106
+ vis_step2_object_motion = False
107
+ vis_step2_object_motion_des = False
108
+ vis_traj_mode = False
109
+ vis_traj_info = False
110
+
111
+ step2_camera_object_motion = False
112
+ step2_camera_object_motion_des = False
113
+
114
+ vis_step1 = True
115
+ vis_step1_dec = True
116
+ vis_infer_mode = True
117
+ mode_info = True
118
+
119
+ if model_name == BASE_MODEL[0]:
120
+ interative_mode = True
121
+ else:
122
+ interative_mode = False
123
+
124
+ return gr.update(visible=vis_step1), \
125
+ gr.update(visible=vis_step1_dec), \
126
+ gr.update(visible=vis_infer_mode, value=MODE[0], interactive=interative_mode), \
127
+ gr.update(visible=mode_info), \
128
+ gr.update(visible=vis_step2_camera_motion), \
129
+ gr.update(visible=vis_step2_camera_motion_des), \
130
+ gr.update(visible=vis_camera_mode), \
131
+ gr.update(visible=vis_camera_info), \
132
+ gr.update(visible=vis_basic_camera_motion), \
133
+ gr.update(visible=vis_basic_camera_motion_des), \
134
+ gr.update(visible=vis_custom_camera_motion), \
135
+ gr.update(visible=vis_custom_run_status), \
136
+ gr.update(visible=vis_complex_camera_motion), \
137
+ gr.update(visible=vis_complex_camera_motion_des), \
138
+ gr.update(visible=vis_U), gr.update(visible=vis_D), gr.update(visible=vis_L), gr.update(visible=vis_R), \
139
+ gr.update(visible=vis_I), gr.update(visible=vis_O), gr.update(visible=vis_ACW), gr.update(visible=vis_CW), \
140
+ gr.update(visible=vis_combine1), gr.update(visible=vis_combine2), gr.update(visible=vis_combine3), gr.update(visible=vis_combine3_des), \
141
+ gr.update(visible=vis_speed), \
142
+ gr.update(visible=vis_Pose_1), gr.update(visible=vis_Pose_2), gr.update(visible=vis_Pose_3), gr.update(visible=vis_Pose_4), \
143
+ gr.update(visible=vis_Pose_5), gr.update(visible=vis_Pose_6), gr.update(visible=vis_Pose_7), gr.update(visible=vis_Pose_8), \
144
+ gr.update(visible=vis_camera_args, value=None), \
145
+ gr.update(visible=vis_camera_reset), gr.update(visible=vis_camera_vis), \
146
+ gr.update(visible=vis_vis_camera, value=None), \
147
+ gr.update(visible=vis_step2_object_motion), \
148
+ gr.update(visible=vis_step2_object_motion_des), \
149
+ gr.update(visible=vis_traj_mode), \
150
+ gr.update(visible=vis_traj_info), \
151
+ gr.update(visible=vis_provided_traj), \
152
+ gr.update(visible=vis_provided_traj_des), \
153
+ gr.update(visible=vis_draw_yourself), \
154
+ gr.update(visible=vis_draw_run_status), \
155
+ gr.update(visible=vis_traj_1), gr.update(visible=vis_traj_2), gr.update(visible=vis_traj_3), gr.update(visible=vis_traj_4), \
156
+ gr.update(visible=vis_traj_5), gr.update(visible=vis_traj_6), gr.update(visible=vis_traj_7), gr.update(visible=vis_traj_8), \
157
+ gr.update(visible=traj_args), \
158
+ gr.update(visible=traj_droplast), gr.update(visible=traj_reset), \
159
+ gr.update(visible=traj_vis), \
160
+ gr.update(visible=traj_input), gr.update(visible=vis_traj, value=None), \
161
+ gr.update(visible=step2_camera_object_motion), \
162
+ gr.update(visible=step2_camera_object_motion_des), \
163
+ gr.update(visible=vis_step3_prompt_generate), \
164
+ gr.update(visible=vis_prompt), \
165
+ gr.update(visible=vis_num_samples), \
166
+ gr.update(visible=vis_seed), \
167
+ gr.update(visible=vis_start), \
168
+ gr.update(visible=vis_gen_video)
169
+
170
+
171
+ def visualized_step2(infer_mode):
172
+
173
+ # reset
174
+ reset_camera()
175
+ fn_traj_reset()
176
+
177
+ # camera motion control
178
+ vis_basic_camera_motion = False
179
+ vis_basic_camera_motion_des = False
180
+ vis_custom_camera_motion = False
181
+ vis_custom_run_status = False
182
+ vis_complex_camera_motion = False
183
+ vis_complex_camera_motion_des = False
184
+ vis_U = False
185
+ vis_D = False
186
+ vis_L = False
187
+ vis_R = False
188
+ vis_I = False
189
+ vis_O = False
190
+ vis_ACW = False
191
+ vis_CW = False
192
+ vis_combine1 = False
193
+ vis_combine2 = False
194
+ vis_combine3 = False
195
+ vis_combine3_des = False
196
+ vis_speed = False
197
+
198
+ vis_Pose_1, vis_Pose_2, vis_Pose_3, vis_Pose_4 = False, False, False, False
199
+ vis_Pose_5, vis_Pose_6, vis_Pose_7, vis_Pose_8 = False, False, False, False
200
+
201
+ vis_camera_args = False
202
+ vis_camera_reset = False
203
+ vis_camera_vis = False
204
+ vis_vis_camera = False
205
+
206
+ # object motion control
207
+ vis_provided_traj = False
208
+ vis_provided_traj_des = False
209
+ vis_draw_yourself = False
210
+ vis_draw_run_status = False
211
+
212
+ vis_traj_1, vis_traj_2, vis_traj_3, vis_traj_4 = False, False, False, False
213
+ vis_traj_5, vis_traj_6, vis_traj_7, vis_traj_8 = False, False, False, False
214
+
215
+ traj_args = False
216
+ traj_droplast, traj_reset = False, False
217
+ traj_vis = False
218
+ traj_input, vis_traj = False, False
219
+
220
+
221
+ # generate video
222
+ vis_step3_prompt_generate = False
223
+ vis_prompt = False
224
+ vis_num_samples = False
225
+ vis_seed = False
226
+ vis_start = False
227
+ vis_gen_video = False
228
+
229
+ if infer_mode == MODE[0]:
230
+ vis_step2_camera_motion = True
231
+ vis_step2_camera_motion_des = True
232
+ vis_camera_mode = True
233
+ vis_camera_info = True
234
+
235
+ vis_step2_object_motion = False
236
+ vis_step2_object_motion_des = False
237
+ vis_traj_mode = False
238
+ vis_traj_info = False
239
+
240
+ step2_camera_object_motion = False
241
+ step2_camera_object_motion_des = False
242
+
243
+ elif infer_mode == MODE[1]:
244
+ vis_step2_camera_motion = False
245
+ vis_step2_camera_motion_des = False
246
+ vis_camera_mode = False
247
+ vis_camera_info = False
248
+
249
+ vis_step2_object_motion = True
250
+ vis_step2_object_motion_des = True
251
+ vis_traj_mode = True
252
+ vis_traj_info = True
253
+
254
+ step2_camera_object_motion = False
255
+ step2_camera_object_motion_des = False
256
+ else: #infer_mode == MODE[2]:
257
+ vis_step2_camera_motion = True
258
+ vis_step2_camera_motion_des = True
259
+ vis_camera_mode = True
260
+ vis_camera_info = True
261
+
262
+ vis_step2_object_motion = False
263
+ vis_step2_object_motion_des = False
264
+ vis_traj_mode = False
265
+ vis_traj_info = False
266
+
267
+ step2_camera_object_motion = True
268
+ step2_camera_object_motion_des = True
269
+
+     return gr.update(visible=vis_step2_camera_motion), \
+            gr.update(visible=vis_step2_camera_motion_des), \
+            gr.update(visible=vis_camera_mode), \
+            gr.update(visible=vis_camera_info), \
+            gr.update(visible=vis_basic_camera_motion), \
+            gr.update(visible=vis_basic_camera_motion_des), \
+            gr.update(visible=vis_custom_camera_motion), \
+            gr.update(visible=vis_custom_run_status), \
+            gr.update(visible=vis_complex_camera_motion), \
+            gr.update(visible=vis_complex_camera_motion_des), \
+            gr.update(visible=vis_U), gr.update(visible=vis_D), gr.update(visible=vis_L), gr.update(visible=vis_R), \
+            gr.update(visible=vis_I), gr.update(visible=vis_O), gr.update(visible=vis_ACW), gr.update(visible=vis_CW), \
+            gr.update(visible=vis_combine1), gr.update(visible=vis_combine2), gr.update(visible=vis_combine3), gr.update(visible=vis_combine3_des), \
+            gr.update(visible=vis_speed), \
+            gr.update(visible=vis_Pose_1), gr.update(visible=vis_Pose_2), gr.update(visible=vis_Pose_3), gr.update(visible=vis_Pose_4), \
+            gr.update(visible=vis_Pose_5), gr.update(visible=vis_Pose_6), gr.update(visible=vis_Pose_7), gr.update(visible=vis_Pose_8), \
+            gr.update(visible=vis_camera_args, value=None), \
+            gr.update(visible=vis_camera_reset), gr.update(visible=vis_camera_vis), \
+            gr.update(visible=vis_vis_camera, value=None), \
+            gr.update(visible=vis_step2_object_motion), \
+            gr.update(visible=vis_step2_object_motion_des), \
+            gr.update(visible=vis_traj_mode), \
+            gr.update(visible=vis_traj_info), \
+            gr.update(visible=vis_provided_traj), \
+            gr.update(visible=vis_provided_traj_des), \
+            gr.update(visible=vis_draw_yourself), \
+            gr.update(visible=vis_draw_run_status), \
+            gr.update(visible=vis_traj_1), gr.update(visible=vis_traj_2), gr.update(visible=vis_traj_3), gr.update(visible=vis_traj_4), \
+            gr.update(visible=vis_traj_5), gr.update(visible=vis_traj_6), gr.update(visible=vis_traj_7), gr.update(visible=vis_traj_8), \
+            gr.update(visible=traj_args), \
+            gr.update(visible=traj_droplast), gr.update(visible=traj_reset), \
+            gr.update(visible=traj_vis), \
+            gr.update(visible=traj_input), gr.update(visible=vis_traj, value=None), \
+            gr.update(visible=step2_camera_object_motion), \
+            gr.update(visible=step2_camera_object_motion_des), \
+            gr.update(visible=vis_step3_prompt_generate), \
+            gr.update(visible=vis_prompt), \
+            gr.update(visible=vis_num_samples), \
+            gr.update(visible=vis_seed), \
+            gr.update(visible=vis_start), \
+            gr.update(visible=vis_gen_video)
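The handler above returns one gr.update(visible=...) per UI component, and Gradio assigns the returned tuple positionally to the event's outputs list, so the order of the return values must match the order of the wired components. A minimal, self-contained sketch of that pattern (component and function names here are illustrative, not the ones used in this commit):

import gradio as gr

MODES = ["control camera poses", "control object trajectory"]

def toggle_panels(mode):
    # One gr.update per output component, in the same order as `outputs` below.
    show_camera = (mode == MODES[0])
    return gr.update(visible=show_camera), gr.update(visible=not show_camera)

with gr.Blocks() as demo:
    mode = gr.Radio(MODES, value=MODES[0], label="motion control mode")
    camera_panel = gr.Markdown("camera-motion controls", visible=True)
    traj_panel = gr.Markdown("object-trajectory controls", visible=False)
    mode.change(fn=toggle_panels, inputs=mode, outputs=[camera_panel, traj_panel])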
+
+ def visualized_camera_poses(step2_camera_motion):
+     reset_camera()
+
+     # generate video
+     vis_step3_prompt_generate = False
+     vis_prompt = False
+     vis_num_samples = False
+     vis_seed = False
+     vis_start = False
+     vis_gen_video = False
+
+     if step2_camera_motion == CAMERA_MOTION_MODE[0]:
+         vis_basic_camera_motion = True
+         vis_basic_camera_motion_des = True
+         vis_custom_camera_motion = False
+         vis_custom_run_status = False
+         vis_complex_camera_motion = False
+         vis_complex_camera_motion_des = False
+         vis_U = True
+         vis_D = True
+         vis_L = True
+         vis_R = True
+         vis_I = True
+         vis_O = True
+         vis_ACW = True
+         vis_CW = True
+         vis_combine1 = False
+         vis_combine2 = False
+         vis_combine3 = False
+         vis_combine3_des = False
+         vis_speed = True
+
+         vis_Pose_1, vis_Pose_2, vis_Pose_3, vis_Pose_4 = False, False, False, False
+         vis_Pose_5, vis_Pose_6, vis_Pose_7, vis_Pose_8 = False, False, False, False
+
+     elif step2_camera_motion == CAMERA_MOTION_MODE[1]:
+         vis_basic_camera_motion = False
+         vis_basic_camera_motion_des = False
+         vis_custom_camera_motion = False
+         vis_custom_run_status = False
+         vis_complex_camera_motion = True
+         vis_complex_camera_motion_des = True
+         vis_U = False
+         vis_D = False
+         vis_L = False
+         vis_R = False
+         vis_I = False
+         vis_O = False
+         vis_ACW = False
+         vis_CW = False
+         vis_combine1 = False
+         vis_combine2 = False
+         vis_combine3 = False
+         vis_combine3_des = False
+         vis_speed = False
+
+         vis_Pose_1, vis_Pose_2, vis_Pose_3, vis_Pose_4 = True, True, True, True
+         vis_Pose_5, vis_Pose_6, vis_Pose_7, vis_Pose_8 = True, True, True, True
+
+     else:  # step2_camera_motion == CAMERA_MOTION_MODE[2]
+         vis_basic_camera_motion = False
+         vis_basic_camera_motion_des = False
+         vis_custom_camera_motion = True
+         vis_custom_run_status = True
+         vis_complex_camera_motion = False
+         vis_complex_camera_motion_des = False
+         vis_U = False
+         vis_D = False
+         vis_L = False
+         vis_R = False
+         vis_I = False
+         vis_O = False
+         vis_ACW = False
+         vis_CW = False
+         vis_combine1 = True
+         vis_combine2 = True
+         vis_combine3 = True
+         vis_combine3_des = True
+         vis_speed = False
+
+         vis_Pose_1, vis_Pose_2, vis_Pose_3, vis_Pose_4 = False, False, False, False
+         vis_Pose_5, vis_Pose_6, vis_Pose_7, vis_Pose_8 = False, False, False, False
+
+         vis_camera_args = True
+         vis_camera_reset = True
+         vis_camera_vis = True
+         vis_vis_camera = True
+
+     return gr.update(visible=vis_basic_camera_motion), \
+            gr.update(visible=vis_basic_camera_motion_des), \
+            gr.update(visible=vis_custom_camera_motion), \
+            gr.update(visible=vis_custom_run_status), \
+            gr.update(visible=vis_complex_camera_motion), \
+            gr.update(visible=vis_complex_camera_motion_des), \
+            gr.update(visible=vis_U), gr.update(visible=vis_D), gr.update(visible=vis_L), gr.update(visible=vis_R), \
+            gr.update(visible=vis_I), gr.update(visible=vis_O), gr.update(visible=vis_ACW), gr.update(visible=vis_CW), \
+            gr.update(visible=vis_combine1), gr.update(visible=vis_combine2), gr.update(visible=vis_combine3), gr.update(visible=vis_combine3_des), \
+            gr.update(visible=vis_speed), \
+            gr.update(visible=vis_Pose_1), gr.update(visible=vis_Pose_2), gr.update(visible=vis_Pose_3), gr.update(visible=vis_Pose_4), \
+            gr.update(visible=vis_Pose_5), gr.update(visible=vis_Pose_6), gr.update(visible=vis_Pose_7), gr.update(visible=vis_Pose_8), \
+            gr.update(visible=vis_camera_args, value=None), \
+            gr.update(visible=vis_camera_reset), gr.update(visible=vis_camera_vis), \
+            gr.update(visible=vis_vis_camera, value=None), \
+            gr.update(visible=vis_step3_prompt_generate), \
+            gr.update(visible=vis_prompt), \
+            gr.update(visible=vis_num_samples), \
+            gr.update(visible=vis_seed), \
+            gr.update(visible=vis_start), \
+            gr.update(visible=vis_gen_video)
+
+ def visualized_traj_poses(step2_object_motion):
+
+     fn_traj_reset()
+
+     # generate video
+     vis_step3_prompt_generate = False
+     vis_prompt = False
+     vis_num_samples = False
+     vis_seed = False
+     vis_start = False
+     vis_gen_video = False
+
+     if step2_object_motion == "Provided Trajectory":
+         vis_provided_traj = True
+         vis_provided_traj_des = True
+         vis_draw_yourself = False
+         vis_draw_run_status = False
+
+         vis_traj_1, vis_traj_2, vis_traj_3, vis_traj_4 = True, True, True, True
+         vis_traj_5, vis_traj_6, vis_traj_7, vis_traj_8 = True, True, True, True
+
+         traj_args = True
+         traj_droplast, traj_reset = False, True
+         traj_vis = True
+         traj_input, vis_traj = False, True
+
+
+     elif step2_object_motion == "Custom Trajectory":
+         vis_provided_traj = False
+         vis_provided_traj_des = False
+         vis_draw_yourself = True
+         vis_draw_run_status = True
+
+         vis_traj_1, vis_traj_2, vis_traj_3, vis_traj_4 = False, False, False, False
+         vis_traj_5, vis_traj_6, vis_traj_7, vis_traj_8 = False, False, False, False
+
+         traj_args = True
+         traj_droplast, traj_reset = True, True
+         traj_vis = True
+         traj_input, vis_traj = True, True
+
+     return gr.update(visible=vis_provided_traj), \
+            gr.update(visible=vis_provided_traj_des), \
+            gr.update(visible=vis_draw_yourself), \
+            gr.update(visible=vis_draw_run_status), \
+            gr.update(visible=vis_traj_1), gr.update(visible=vis_traj_2), gr.update(visible=vis_traj_3), gr.update(visible=vis_traj_4), \
+            gr.update(visible=vis_traj_5), gr.update(visible=vis_traj_6), gr.update(visible=vis_traj_7), gr.update(visible=vis_traj_8), \
+            gr.update(visible=traj_args), \
+            gr.update(visible=traj_droplast), gr.update(visible=traj_reset), \
+            gr.update(visible=traj_vis), \
+            gr.update(visible=traj_input), gr.update(visible=vis_traj, value=None), \
+            gr.update(visible=vis_step3_prompt_generate), \
+            gr.update(visible=vis_prompt), \
+            gr.update(visible=vis_num_samples), \
+            gr.update(visible=vis_seed), \
+            gr.update(visible=vis_start), \
+            gr.update(visible=vis_gen_video)
+
+ def add_camera_motion(camera_motion, camera_mode):
+     # global camera_dict
+     if camera_dict['complex'] is not None:
+         camera_dict['complex'] = None
+     if camera_mode == CAMERA_MOTION_MODE[2] and len(camera_dict['motion']) < 2:
+         camera_dict['motion'].append(camera_motion)
+     else:
+         camera_dict['motion'] = [camera_motion]
+
+     return display_camera_info(camera_dict, camera_mode)
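In the customized camera mode (CAMERA_MOTION_MODE[2]) up to two basic motions are queued and later combined according to camera_dict['mode']; in any other mode, or once two motions are already queued, the new selection replaces the list, and picking a basic motion also clears any previously chosen complex trajectory. A small usage sketch; the motion labels below are placeholders, not the actual button values:

camera_dict['motion'] = []
add_camera_motion("U", CAMERA_MOTION_MODE[2])  # queued: ["U"]
add_camera_motion("I", CAMERA_MOTION_MODE[2])  # queued: ["U", "I"] (at most two are kept)
add_camera_motion("L", CAMERA_MOTION_MODE[0])  # basic mode: the list is replaced -> ["L"]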
+
+ def add_complex_camera_motion(camera_motion):
+     # global camera_dict
+     camera_dict['complex'] = camera_motion
+     return display_camera_info(camera_dict)
+
+ def change_camera_mode(combine_type, camera_mode):
+     global camera_dict
+     camera_dict['mode'] = combine_type
+
+     # return display_camera_info(camera_dict, camera_mode)
+     vis_U = True
+     vis_D = True
+     vis_L = True
+     vis_R = True
+     vis_I = True
+     vis_O = True
+     vis_ACW = True
+     vis_CW = True
+     vis_speed = True
+     vis_combine3_des = False
+
+     return display_camera_info(camera_dict, camera_mode), \
+            gr.update(visible=vis_U), \
+            gr.update(visible=vis_D), \
+            gr.update(visible=vis_L), \
+            gr.update(visible=vis_R), \
+            gr.update(visible=vis_I), \
+            gr.update(visible=vis_O), \
+            gr.update(visible=vis_ACW), \
+            gr.update(visible=vis_CW), \
+            gr.update(visible=vis_speed), \
+            gr.update(visible=vis_combine3_des)
+
+ def input_raw_camera_pose(combine_type, camera_mode):
+     # global camera_dict
+     camera_dict['mode'] = combine_type
+
+     vis_U = False
+     vis_D = False
+     vis_L = False
+     vis_R = False
+     vis_I = False
+     vis_O = False
+     vis_ACW = False
+     vis_CW = False
+     vis_speed = True
+     vis_combine3_des = True
+
+     return gr.update(value='1 0 0 0 0 1 0 0 0 0 1 0\n1 0 0 0 0 1 0 0 0 0 1 -0.225\n1 0 0 0 0 1 0 0 0 0 1 -0.45\n1 0 0 0 0 1 0 0 0 0 1 -0.675\n1 0 0 0 0 1 0 0 0 0 1 -0.9\n1 0 0 0 0 1 0 0 0 0 1 -1.125\n1 0 0 0 0 1 0 0 0 0 1 -1.35\n1 0 0 0 0 1 0 0 0 0 1 -1.575\n1 0 0 0 0 1 0 0 0 0 1 -1.8\n1 0 0 0 0 1 0 0 0 0 1 -2.025\n1 0 0 0 0 1 0 0 0 0 1 -2.25\n1 0 0 0 0 1 0 0 0 0 1 -2.475\n1 0 0 0 0 1 0 0 0 0 1 -2.7\n1 0 0 0 0 1 0 0 0 0 1 -2.925\n1 0 0 0 0 1 0 0 0 0 1 -3.15\n1 0 0 0 0 1 0 0 0 0 1 -3.375\n', max_lines=16, interactive=True), \
+            gr.update(visible=vis_U), \
+            gr.update(visible=vis_D), \
+            gr.update(visible=vis_L), \
+            gr.update(visible=vis_R), \
+            gr.update(visible=vis_I), \
+            gr.update(visible=vis_O), \
+            gr.update(visible=vis_ACW), \
+            gr.update(visible=vis_CW), \
+            gr.update(visible=vis_speed), \
+            gr.update(visible=vis_combine3_des)
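The textbox is pre-filled with 16 lines, one per generated frame, and each line holds 12 numbers: a row-major 3x4 [R|T] world-to-camera matrix (here an identity rotation with a translation that steps along the z axis). A minimal sketch of how such raw text could be turned into a [16, 3, 4] array; this is a hypothetical helper for illustration, and the repo's own handling of this text (presumably inside process_camera) may differ:

import numpy as np

def parse_raw_pose_text(pose_text, num_frames=16):
    # Each non-empty line: 12 floats forming one row-major 3x4 [R|T] matrix.
    rows = [line.split() for line in pose_text.strip().splitlines() if line.strip()]
    assert len(rows) == num_frames and all(len(r) == 12 for r in rows)
    return np.asarray(rows, dtype=np.float32).reshape(num_frames, 3, 4)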
+
+ def change_camera_speed(camera_speed):
+     # global camera_dict
+     camera_dict['speed'] = camera_speed
+     return display_camera_info(camera_dict)
+
+ def add_traj_point(evt: gr.SelectData):
+     # global traj_list
+     traj_list.append(evt.index)
+     traj_str = [f"{traj}" for traj in traj_list]
+     return ", ".join(traj_str)
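add_traj_point is written as a Gradio select-event callback: when attached to an image's .select() event, Gradio passes a gr.SelectData whose .index holds the clicked (x, y) pixel position, which is what gets appended to traj_list. A self-contained sketch of the same mechanism (component names here are illustrative):

import gradio as gr
import numpy as np

with gr.Blocks() as demo:
    canvas = gr.Image(value=np.zeros((256, 256, 3), dtype=np.uint8), label="click to add points")
    points_box = gr.Textbox(label="clicked points")
    clicks = []

    def on_select(evt: gr.SelectData):
        clicks.append(evt.index)  # (x, y) pixel coordinates of the click
        return ", ".join(str(p) for p in clicks)

    canvas.select(on_select, None, points_box)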
+
+ def add_provided_traj(traj_name):
+     # global traj_list
+     traj_list = get_provided_traj(traj_name)
+     traj_str = [f"{traj}" for traj in traj_list]
+     return ", ".join(traj_str)
+
+
+ def fn_traj_droplast():
+     # global traj_list
+
+     if traj_list:
+         traj_list.pop()
+
+     if traj_list:
+         traj_str = [f"{traj}" for traj in traj_list]
+         return ", ".join(traj_str)
+     else:
+         return "Click to specify trajectory"
+
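The clicked points are sparse, while the model needs one (x, y) location per generated frame; in app.py that densification is presumably handled by the imported process_points/process_traj helpers. A minimal linear-interpolation sketch of the idea, assuming 16 frames and not claiming to match the repo's exact resampling:

import numpy as np

def densify_trajectory(points, num_frames=16):
    pts = np.asarray(points, dtype=np.float32)  # [N, 2] clicked (x, y) points
    if len(pts) == 1:
        return np.repeat(pts, num_frames, axis=0)
    t_in = np.linspace(0.0, 1.0, len(pts))
    t_out = np.linspace(0.0, 1.0, num_frames)
    x = np.interp(t_out, t_in, pts[:, 0])
    y = np.interp(t_out, t_in, pts[:, 1])
    return np.stack([x, y], axis=1)  # [num_frames, 2]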