aiqcamp committed on
Commit
8a61a4d
·
verified ·
1 Parent(s): 1cfe547

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -10
app.py CHANGED
@@ -65,22 +65,25 @@ output_dir = Path('./output/gradio')
65
  setup_eval_logging()
66
  net, feature_utils, seq_cfg = get_model()
67
 
68
-
69
  @spaces.GPU(duration=120)
70
  @torch.inference_mode()
71
  def video_to_audio(video_path: str, prompt: str, negative_prompt: str = "music",
72
  seed: int = -1, num_steps: int = 25,
73
- cfg_strength: float = 4.5, duration: float = 8):
74
  try:
 
 
75
  rng = torch.Generator(device=device)
76
  if seed >= 0:
77
  rng.manual_seed(seed)
78
  else:
79
  rng.seed()
 
80
  fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=num_steps)
81
 
82
  # video_info = load_video(video_path, duration) 대신:
83
- video_info = load_video(video_path, static_duration=duration)
 
84
 
85
  if video_info is None:
86
  logger.error("Failed to load video")
@@ -94,14 +97,15 @@ def video_to_audio(video_path: str, prompt: str, negative_prompt: str = "music",
94
  logger.error("Failed to extract frames from video")
95
  return video_path
96
 
97
- clip_frames = clip_frames.unsqueeze(0)
98
- sync_frames = sync_frames.unsqueeze(0)
99
 
100
  # 시퀀스 길이 업데이트
101
  seq_cfg.duration = actual_duration
102
  net.update_seq_lengths(seq_cfg.latent_seq_len, seq_cfg.clip_seq_len, seq_cfg.sync_seq_len)
103
 
104
  # 오디오 생성
 
105
  audios = generate(clip_frames,
106
  sync_frames,
107
  [prompt],
@@ -120,12 +124,14 @@ def video_to_audio(video_path: str, prompt: str, negative_prompt: str = "music",
120
 
121
  # 결과 비디오 생성
122
  output_path = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4').name
123
- success = make_video(video_info, output_path, audio, sampling_rate=seq_cfg.sampling_rate)
124
 
125
- if not success:
126
- logger.error("Failed to create video with audio")
 
 
127
  return video_path
128
-
129
  logger.info(f'Successfully saved video with audio to {output_path}')
130
  return output_path
131
 
@@ -353,7 +359,7 @@ def generate_video(image, prompt):
353
  seed=-1,
354
  num_steps=25,
355
  cfg_strength=4.5,
356
- duration=8.0 # float 타입으로 명시
357
  )
358
 
359
  if final_path_with_audio != final_path:
 
65
  setup_eval_logging()
66
  net, feature_utils, seq_cfg = get_model()
67
 
 
68
  @spaces.GPU(duration=120)
69
  @torch.inference_mode()
70
  def video_to_audio(video_path: str, prompt: str, negative_prompt: str = "music",
71
  seed: int = -1, num_steps: int = 25,
72
+ cfg_strength: float = 4.5, target_duration: float = 8.0):
73
  try:
74
+ logger.info("Starting audio generation process")
75
+
76
  rng = torch.Generator(device=device)
77
  if seed >= 0:
78
  rng.manual_seed(seed)
79
  else:
80
  rng.seed()
81
+
82
  fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=num_steps)
83
 
84
  # video_info = load_video(video_path, duration) 대신:
85
+ kwargs = {'static_duration': target_duration}
86
+ video_info = load_video(video_path, **kwargs)
87
 
88
  if video_info is None:
89
  logger.error("Failed to load video")
 
97
  logger.error("Failed to extract frames from video")
98
  return video_path
99
 
100
+ clip_frames = clip_frames.unsqueeze(0).to(device)
101
+ sync_frames = sync_frames.unsqueeze(0).to(device)
102
 
103
  # 시퀀스 길이 업데이트
104
  seq_cfg.duration = actual_duration
105
  net.update_seq_lengths(seq_cfg.latent_seq_len, seq_cfg.clip_seq_len, seq_cfg.sync_seq_len)
106
 
107
  # 오디오 생성
108
+ logger.info("Generating audio...")
109
  audios = generate(clip_frames,
110
  sync_frames,
111
  [prompt],
 
124
 
125
  # 결과 비디오 생성
126
  output_path = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4').name
127
+ logger.info(f"Creating final video with audio at {output_path}")
128
 
129
+ make_video(video_info, output_path, audio, sampling_rate=seq_cfg.sampling_rate)
130
+
131
+ if not os.path.exists(output_path):
132
+ logger.error("Failed to create output video")
133
  return video_path
134
+
135
  logger.info(f'Successfully saved video with audio to {output_path}')
136
  return output_path
137
 
 
359
  seed=-1,
360
  num_steps=25,
361
  cfg_strength=4.5,
362
+ target_duration=8.0 # duration을 target_duration으로 변경
363
  )
364
 
365
  if final_path_with_audio != final_path: