Dokdo-multimodal

Paused

App Files Files Community

aiqcamp committed on Dec 22, 2024

Commit

1cfe547

verified ·

1 Parent(s): decba1f

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -20

app.py CHANGED Viewed

@@ -79,32 +79,56 @@ def video_to_audio(video_path: str, prompt: str, negative_prompt: str = "music",
             rng.seed()
         fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=num_steps)
-        # duration 파라미터 전달 방식 수정
-        video_info = load_video(video_path, static_duration=duration)  # static_duration으로 변경
         clip_frames = video_info.clip_frames
         sync_frames = video_info.sync_frames
         actual_duration = video_info.duration_sec
         clip_frames = clip_frames.unsqueeze(0)
         sync_frames = sync_frames.unsqueeze(0)
         seq_cfg.duration = actual_duration
         net.update_seq_lengths(seq_cfg.latent_seq_len, seq_cfg.clip_seq_len, seq_cfg.sync_seq_len)
         audios = generate(clip_frames,
-                          sync_frames, [prompt],
-                          negative_text=[negative_prompt],
-                          feature_utils=feature_utils,
-                          net=net,
-                          fm=fm,
-                          rng=rng,
-                          cfg_strength=cfg_strength)
         audio = audios.float().cpu()[0]
-        video_save_path = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4').name
-        make_video(video_info, video_save_path, audio, sampling_rate=seq_cfg.sampling_rate)
-        logger.info(f'Saved video with audio to {video_save_path}')
-        return video_save_path
     except Exception as e:
         logger.error(f"Error in video_to_audio: {str(e)}")
         return video_path  # 오류 발생 시 원본 비디오 반환
@@ -321,6 +345,7 @@ def generate_video(image, prompt):
                     # 오디오 처리 추가
                     try:
                         final_path_with_audio = video_to_audio(
                             final_path,
                             prompt=prompt,
@@ -328,16 +353,25 @@ def generate_video(image, prompt):
                             seed=-1,
                             num_steps=25,
                             cfg_strength=4.5,
-                            duration=8
                         )
-                        # 임시 파일 정리
-                        if output_path != final_path:
-                            os.remove(output_path)
-                        if final_path != final_path_with_audio:
-                            os.remove(final_path)
-                        return final_path_with_audio
                     except Exception as e:
                         logger.error(f"Error in audio processing: {str(e)}")
                         return final_path  # 오디오 처리 실패 시 워터마크만 된 비디오 반환

             rng.seed()
         fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=num_steps)
+        # video_info = load_video(video_path, duration) 대신:
+        video_info = load_video(video_path, static_duration=duration)
+        if video_info is None:
+            logger.error("Failed to load video")
+            return video_path
         clip_frames = video_info.clip_frames
         sync_frames = video_info.sync_frames
         actual_duration = video_info.duration_sec
+        if clip_frames is None or sync_frames is None:
+            logger.error("Failed to extract frames from video")
+            return video_path
         clip_frames = clip_frames.unsqueeze(0)
         sync_frames = sync_frames.unsqueeze(0)
+        # 시퀀스 길이 업데이트
         seq_cfg.duration = actual_duration
         net.update_seq_lengths(seq_cfg.latent_seq_len, seq_cfg.clip_seq_len, seq_cfg.sync_seq_len)
+        # 오디오 생성
         audios = generate(clip_frames,
+                         sync_frames,
+                         [prompt],
+                         negative_text=[negative_prompt],
+                         feature_utils=feature_utils,
+                         net=net,
+                         fm=fm,
+                         rng=rng,
+                         cfg_strength=cfg_strength)
+        if audios is None:
+            logger.error("Failed to generate audio")
+            return video_path
         audio = audios.float().cpu()[0]
+        # 결과 비디오 생성
+        output_path = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4').name
+        success = make_video(video_info, output_path, audio, sampling_rate=seq_cfg.sampling_rate)
+        if not success:
+            logger.error("Failed to create video with audio")
+            return video_path
+        logger.info(f'Successfully saved video with audio to {output_path}')
+        return output_path
     except Exception as e:
         logger.error(f"Error in video_to_audio: {str(e)}")
         return video_path  # 오류 발생 시 원본 비디오 반환
                     # 오디오 처리 추가
                     try:
+                        logger.info("Starting audio generation process")
                         final_path_with_audio = video_to_audio(
                             final_path,
                             prompt=prompt,
                             seed=-1,
                             num_steps=25,
                             cfg_strength=4.5,
+                            duration=8.0  # float 타입으로 명시
                         )
+                        if final_path_with_audio != final_path:
+                            logger.info("Audio generation successful")
+                            # 임시 파일 정리
+                            try:
+                                if output_path != final_path:
+                                    os.remove(output_path)
+                                if final_path != final_path_with_audio:
+                                    os.remove(final_path)
+                            except Exception as e:
+                                logger.warning(f"Error cleaning up temporary files: {str(e)}")
+                            return final_path_with_audio
+                        else:
+                            logger.warning("Audio generation skipped, using original video")
+                            return final_path
                     except Exception as e:
                         logger.error(f"Error in audio processing: {str(e)}")
                         return final_path  # 오디오 처리 실패 시 워터마크만 된 비디오 반환