Dokdo-multimodal

Paused

App Files Files Community

aiqcamp committed on Dec 22, 2024

Commit

ca607ce

verified ·

1 Parent(s): 9ae8acd

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -1

app.py CHANGED Viewed

@@ -44,6 +44,59 @@ logger = logging.getLogger(__name__)
 CATBOX_USER_HASH = "30f52c895fd9d9cb387eee489"
 REPLICATE_API_TOKEN = os.getenv("API_KEY")
 def upload_to_catbox(file_path):
     """catbox.moe API를 사용하여 파일 업로드"""
     try:
@@ -287,7 +340,7 @@ footer {display: none}
 """
 with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
-    gr.HTML('<div style="text-align: center; font-size: 1.5em; margin: 10px 0;">🎥 Image to Video Generator</div>')
     with gr.Row():
         with gr.Column(scale=3):

 # Presumably the catbox.moe account "userhash" sent by upload_to_catbox — confirm.
 # NOTE(review): this looks like a credential committed in source; consider
 # reading it from an environment variable like the token below.
 CATBOX_USER_HASH = "30f52c895fd9d9cb387eee489"
 # Read from the API_KEY environment variable; os.getenv returns None when it is unset.
 REPLICATE_API_TOKEN = os.getenv("API_KEY")
+def get_model() -> tuple[MMAudio, FeaturesUtils, SequenceConfig]:
+    seq_cfg = model.seq_cfg
+    net: MMAudio = get_my_mmaudio(model.model_name).to(device, dtype).eval()
+    net.load_weights(torch.load(model.model_path, map_location=device, weights_only=True))
+    logger.info(f'Loaded weights from {model.model_path}')
+    feature_utils = FeaturesUtils(tod_vae_ckpt=model.vae_path,
+                                  synchformer_ckpt=model.synchformer_ckpt,
+                                  enable_conditions=True,
+                                  mode=model.mode,
+                                  bigvgan_vocoder_ckpt=model.bigvgan_16k_path,
+                                  need_vae_encoder=False)
+    feature_utils = feature_utils.to(device, dtype).eval()
+    return net, feature_utils, seq_cfg
+@spaces.GPU(duration=120)
+@torch.inference_mode()
+def video_to_audio(video_path: str, prompt: str, negative_prompt: str = "music",
+                   seed: int = -1, num_steps: int = 25,
+                   cfg_strength: float = 4.5, duration: float = 8):
+    rng = torch.Generator(device=device)
+    if seed >= 0:
+        rng.manual_seed(seed)
+    else:
+        rng.seed()
+    fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=num_steps)
+    video_info = load_video(video_path, duration)
+    clip_frames = video_info.clip_frames
+    sync_frames = video_info.sync_frames
+    duration = video_info.duration_sec
+    clip_frames = clip_frames.unsqueeze(0)
+    sync_frames = sync_frames.unsqueeze(0)
+    seq_cfg.duration = duration
+    net.update_seq_lengths(seq_cfg.latent_seq_len, seq_cfg.clip_seq_len, seq_cfg.sync_seq_len)
+    audios = generate(clip_frames,
+                      sync_frames, [prompt],
+                      negative_text=[negative_prompt],
+                      feature_utils=feature_utils,
+                      net=net,
+                      fm=fm,
+                      rng=rng,
+                      cfg_strength=cfg_strength)
+    audio = audios.float().cpu()[0]
+    video_save_path = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4').name
+    make_video(video_info, video_save_path, audio, sampling_rate=seq_cfg.sampling_rate)
+    logger.info(f'Saved video with audio to {video_save_path}')
+    return video_save_path
 def upload_to_catbox(file_path):
     """catbox.moe API를 사용하여 파일 업로드"""
     try:
 """
 with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
     with gr.Row():
         with gr.Column(scale=3):