Update app.py
Browse files
app.py
CHANGED
@@ -40,8 +40,23 @@ def check_api(model_name):
|
|
40 |
except :
|
41 |
return "api not ready yet"
|
42 |
|
43 |
-
from moviepy.editor import
|
44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
|
46 |
def extract_audio(video_in):
|
47 |
input_video = video_in
|
@@ -232,9 +247,25 @@ def get_musical_prompt(user_prompt, chosen_model):
|
|
232 |
print(f"SUGGESTED Musical prompt: {cleaned_text}")
|
233 |
return cleaned_text.lstrip("\n")
|
234 |
|
235 |
-
def
|
236 |
-
|
237 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
238 |
|
239 |
if chosen_model == [] :
|
240 |
raise gr.Error("Please pick a model")
|
@@ -242,6 +273,8 @@ def infer(image_in, chosen_model, api_status):
|
|
242 |
if api_status == "api not ready yet" :
|
243 |
raise gr.Error("This model is not ready yet, you can pick another one instead :)")
|
244 |
|
|
|
|
|
245 |
gr.Info("Getting image caption with Kosmos2...")
|
246 |
user_prompt = get_caption(image_in)
|
247 |
|
@@ -263,10 +296,11 @@ def infer(image_in, chosen_model, api_status):
|
|
263 |
elif chosen_model == "MusicGen" :
|
264 |
gr.Info("Now calling MusicGen for music...")
|
265 |
music_o = get_musicgen(musical_prompt)
|
266 |
-
|
267 |
-
return gr.update(value=musical_prompt, interactive=True), gr.update(visible=True), music_o
|
268 |
|
269 |
-
|
|
|
|
|
|
|
270 |
musical_prompt = caption
|
271 |
|
272 |
if chosen_model == "MAGNet" :
|
@@ -284,11 +318,11 @@ def retry(chosen_model, caption):
|
|
284 |
elif chosen_model == "MusicGen" :
|
285 |
gr.Info("Now calling MusicGen for music...")
|
286 |
music_o = get_musicgen(musical_prompt)
|
|
|
|
|
287 |
|
288 |
-
|
289 |
-
|
290 |
-
demo_title = "Image to Music V2"
|
291 |
-
description = "Get music from a picture, compare text-to-music models"
|
292 |
|
293 |
css = """
|
294 |
#col-container {
|
@@ -319,11 +353,7 @@ with gr.Blocks(css=css) as demo:
|
|
319 |
with gr.Row():
|
320 |
|
321 |
with gr.Column():
|
322 |
-
|
323 |
-
label = "Image reference",
|
324 |
-
type = "filepath",
|
325 |
-
elem_id = "image-in"
|
326 |
-
)
|
327 |
|
328 |
with gr.Row():
|
329 |
|
@@ -345,24 +375,9 @@ with gr.Blocks(css=css) as demo:
|
|
345 |
interactive=False
|
346 |
)
|
347 |
|
348 |
-
submit_btn = gr.Button("Make music from my
|
349 |
-
|
350 |
-
|
351 |
-
examples = [
|
352 |
-
["examples/ocean_poet.jpeg"],
|
353 |
-
["examples/jasper_horace.jpeg"],
|
354 |
-
["examples/summer.jpeg"],
|
355 |
-
["examples/mona_diner.png"],
|
356 |
-
["examples/monalisa.png"],
|
357 |
-
["examples/santa.png"],
|
358 |
-
["examples/winter_hiking.png"],
|
359 |
-
["examples/teatime.jpeg"],
|
360 |
-
["examples/news_experts.jpeg"]
|
361 |
-
],
|
362 |
-
fn = infer,
|
363 |
-
inputs = [image_in, chosen_model],
|
364 |
-
examples_per_page = 4
|
365 |
-
)
|
366 |
|
367 |
with gr.Column():
|
368 |
|
@@ -377,6 +392,8 @@ with gr.Blocks(css=css) as demo:
|
|
377 |
result = gr.Audio(
|
378 |
label = "Music"
|
379 |
)
|
|
|
|
|
380 |
|
381 |
|
382 |
chosen_model.change(
|
@@ -388,21 +405,22 @@ with gr.Blocks(css=css) as demo:
|
|
388 |
|
389 |
retry_btn.click(
|
390 |
fn = retry,
|
391 |
-
inputs = [chosen_model, caption],
|
392 |
-
outputs = [result]
|
393 |
)
|
394 |
|
395 |
submit_btn.click(
|
396 |
fn = infer,
|
397 |
inputs = [
|
398 |
-
|
399 |
chosen_model,
|
400 |
check_status
|
401 |
],
|
402 |
outputs =[
|
403 |
caption,
|
404 |
retry_btn,
|
405 |
-
result
|
|
|
406 |
],
|
407 |
concurrency_limit = 4
|
408 |
)
|
|
|
40 |
except :
|
41 |
return "api not ready yet"
|
42 |
|
43 |
+
from moviepy.editor import *
|
44 |
+
import cv2
|
45 |
+
|
46 |
+
def extract_firstframe(video_in):
    """Save the first frame of a video as ``first_frame.jpg`` and return that path.

    Args:
        video_in: Path of the video file; passed straight to
            ``cv2.VideoCapture``.

    Returns:
        The string ``"first_frame.jpg"`` (relative to the current working
        directory).

    Raises:
        gr.Error: If no frame can be read from ``video_in`` or the JPEG
            cannot be written.
    """
    # NOTE(review): the output name is fixed, so concurrent requests overwrite
    # each other's frame - confirm whether a per-request path is needed.
    output_path = "first_frame.jpg"

    vidcap = cv2.VideoCapture(video_in)
    try:
        # Only the first frame is needed, so one read replaces the loop.
        success, image = vidcap.read()
    finally:
        # Always free the capture handle, even if reading raises.
        vidcap.release()

    if not success:
        # Fail loudly: returning the path without writing anything would hand
        # the caller a missing or stale file.
        raise gr.Error("Could not read a frame from the provided video")

    # cv2.imwrite reports failure through its return value, not an exception.
    if not cv2.imwrite(output_path, image):
        raise gr.Error("Could not save the first frame of the video")

    print("Done extracted first frame!")
    return output_path
|
60 |
|
61 |
def extract_audio(video_in):
|
62 |
input_video = video_in
|
|
|
247 |
print(f"SUGGESTED Musical prompt: {cleaned_text}")
|
248 |
return cleaned_text.lstrip("\n")
|
249 |
|
250 |
+
def blend_vmsc(video_in, audio_result):
    """Replace a video's soundtrack with the generated audio and write an MP4.

    Whichever of the two inputs is longer is trimmed so that both end at the
    duration of the shorter one.

    Args:
        video_in: Path of the source video file.
        audio_result: Path of the audio file to use as the soundtrack.

    Returns:
        The string ``"final_video_with_music.mp4"`` (relative to the current
        working directory).
    """
    # NOTE(review): the output name is fixed, so concurrent requests overwrite
    # each other's result - confirm whether a per-request path is needed.
    output_path = "final_video_with_music.mp4"

    audio_source = AudioFileClip(audio_result)
    try:
        print(f"AUD: {audio_source.duration}")
        video_source = VideoFileClip(video_in)
        try:
            print(f"VID: {video_source.duration}")

            audio_clip = audio_source
            video_clip = video_source
            # Trim the longer stream; equal durations need no trimming.
            if video_source.duration < audio_source.duration:
                audio_clip = audio_source.subclip(0.0, video_source.duration)
            elif video_source.duration > audio_source.duration:
                video_clip = video_source.subclip(0.0, audio_source.duration)

            final_clip = video_clip.set_audio(audio_clip)
            final_clip.write_videofile(
                output_path,
                codec="libx264",
                audio_codec="aac",
            )
        finally:
            # Close the source clip so its reader is released even when
            # encoding fails.
            video_source.close()
    finally:
        audio_source.close()

    return output_path
|
265 |
+
|
266 |
+
def infer(video_in, chosen_model, api_status):
|
267 |
+
if video_in == None :
|
268 |
+
raise gr.Error("Please provide a video input")
|
269 |
|
270 |
if chosen_model == [] :
|
271 |
raise gr.Error("Please pick a model")
|
|
|
273 |
if api_status == "api not ready yet" :
|
274 |
raise gr.Error("This model is not ready yet, you can pick another one instead :)")
|
275 |
|
276 |
+
image_in = extract_firstframe(video_in)
|
277 |
+
|
278 |
gr.Info("Getting image caption with Kosmos2...")
|
279 |
user_prompt = get_caption(image_in)
|
280 |
|
|
|
296 |
elif chosen_model == "MusicGen" :
|
297 |
gr.Info("Now calling MusicGen for music...")
|
298 |
music_o = get_musicgen(musical_prompt)
|
|
|
|
|
299 |
|
300 |
+
final_res = blend_vmsc(video_in, music_o)
|
301 |
+
return gr.update(value=musical_prompt, interactive=True), gr.update(visible=True), music_o, final_res
|
302 |
+
|
303 |
+
def retry(video_in, chosen_model, caption):
|
304 |
musical_prompt = caption
|
305 |
|
306 |
if chosen_model == "MAGNet" :
|
|
|
318 |
elif chosen_model == "MusicGen" :
|
319 |
gr.Info("Now calling MusicGen for music...")
|
320 |
music_o = get_musicgen(musical_prompt)
|
321 |
+
final_res = blend_vmsc(video_in, music_o)
|
322 |
+
return music_o, final_res
|
323 |
|
324 |
+
demo_title = "Video to Music"
|
325 |
+
description = "Get music from a video shot, compare text-to-music models"
|
|
|
|
|
326 |
|
327 |
css = """
|
328 |
#col-container {
|
|
|
353 |
with gr.Row():
|
354 |
|
355 |
with gr.Column():
|
356 |
+
video_in = gr.Video(sources=["upload"], label="Video input")
|
|
|
|
|
|
|
|
|
357 |
|
358 |
with gr.Row():
|
359 |
|
|
|
375 |
interactive=False
|
376 |
)
|
377 |
|
378 |
+
submit_btn = gr.Button("Make music from my shot !")
|
379 |
+
|
380 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
381 |
|
382 |
with gr.Column():
|
383 |
|
|
|
392 |
result = gr.Audio(
|
393 |
label = "Music"
|
394 |
)
|
395 |
+
|
396 |
+
video_o = gr.Video(label="Video with soundFX")
|
397 |
|
398 |
|
399 |
chosen_model.change(
|
|
|
405 |
|
406 |
retry_btn.click(
|
407 |
fn = retry,
|
408 |
+
inputs = [video_in, chosen_model, caption],
|
409 |
+
outputs = [result, video_o]
|
410 |
)
|
411 |
|
412 |
submit_btn.click(
|
413 |
fn = infer,
|
414 |
inputs = [
|
415 |
+
video_in,
|
416 |
chosen_model,
|
417 |
check_status
|
418 |
],
|
419 |
outputs =[
|
420 |
caption,
|
421 |
retry_btn,
|
422 |
+
result,
|
423 |
+
video_o
|
424 |
],
|
425 |
concurrency_limit = 4
|
426 |
)
|