fffiloni committed on
Commit
cd48b2f
1 Parent(s): 7013c77

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -39
app.py CHANGED
@@ -40,8 +40,23 @@ def check_api(model_name):
40
  except :
41
  return "api not ready yet"
42
 
43
- from moviepy.editor import VideoFileClip
44
- from moviepy.audio.AudioClip import AudioClip
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
  def extract_audio(video_in):
47
  input_video = video_in
@@ -232,9 +247,25 @@ def get_musical_prompt(user_prompt, chosen_model):
232
  print(f"SUGGESTED Musical prompt: {cleaned_text}")
233
  return cleaned_text.lstrip("\n")
234
 
235
- def infer(image_in, chosen_model, api_status):
236
- if image_in == None :
237
- raise gr.Error("Please provide an image input")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
 
239
  if chosen_model == [] :
240
  raise gr.Error("Please pick a model")
@@ -242,6 +273,8 @@ def infer(image_in, chosen_model, api_status):
242
  if api_status == "api not ready yet" :
243
  raise gr.Error("This model is not ready yet, you can pick another one instead :)")
244
 
 
 
245
  gr.Info("Getting image caption with Kosmos2...")
246
  user_prompt = get_caption(image_in)
247
 
@@ -263,10 +296,11 @@ def infer(image_in, chosen_model, api_status):
263
  elif chosen_model == "MusicGen" :
264
  gr.Info("Now calling MusicGen for music...")
265
  music_o = get_musicgen(musical_prompt)
266
-
267
- return gr.update(value=musical_prompt, interactive=True), gr.update(visible=True), music_o
268
 
269
- def retry(chosen_model, caption):
 
 
 
270
  musical_prompt = caption
271
 
272
  if chosen_model == "MAGNet" :
@@ -284,11 +318,11 @@ def retry(chosen_model, caption):
284
  elif chosen_model == "MusicGen" :
285
  gr.Info("Now calling MusicGen for music...")
286
  music_o = get_musicgen(musical_prompt)
 
 
287
 
288
- return music_o
289
-
290
- demo_title = "Image to Music V2"
291
- description = "Get music from a picture, compare text-to-music models"
292
 
293
  css = """
294
  #col-container {
@@ -319,11 +353,7 @@ with gr.Blocks(css=css) as demo:
319
  with gr.Row():
320
 
321
  with gr.Column():
322
- image_in = gr.Image(
323
- label = "Image reference",
324
- type = "filepath",
325
- elem_id = "image-in"
326
- )
327
 
328
  with gr.Row():
329
 
@@ -345,24 +375,9 @@ with gr.Blocks(css=css) as demo:
345
  interactive=False
346
  )
347
 
348
- submit_btn = gr.Button("Make music from my pic !")
349
-
350
- gr.Examples(
351
- examples = [
352
- ["examples/ocean_poet.jpeg"],
353
- ["examples/jasper_horace.jpeg"],
354
- ["examples/summer.jpeg"],
355
- ["examples/mona_diner.png"],
356
- ["examples/monalisa.png"],
357
- ["examples/santa.png"],
358
- ["examples/winter_hiking.png"],
359
- ["examples/teatime.jpeg"],
360
- ["examples/news_experts.jpeg"]
361
- ],
362
- fn = infer,
363
- inputs = [image_in, chosen_model],
364
- examples_per_page = 4
365
- )
366
 
367
  with gr.Column():
368
 
@@ -377,6 +392,8 @@ with gr.Blocks(css=css) as demo:
377
  result = gr.Audio(
378
  label = "Music"
379
  )
 
 
380
 
381
 
382
  chosen_model.change(
@@ -388,21 +405,22 @@ with gr.Blocks(css=css) as demo:
388
 
389
  retry_btn.click(
390
  fn = retry,
391
- inputs = [chosen_model, caption],
392
- outputs = [result]
393
  )
394
 
395
  submit_btn.click(
396
  fn = infer,
397
  inputs = [
398
- image_in,
399
  chosen_model,
400
  check_status
401
  ],
402
  outputs =[
403
  caption,
404
  retry_btn,
405
- result
 
406
  ],
407
  concurrency_limit = 4
408
  )
 
40
  except :
41
  return "api not ready yet"
42
 
43
+ from moviepy.editor import *
44
+ import cv2
45
+
46
def extract_firstframe(video_in):
    """Save the first frame of a video as a JPEG and return its path.

    Args:
        video_in: Path of the video file to read.

    Returns:
        The string "first_frame.jpg", i.e. the file the frame was written to
        (relative to the current working directory).

    Raises:
        gr.Error: If no frame can be decoded from the video, or if the JPEG
            file cannot be written.
    """
    vidcap = cv2.VideoCapture(video_in)
    try:
        # Only the first frame is needed, so a single read is enough.
        success, image = vidcap.read()
    finally:
        # Always release the capture handle, even if reading raised.
        vidcap.release()

    if not success:
        # Without this check a stale first_frame.jpg left over from a previous
        # call would be returned as if it belonged to this video.
        raise gr.Error("Could not read a frame from the provided video")

    # NOTE(review): the output name is fixed, so overlapping requests would
    # overwrite each other's frame -- confirm whether that matters here.
    if not cv2.imwrite("first_frame.jpg", image):
        raise gr.Error("Could not save the first frame of the video")

    print("Done extracted first frame!")
    return "first_frame.jpg"
60
 
61
  def extract_audio(video_in):
62
  input_video = video_in
 
247
  print(f"SUGGESTED Musical prompt: {cleaned_text}")
248
  return cleaned_text.lstrip("\n")
249
 
250
def blend_vmsc(video_in, audio_result):
    """Replace a video's soundtrack with the given audio and write an MP4.

    Whichever of the two inputs is longer is trimmed to the duration of the
    shorter one before they are combined.

    Args:
        video_in: Path of the source video file.
        audio_result: Path of the audio file to use as the soundtrack.

    Returns:
        The string "final_video_with_music.mp4", i.e. the file that was
        written (relative to the current working directory).
    """
    source_audio = AudioFileClip(audio_result)
    try:
        print(f"AUD: {source_audio.duration}")
        source_video = VideoFileClip(video_in)
        try:
            print(f"VID: {source_video.duration}")

            audio_clip = source_audio
            clip = source_video
            # Trim the longer stream so both end at the same time.
            if clip.duration < audio_clip.duration:
                audio_clip = audio_clip.subclip(0.0, clip.duration)
            elif clip.duration > audio_clip.duration:
                clip = clip.subclip(0.0, audio_clip.duration)

            final_clip = clip.set_audio(audio_clip)

            # NOTE(review): the output name is fixed, so overlapping requests
            # would overwrite each other's result -- confirm whether that
            # matters here.
            final_clip.write_videofile(
                "final_video_with_music.mp4",
                codec="libx264",
                audio_codec="aac",
            )
        finally:
            # Close the source clips once writing is done (or has failed) so
            # their underlying readers do not stay open between calls.
            source_video.close()
    finally:
        source_audio.close()

    return "final_video_with_music.mp4"
265
+
266
+ def infer(video_in, chosen_model, api_status):
267
+ if video_in == None :
268
+ raise gr.Error("Please provide a video input")
269
 
270
  if chosen_model == [] :
271
  raise gr.Error("Please pick a model")
 
273
  if api_status == "api not ready yet" :
274
  raise gr.Error("This model is not ready yet, you can pick another one instead :)")
275
 
276
+ image_in = extract_firstframe(video_in)
277
+
278
  gr.Info("Getting image caption with Kosmos2...")
279
  user_prompt = get_caption(image_in)
280
 
 
296
  elif chosen_model == "MusicGen" :
297
  gr.Info("Now calling MusicGen for music...")
298
  music_o = get_musicgen(musical_prompt)
 
 
299
 
300
+ final_res = blend_vmsc(video_in, music_o)
301
+ return gr.update(value=musical_prompt, interactive=True), gr.update(visible=True), music_o, final_res
302
+
303
+ def retry(video_in, chosen_model, caption):
304
  musical_prompt = caption
305
 
306
  if chosen_model == "MAGNet" :
 
318
  elif chosen_model == "MusicGen" :
319
  gr.Info("Now calling MusicGen for music...")
320
  music_o = get_musicgen(musical_prompt)
321
+ final_res = blend_vmsc(video_in, music_o)
322
+ return music_o, final_res
323
 
324
+ demo_title = "Video to Music"
325
+ description = "Get music from a video shot, compare text-to-music models"
 
 
326
 
327
  css = """
328
  #col-container {
 
353
  with gr.Row():
354
 
355
  with gr.Column():
356
+ video_in = gr.Video(sources=["upload"], label="Video input")
 
 
 
 
357
 
358
  with gr.Row():
359
 
 
375
  interactive=False
376
  )
377
 
378
+ submit_btn = gr.Button("Make music from my shot !")
379
+
380
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
381
 
382
  with gr.Column():
383
 
 
392
  result = gr.Audio(
393
  label = "Music"
394
  )
395
+
396
+ video_o = gr.Video(label="Video with soundFX")
397
 
398
 
399
  chosen_model.change(
 
405
 
406
  retry_btn.click(
407
  fn = retry,
408
+ inputs = [video_in, chosen_model, caption],
409
+ outputs = [result, video_o]
410
  )
411
 
412
  submit_btn.click(
413
  fn = infer,
414
  inputs = [
415
+ video_in,
416
  chosen_model,
417
  check_status
418
  ],
419
  outputs =[
420
  caption,
421
  retry_btn,
422
+ result,
423
+ video_o
424
  ],
425
  concurrency_limit = 4
426
  )