vidsm

Runtime error

App Files Files Community

fffiloni committed on Feb 24, 2024

Commit

cd48b2f

verified ·

1 Parent(s): 7013c77

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -39

app.py CHANGED Viewed

@@ -40,8 +40,23 @@ def check_api(model_name):
         except :
             return "api not ready yet"
-from moviepy.editor import VideoFileClip
-from moviepy.audio.AudioClip import AudioClip
 def extract_audio(video_in):
     input_video = video_in
@@ -232,9 +247,25 @@ def get_musical_prompt(user_prompt, chosen_model):
     print(f"SUGGESTED Musical prompt: {cleaned_text}")
     return cleaned_text.lstrip("\n")
-def infer(image_in, chosen_model, api_status):
-    if image_in == None :
-        raise gr.Error("Please provide an image input")
     if chosen_model == [] :
         raise gr.Error("Please pick a model")
@@ -242,6 +273,8 @@ def infer(image_in, chosen_model, api_status):
     if api_status == "api not ready yet" :
         raise gr.Error("This model is not ready yet, you can pick another one instead :)")
     gr.Info("Getting image caption with Kosmos2...")
     user_prompt = get_caption(image_in)
@@ -263,10 +296,11 @@ def infer(image_in, chosen_model, api_status):
     elif chosen_model == "MusicGen" :
         gr.Info("Now calling MusicGen for music...")
         music_o = get_musicgen(musical_prompt)
-    return gr.update(value=musical_prompt, interactive=True), gr.update(visible=True), music_o
-def retry(chosen_model, caption):
     musical_prompt = caption
     if chosen_model == "MAGNet" :
@@ -284,11 +318,11 @@ def retry(chosen_model, caption):
     elif chosen_model == "MusicGen" :
         gr.Info("Now calling MusicGen for music...")
         music_o = get_musicgen(musical_prompt)
-    return music_o
-demo_title = "Image to Music V2"
-description = "Get music from a picture, compare text-to-music models"
 css = """
 #col-container {
@@ -319,11 +353,7 @@ with gr.Blocks(css=css) as demo:
         with gr.Row():
             with gr.Column():
-                image_in = gr.Image(
-                    label = "Image reference",
-                    type = "filepath",
-                    elem_id = "image-in"
-                )
                 with gr.Row():
@@ -345,24 +375,9 @@ with gr.Blocks(css=css) as demo:
                         interactive=False
                     )
-                submit_btn = gr.Button("Make music from my pic !")
-                gr.Examples(
-                    examples = [
-                        ["examples/ocean_poet.jpeg"],
-                        ["examples/jasper_horace.jpeg"],
-                        ["examples/summer.jpeg"],
-                        ["examples/mona_diner.png"],
-                        ["examples/monalisa.png"],
-                        ["examples/santa.png"],
-                        ["examples/winter_hiking.png"],
-                        ["examples/teatime.jpeg"],
-                        ["examples/news_experts.jpeg"]
-                    ],
-                    fn = infer,
-                    inputs = [image_in, chosen_model],
-                    examples_per_page = 4
-                )
             with gr.Column():
@@ -377,6 +392,8 @@ with gr.Blocks(css=css) as demo:
                 result = gr.Audio(
                     label = "Music"
                 )
     chosen_model.change(
@@ -388,21 +405,22 @@ with gr.Blocks(css=css) as demo:
     retry_btn.click(
         fn = retry,
-        inputs = [chosen_model, caption],
-        outputs = [result]
     )
     submit_btn.click(
         fn = infer,
         inputs = [
-            image_in,
             chosen_model,
             check_status
         ],
         outputs =[
             caption,
             retry_btn,
-            result
         ],
         concurrency_limit = 4
     )

         except :
             return "api not ready yet"
+from moviepy.editor import *
+import cv2
+def extract_firstframe(video_in):
+    """Save the first frame of a video as ``first_frame.jpg`` and return that path.
+
+    Args:
+        video_in: Path of the video file; passed unchanged to ``cv2.VideoCapture``.
+
+    Returns:
+        The string ``"first_frame.jpg"`` (path of the written image).
+
+    Raises:
+        gr.Error: If no frame can be decoded from ``video_in`` or the image
+            cannot be written.
+    """
+    # NOTE(review): the output path is fixed while the submit handler is
+    # registered with concurrency_limit = 4, so parallel requests would
+    # overwrite each other's frame -- consider a per-request file name.
+    vidcap = cv2.VideoCapture(video_in)
+    try:
+        # One read() is enough: it yields the first frame of the stream.
+        success, image = vidcap.read()
+    finally:
+        # Free the capture handle whether or not decoding worked.
+        vidcap.release()
+    if not success:
+        # Fail loudly instead of returning a path to a missing or stale file.
+        raise gr.Error("Could not read the first frame of the provided video")
+    if not cv2.imwrite("first_frame.jpg", image):
+        raise gr.Error("Could not save the first frame of the provided video")
+    print ("Done extracted first frame!")
+    return "first_frame.jpg"
 def extract_audio(video_in):
     input_video = video_in
     print(f"SUGGESTED Musical prompt: {cleaned_text}")
     return cleaned_text.lstrip("\n")
+def blend_vmsc(video_in, audio_result):
+    """Put the generated music on the video and write the result as an MP4.
+
+    The longer of the two streams is cut to the duration of the shorter one,
+    so the output lasts ``min(video duration, audio duration)``.
+
+    Args:
+        video_in: Path of the source video, opened with ``VideoFileClip``.
+        audio_result: Path of the audio file, opened with ``AudioFileClip``.
+
+    Returns:
+        The string ``"final_video_with_music.mp4"`` (path of the written file).
+    """
+    # NOTE(review): the output path is fixed while the submit handler is
+    # registered with concurrency_limit = 4, so parallel requests would
+    # overwrite each other's result -- consider a per-request file name.
+    audioClip = AudioFileClip(audio_result)
+    try:
+        print(f"AUD: {audioClip.duration}")
+        clip = VideoFileClip(video_in)
+        try:
+            print(f"VID: {clip.duration}")
+            trimmed_audio, trimmed_video = audioClip, clip
+            if clip.duration < audioClip.duration :
+                trimmed_audio = audioClip.subclip((0.0), (clip.duration))
+            elif clip.duration > audioClip.duration :
+                trimmed_video = clip.subclip((0.0), (audioClip.duration))
+            final_clip = trimmed_video.set_audio(trimmed_audio)
+            # Set the output codec
+            codec = 'libx264'
+            audio_codec = 'aac'
+            final_clip.write_videofile('final_video_with_music.mp4', codec=codec, audio_codec=audio_codec)
+        finally:
+            # Close the source clip only after writing, since the trimmed
+            # clips are derived from it.
+            clip.close()
+    finally:
+        audioClip.close()
+    return "final_video_with_music.mp4"
+def infer(video_in, chosen_model, api_status):
+    if video_in == None :
+        raise gr.Error("Please provide a video input")
     if chosen_model == [] :
         raise gr.Error("Please pick a model")
     if api_status == "api not ready yet" :
         raise gr.Error("This model is not ready yet, you can pick another one instead :)")
+    image_in = extract_firstframe(video_in)
     gr.Info("Getting image caption with Kosmos2...")
     user_prompt = get_caption(image_in)
     elif chosen_model == "MusicGen" :
         gr.Info("Now calling MusicGen for music...")
         music_o = get_musicgen(musical_prompt)
+    final_res = blend_vmsc(video_in, music_o)
+    return gr.update(value=musical_prompt, interactive=True), gr.update(visible=True), music_o, final_res
+def retry(video_in, chosen_model, caption):
     musical_prompt = caption
     if chosen_model == "MAGNet" :
     elif chosen_model == "MusicGen" :
         gr.Info("Now calling MusicGen for music...")
         music_o = get_musicgen(musical_prompt)
+    final_res = blend_vmsc(video_in, music_o)
+    return music_o, final_res
+demo_title = "Video to Music"
+description = "Get music from a video shot, compare text-to-music models"
 css = """
 #col-container {
         with gr.Row():
             with gr.Column():
+                video_in = gr.Video(sources=["upload"], label="Video input")
                 with gr.Row():
                         interactive=False
                     )
+                submit_btn = gr.Button("Make music from my shot !")
             with gr.Column():
                 result = gr.Audio(
                     label = "Music"
                 )
+                video_o = gr.Video(label="Video with soundFX")
     chosen_model.change(
     retry_btn.click(
         fn = retry,
+        inputs = [video_in, chosen_model, caption],
+        outputs = [result, video_o]
     )
     submit_btn.click(
         fn = infer,
         inputs = [
+            video_in,
             chosen_model,
             check_status
         ],
         outputs =[
             caption,
             retry_btn,
+            result,
+            video_o
         ],
         concurrency_limit = 4
     )