aach456 committed on
Commit
842b913
1 Parent(s): 1bc62c5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -0
app.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PIL import Image
3
+ import torch
4
+ import skvideo.io
5
+ from diffusers import I2VGenXLPipeline
6
+ from diffusers.utils import export_to_video, load_image
7
+ import numpy as np
8
+ import imageio
9
+ from moviepy.editor import ImageSequenceClip
10
+ from transformers import MusicgenForConditionalGeneration, AutoProcessor
11
+ from scipy.io import wavfile
12
+ import ffmpeg
13
+
14
def generate_video(image, prompt, negative_prompt, video_length):
    """Generate a list of video frames from an image plus a text prompt.

    Runs the I2VGen-XL image-to-video diffusion pipeline with a fixed seed
    so output is reproducible across runs.

    Args:
        image: Source image (PIL.Image) the video is conditioned on.
        prompt: Text description guiding the generation.
        negative_prompt: Text describing what to avoid in the output.
        video_length: Desired clip length in seconds; 20 frames are
            generated per second of requested length.

    Returns:
        The list of generated frames (first — and only — video in the batch).
    """
    # Fixed seed keeps generation deterministic between runs.
    generator = torch.manual_seed(8888)
    # Prefer Apple-silicon GPU (MPS) when present; otherwise fall back to CPU.
    device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
    print(f"Using device: {device}")

    # NOTE(review): the pipeline is re-downloaded/re-loaded on every call;
    # acceptable for a demo, but a cache (e.g. st.cache_resource) would
    # avoid repeated multi-GB loads.
    pipeline = I2VGenXLPipeline.from_pretrained(
        "ali-vilab/i2vgen-xl", torch_dtype=torch.float32
    )
    pipeline.to(device)

    result = pipeline(
        prompt=prompt,
        image=image,
        num_inference_steps=2,  # very few steps: fast preview-quality output
        negative_prompt=negative_prompt,
        guidance_scale=9.0,
        generator=generator,
        num_frames=video_length * 20,
    )
    return result.frames[0]
31
def export_frames_to_video(frames, output_file):
    """Write a sequence of frames to an H.264 video file at 30 fps.

    Args:
        frames: Iterable of PIL images (or anything np.array accepts).
        output_file: Path of the video file to create.
    """
    # moviepy expects numpy arrays, not PIL images.
    arrays = [np.array(frame) for frame in frames]
    clip = ImageSequenceClip(arrays, fps=30)
    clip.write_videofile(output_file, codec='libx264', audio=False)
36
def generate_music(prompt, unconditional=False):
    """Generate a short music clip with MusicGen.

    Args:
        prompt: Text description of the desired music. Ignored when
            ``unconditional`` is True.
        unconditional: When True, sample music without any text
            conditioning.

    Returns:
        Tuple ``(waveform, sampling_rate)`` where ``waveform`` is a 1-D
        numpy array suitable for ``scipy.io.wavfile.write``.
    """
    model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
    # NOTE(review): device policy differs from generate_video (cuda/cpu vs
    # mps/cpu) — presumably intentional since MusicGen targets CUDA; confirm.
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    model.to(device)

    if unconditional:
        unconditional_inputs = model.get_unconditional_inputs(num_samples=1)
        audio_values = model.generate(
            **unconditional_inputs, do_sample=True, max_new_tokens=256
        )
    else:
        processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
        inputs = processor(
            text=prompt,
            padding=True,
            return_tensors="pt",
        )
        audio_values = model.generate(
            **inputs.to(device), do_sample=True, guidance_scale=3, max_new_tokens=256
        )

    sampling_rate = model.config.audio_encoder.sampling_rate
    # BUG FIX: generate() returns shape (batch, channels, samples). The old
    # code returned audio_values[0] — a (1, N) array — which wavfile.write
    # interprets as one sample with N channels, producing a broken WAV.
    # Index [0, 0] yields the 1-D mono waveform (matches the MusicGen docs).
    return audio_values[0, 0].cpu().numpy(), sampling_rate
54
def combine_audio_video(audio_file, video_file, output_file):
    """Mux an audio track and a video track into a single output file.

    Video is stream-copied (no re-encode); audio is transcoded to AAC.

    Args:
        audio_file: Path to the input audio file.
        video_file: Path to the input video file.
        output_file: Path of the combined file to write.
    """
    audio_stream = ffmpeg.input(audio_file)
    video_stream = ffmpeg.input(video_file)
    muxed = ffmpeg.output(
        video_stream, audio_stream, output_file, vcodec='copy', acodec='aac'
    )
    ffmpeg.run(muxed)
60
# ---------------------------------------------------------------------------
# Streamlit UI: collect inputs in the sidebar, then generate a video, a music
# track, and a combined clip when the button is pressed.
# ---------------------------------------------------------------------------
st.title("AI-Powered Video and Music Generation")

sidebar = st.sidebar
sidebar.title("Options")

# --- Video generation inputs ---
sidebar.subheader("Video Generation")
image = sidebar.file_uploader("Upload an image", type=["jpg", "png"])
prompt = sidebar.text_input("Enter the prompt")
negative_prompt = sidebar.text_input("Enter the negative prompt")
video_length = sidebar.number_input(
    "Enter the video length (seconds)", min_value=1, value=10
)

# --- Music generation inputs ---
sidebar.subheader("Music Generation")
music_prompt = sidebar.text_input("Enter the music prompt")
unconditional = sidebar.checkbox("Generate unconditional music")

# Generation only runs when the button is clicked AND an image was uploaded;
# a click without an image is silently ignored (same as the original flow).
if sidebar.button("Generate Video and Music") and image is not None:
    image = Image.open(image)

    # 1) Image -> video
    frames = generate_video(image, prompt, negative_prompt, video_length)
    export_frames_to_video(frames, "output_video.mp4")
    st.video("output_video.mp4")

    # 2) Text -> music
    audio_values, sampling_rate = generate_music(music_prompt, unconditional)
    wavfile.write("musicgen_out.wav", sampling_rate, audio_values)
    st.audio("musicgen_out.wav")

    # 3) Mux the two into one clip
    combine_audio_video("musicgen_out.wav", "output_video.mp4", "combined_output.mp4")
    st.video("combined_output.mp4")