import tempfile

import gradio as gr
import imageio
import numpy as np
import spaces
import torch
from diffusers import StableVideoDiffusionPipeline, EulerDiscreteScheduler
from PIL import Image

# Load the Stable Video Diffusion model in half precision
model_id = "stabilityai/stable-video-diffusion-img2vid-xt"
try:
    pipe = StableVideoDiffusionPipeline.from_pretrained(
        model_id, torch_dtype=torch.float16, variant="fp16"
    )
    pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
    pipe.to("cuda")
except Exception as e:
    raise RuntimeError(f"Failed to load the model: {e}")


@spaces.GPU
def generate_video(image, num_frames=25, height=576, width=1024):
    try:
        # gr.Image(type="filepath") passes a path string; open it as an RGB image
        image = Image.open(image).convert("RGB")

        # gr.Number returns floats; the pipeline expects integer dimensions
        height, width = int(height), int(width)

        # Generate the video. The pipeline's .frames output is batched, so take
        # the first clip; decode_chunk_size caps how many frames are decoded at
        # once to keep peak GPU memory down.
        video_frames = pipe(
            image=image,
            num_frames=int(num_frames),
            height=height,
            width=width,
            decode_chunk_size=8,
        ).frames[0]

        # Reserve a temporary file path for the output video
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video:
            video_path = temp_video.name

        # Write the PIL frames as an MP4 using imageio (needs the ffmpeg backend)
        imageio.mimsave(video_path, [np.asarray(frame) for frame in video_frames], fps=30)

        return video_path
    except Exception as e:
        raise RuntimeError(f"Failed to generate the video: {e}")


# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## Image to Video with Stable Video Diffusion XT")
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="filepath", label="Upload Image")
            num_frames_input = gr.Slider(1, 50, step=1, value=25, label="Number of Frames")
            height_input = gr.Number(label="Resolution Height", value=576)
            width_input = gr.Number(label="Resolution Width", value=1024)
            run_button = gr.Button("Generate Video")
        with gr.Column():
            video_output = gr.Video(label="Generated Video")

    run_button.click(
        generate_video,
        inputs=[image_input, num_frames_input, height_input, width_input],
        outputs=video_output,
    )

# Launch the interface
if __name__ == "__main__":
    demo.launch()
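
# ---------------------------------------------------------------------------
# Dependency note (an assumption, not part of the original file): a Space
# running this script would likely need a requirements.txt along these lines.
# imageio must have its ffmpeg backend available to write .mp4 files.
#
#   gradio
#   torch
#   diffusers
#   transformers
#   accelerate
#   imageio[ffmpeg]
#   spaces
# ---------------------------------------------------------------------------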