"""Gradio demo: image-to-video generation with Stable Video Diffusion.

NOTE(review): the original code loaded this checkpoint through
``transformers.pipeline("text-to-video-generation", ...)``, which is not a
valid transformers task and raises at startup.
``stabilityai/stable-video-diffusion-img2vid-xt`` is a *diffusers* model,
so it is loaded here with ``StableVideoDiffusionPipeline`` instead.
"""

import tempfile

import gradio as gr
import torch
from diffusers import StableVideoDiffusionPipeline
from diffusers.utils import export_to_video

MODEL_ID = "stabilityai/stable-video-diffusion-img2vid-xt"

# Pick the device at runtime instead of hard-coding "cuda": fp16 needs a GPU;
# fall back to fp32 on CPU so the demo still starts (slowly) without one.
_device = "cuda" if torch.cuda.is_available() else "cpu"
_dtype = torch.float16 if _device == "cuda" else torch.float32

pipe = StableVideoDiffusionPipeline.from_pretrained(
    MODEL_ID,
    torch_dtype=_dtype,
    variant="fp16" if _device == "cuda" else None,
)
pipe.to(_device)


def generate_video(image, prompt):
    """Generate a short video conditioned on *image* and return an MP4 path.

    Parameters
    ----------
    image : PIL.Image.Image
        Conditioning image from the Gradio ``Image`` component.
    prompt : str
        Kept only for interface compatibility. Stable Video Diffusion is an
        image-to-video model and accepts no text prompt, so this value is
        ignored.

    Returns
    -------
    str
        Path to an MP4 file, which ``gr.Video`` can display directly.
    """
    # SVD was trained at 1024x576; resize the conditioning image to match.
    image = image.resize((1024, 576))
    # .frames is a list of per-prompt frame lists; take the single batch entry.
    frames = pipe(image, num_inference_steps=50, decode_chunk_size=8).frames[0]
    # gr.Video needs a file path (not raw frames), so serialize to a temp MP4.
    out_path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
    export_to_video(frames, out_path, fps=7)
    return out_path


# Create the Gradio interface. The prompt box is retained for backward
# compatibility but labelled as ignored, since SVD takes no text input.
iface = gr.Interface(
    fn=generate_video,
    inputs=[
        gr.Image(type="pil", label="Input Image"),
        gr.Textbox(
            lines=2,
            placeholder="Enter a prompt...",
            label="Prompt (ignored: SVD is image-to-video only)",
        ),
    ],
    outputs=gr.Video(label="Generated Video"),
    title="Stable Video Diffusion img2vid-xt",
    description=(
        "Generate a video from an image using the "
        "stabilityai/stable-video-diffusion-img2vid-xt model."
    ),
)

# Launch the interface only when run as a script.
if __name__ == "__main__":
    iface.launch()