"""Gradio demo: image-to-video generation with Stable Video Diffusion.

NOTE(review): the original code loaded this checkpoint through
``transformers.pipeline("text-to-video-generation", ...)``, which is not a
valid transformers task and raises at startup.
``stabilityai/stable-video-diffusion-img2vid-xt`` is a *diffusers* model,
so it is loaded here with ``StableVideoDiffusionPipeline`` instead.
"""

import tempfile

import gradio as gr
import torch
from diffusers import StableVideoDiffusionPipeline
from diffusers.utils import export_to_video

MODEL_ID = "stabilityai/stable-video-diffusion-img2vid-xt"

# Pick the device at runtime instead of hard-coding "cuda": fp16 needs a GPU;
# fall back to fp32 on CPU so the demo still starts (slowly) without one.
_device = "cuda" if torch.cuda.is_available() else "cpu"
_dtype = torch.float16 if _device == "cuda" else torch.float32

pipe = StableVideoDiffusionPipeline.from_pretrained(
    MODEL_ID,
    torch_dtype=_dtype,
    variant="fp16" if _device == "cuda" else None,
)
pipe.to(_device)


def generate_video(image, prompt):
    """Generate a short video conditioned on *image* and return an MP4 path.

    Parameters
    ----------
    image : PIL.Image.Image
        Conditioning image from the Gradio ``Image`` component.
    prompt : str
        Kept only for interface compatibility. Stable Video Diffusion is an
        image-to-video model and accepts no text prompt, so this value is
        ignored.

    Returns
    -------
    str
        Path to an MP4 file, which ``gr.Video`` can display directly.
    """
    # SVD was trained at 1024x576; resize the conditioning image to match.
    image = image.resize((1024, 576))
    # .frames is a list of per-prompt frame lists; take the single batch entry.
    frames = pipe(image, num_inference_steps=50, decode_chunk_size=8).frames[0]
    # gr.Video needs a file path (not raw frames), so serialize to a temp MP4.
    out_path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
    export_to_video(frames, out_path, fps=7)
    return out_path


# Create the Gradio interface. The prompt box is retained for backward
# compatibility but labelled as ignored, since SVD takes no text input.
iface = gr.Interface(
    fn=generate_video,
    inputs=[
        gr.Image(type="pil", label="Input Image"),
        gr.Textbox(
            lines=2,
            placeholder="Enter a prompt...",
            label="Prompt (ignored: SVD is image-to-video only)",
        ),
    ],
    outputs=gr.Video(label="Generated Video"),
    title="Stable Video Diffusion img2vid-xt",
    description=(
        "Generate a video from an image using the "
        "stabilityai/stable-video-diffusion-img2vid-xt model."
    ),
)

# Launch the interface only when run as a script.
if __name__ == "__main__":
    iface.launch()