import tempfile

import gradio as gr
import imageio
import numpy as np
import spaces
import torch
from diffusers import StableVideoDiffusionPipeline, EulerDiscreteScheduler
from PIL import Image

# Load the Stable Video Diffusion model in half precision
model_id = "stabilityai/stable-video-diffusion-img2vid-xt"
try:
    pipe = StableVideoDiffusionPipeline.from_pretrained(
        model_id, torch_dtype=torch.float16, variant="fp16"
    )
    pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
    pipe.to("cuda")
except Exception as e:
    raise RuntimeError(f"Failed to load the model: {e}")


@spaces.GPU
def generate_video(image, num_frames=25, height=576, width=1024):
    try:
        # gr.Image(type="filepath") passes a path string; open it as an RGB image
        image = Image.open(image).convert("RGB")

        # gr.Number returns floats; the pipeline expects integer dimensions
        height, width = int(height), int(width)

        # Generate the video. The pipeline's .frames output is batched, so take
        # the first clip; decode_chunk_size caps how many frames are decoded at
        # once to keep peak GPU memory down.
        video_frames = pipe(
            image=image,
            num_frames=int(num_frames),
            height=height,
            width=width,
            decode_chunk_size=8,
        ).frames[0]

        # Reserve a temporary file path for the output video
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video:
            video_path = temp_video.name

        # Write the PIL frames as an MP4 using imageio (needs the ffmpeg backend)
        imageio.mimsave(video_path, [np.asarray(frame) for frame in video_frames], fps=30)

        return video_path
    except Exception as e:
        raise RuntimeError(f"Failed to generate the video: {e}")


# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## Image to Video with Stable Video Diffusion XT")
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="filepath", label="Upload Image")
            num_frames_input = gr.Slider(1, 50, step=1, value=25, label="Number of Frames")
            height_input = gr.Number(label="Resolution Height", value=576)
            width_input = gr.Number(label="Resolution Width", value=1024)
            run_button = gr.Button("Generate Video")
        with gr.Column():
            video_output = gr.Video(label="Generated Video")

    run_button.click(
        generate_video,
        inputs=[image_input, num_frames_input, height_input, width_input],
        outputs=video_output,
    )

# Launch the interface
if __name__ == "__main__":
    demo.launch()
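
# ---------------------------------------------------------------------------
# Dependency note (an assumption, not part of the original file): a Space
# running this script would likely need a requirements.txt along these lines.
# imageio must have its ffmpeg backend available to write .mp4 files.
#
#   gradio
#   torch
#   diffusers
#   transformers
#   accelerate
#   imageio[ffmpeg]
#   spaces
# ---------------------------------------------------------------------------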