Spaces:
Runtime error
Runtime error
File size: 6,510 Bytes
5c6a167 9ac31b8 3b06696 775329e 9ac31b8 3b06696 a9235bb 25d3956 e3d310b dc2eed4 775329e dc2eed4 9ac31b8 775329e d56d267 9ac31b8 8010ebe 9ac31b8 d56d267 dc2eed4 d56d267 0cd72ee 9ac31b8 25d3956 3b06696 492fffc 3b06696 9ac31b8 d56d267 3b06696 9ac31b8 25d3956 9ac31b8 d56d267 9ac31b8 0cd72ee 5c6a167 9ac31b8 0cd72ee 3b06696 0cd72ee d56d267 3b06696 d56d267 3b06696 d56d267 ff46702 d56d267 dc2eed4 25d3956 0cd72ee 25d3956 c8d4706 3780d1e 492fffc 25d3956 d56d267 492fffc 9ac31b8 d56d267 decf237 ba7dc43 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
import spaces
import gradio as gr
import torch
import os
from glob import glob
from pathlib import Path
from typing import Optional
from huggingface_hub import HfFolder
from diffusers import StableVideoDiffusionPipeline
from diffusers.utils import load_image, export_to_video
from PIL import Image
import uuid
import random
from huggingface_hub import hf_hub_download
title = '''# ๐๐ปWelcome to Tonic's๐๐ฅStableVideo XT-1-1
๐๐ฅStableVideo XT-1-1 (SVD) Image-to-Video is a latent diffusion model trained to generate short video clips from an image conditioning. Check out the [Community demo for Stable Video Diffusion](https://huggingface.co/spaces/multimodalart/stable-video-diffusion) - Img2Vid - XT ([model](https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt-1-1), [paper](https://stability.ai/research/stable-video-diffusion-scaling-latent-video-diffusion-models-to-large-datasets), [stability's ui waitlist](https://stability.ai/contact))
#### Research release ([_non-commercial_](https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt/blob/main/LICENSE)): generate `4s` vid from a single image at (`25 frames` at `6 fps`). this demo uses [๐งจ diffusers for low VRAM and fast generation](https://huggingface.co/docs/diffusers/main/en/using-diffusers/svd).
Join us : ๐TeamTonic๐ is always making cool demos! Join our active builder's๐ ๏ธcommunity ๐ป [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/GWpVpekp) On ๐คHuggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) On ๐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to ๐ [SciTonic](https://github.com/Tonic-AI/scitonic) ๐คBig thanks to Yuvi Sharma and all the folks at huggingface for the community grant ๐ค
'''
# --- Authentication -------------------------------------------------------
# The Hugging Face access token is read from the HF_TOKEN environment
# variable; start-up is aborted when it is missing or empty.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    raise ValueError("Hugging Face token not found. Please set the HF_TOKEN environment variable.")

# Store the token via HfFolder and also hand it to from_pretrained below.
HfFolder.save_token(hf_token)

# --- Model ----------------------------------------------------------------
# Load the fp16 variant of the pipeline in half precision, move it to the
# GPU, and compile the UNet.
pipe = StableVideoDiffusionPipeline.from_pretrained(
    "stabilityai/stable-video-diffusion-img2vid-xt-1-1",
    torch_dtype=torch.float16,
    variant="fp16",
    use_auth_token=hf_token,
)
pipe.to("cuda")
pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
# Only the UNet is compiled; the VAE is used as loaded.

# Largest signed 64-bit integer; used as the upper bound for seeds.
max_64_bit_int = 2**63 - 1
@spaces.GPU(enable_queue=True)
def sample(
    image: Image.Image,
    seed: Optional[int] = 42,
    randomize_seed: bool = True,
    motion_bucket_id: int = 127,
    fps_id: int = 6,
    version: str = "svd_xt",
    cond_aug: float = 0.02,
    decoding_t: int = 3,  # Number of frames decoded at a time! This eats most VRAM. Reduce if necessary.
    device: str = "cuda",
    output_folder: str = "outputs",
):
    """Generate a short video from a single conditioning image.

    Args:
        image: Conditioning image (PIL). Any mode other than RGB is
            converted to RGB before it is passed to the pipeline.
        seed: Seed for the torch generator. Ignored when ``randomize_seed``
            is true; a random seed is also drawn when it is ``None``.
        randomize_seed: Draw a fresh random seed in ``[0, max_64_bit_int]``.
        motion_bucket_id: Forwarded to the pipeline as ``motion_bucket_id``.
        fps_id: Frame rate used when the frames are written to the video.
        version: Accepted for interface compatibility; not read here.
        cond_aug: Accepted for interface compatibility; not read here
            (``noise_aug_strength`` is fixed at 0.1 below).
        decoding_t: Forwarded to the pipeline as ``decode_chunk_size``.
        device: Accepted for interface compatibility; not read here.
        output_folder: Directory that receives the ``.mp4`` file; created
            if it does not exist.

    Returns:
        A ``(video_path, seed)`` tuple: the path of the written video and
        the seed that was actually used.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        # Without this guard the click handler dies with an opaque
        # AttributeError on `image.mode`.
        raise gr.Error("Please upload an image before generating a video.")

    # Normalise every non-RGB mode (RGBA, palette, grayscale, ...), not
    # only RGBA, so the pipeline always receives a three-channel image.
    if image.mode != "RGB":
        image = image.convert("RGB")

    if randomize_seed or seed is None:
        seed = random.randint(0, max_64_bit_int)
    # UI sliders may deliver the value as a float; manual_seed needs an int.
    seed = int(seed)
    generator = torch.manual_seed(seed)

    os.makedirs(output_folder, exist_ok=True)
    base_count = len(glob(os.path.join(output_folder, "*.mp4")))
    # The running count alone is not unique (concurrent requests, deleted
    # files), so a random suffix keeps an earlier video from being overwritten.
    video_path = os.path.join(
        output_folder, f"{base_count:06d}_{uuid.uuid4().hex[:8]}.mp4"
    )

    # NOTE(review): the UI text advertises 25 frames while 14 are requested
    # here - confirm which is intended.
    frames = pipe(
        image,
        decode_chunk_size=decoding_t,
        generator=generator,
        motion_bucket_id=motion_bucket_id,
        noise_aug_strength=0.1,
        num_frames=14,
    ).frames[0]
    export_to_video(frames, video_path, fps=fps_id)
    return video_path, seed
def resize_image(image, output_size=(1024, 576), resample=None):
    """Scale *image* to cover ``output_size`` and centre-crop it to that size.

    The image is resized, keeping its aspect ratio, so that it fully covers
    the target rectangle; the overflow along the longer axis is then cropped
    away symmetrically.

    Args:
        image: Object exposing ``width``, ``height`` and ``resize(size,
            resample)`` whose result exposes ``crop(box)`` (a PIL image).
        output_size: Target ``(width, height)`` in pixels.
        resample: Resampling filter passed to ``resize``. Defaults to
            ``Image.LANCZOS`` when omitted.

    Returns:
        The cropped image, exactly ``output_size`` pixels in size.
    """
    if resample is None:
        resample = Image.LANCZOS

    target_width, target_height = output_size
    target_aspect = target_width / target_height
    image_aspect = image.width / image.height

    if image_aspect > target_aspect:
        # Source is wider than the target: match the height, crop the sides.
        new_height = target_height
        new_width = int(new_height * image_aspect)
    else:
        # Source is taller (or equal): match the width, crop top and bottom.
        new_width = target_width
        new_height = int(new_width / image_aspect)

    # int() truncates; clamp so float rounding can never leave the resized
    # image one pixel smaller than the crop window.
    new_width = max(new_width, target_width)
    new_height = max(new_height, target_height)

    resized_image = image.resize((new_width, new_height), resample)

    # Integer crop box: fractional coordinates are rounded independently per
    # edge, which can make the result one pixel too large or too small.
    left = (new_width - target_width) // 2
    top = (new_height - target_height) // 2
    return resized_image.crop(
        (left, top, left + target_width, top + target_height)
    )
# NOTE(review): the nesting below is reconstructed - confirm that the video
# player is meant to sit in the row beside the upload column.
with gr.Blocks() as demo:
    gr.Markdown(title)

    # Input image and trigger button on one side, resulting video on the other.
    with gr.Row():
        with gr.Column():
            image = gr.Image(label="Upload your image", type="pil")
            generate_btn = gr.Button("Generate")
        video = gr.Video()

    # Generation parameters, collapsed by default.
    with gr.Accordion("Advanced options", open=False):
        seed = gr.Slider(
            label="Seed",
            value=42,
            randomize=True,
            minimum=0,
            maximum=max_64_bit_int,
            step=1,
        )
        randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
        motion_bucket_id = gr.Slider(
            label="Motion bucket id",
            info="Controls how much motion to add/remove from the image",
            value=127,
            minimum=1,
            maximum=255,
        )
        fps_id = gr.Slider(
            label="Frames per second",
            info="The length of your video in seconds will be 25/fps",
            value=6,
            minimum=5,
            maximum=30,
        )

    # Every uploaded image is resized and cropped in place before use.
    image.upload(fn=resize_image, inputs=image, outputs=image, queue=False)

    # The seed slider is also an output, so it shows the seed actually used.
    generate_btn.click(
        fn=sample,
        inputs=[image, seed, randomize_seed, motion_bucket_id, fps_id],
        outputs=[video, seed],
        api_name="video",
    )

    example_images = [
        "images/blink_meme.png",
        "images/confused2_meme.png",
        "images/disaster_meme.png",
        "images/distracted_meme.png",
        "images/hide_meme.png",
        "images/nazare_meme.png",
        "images/success_meme.png",
        "images/willy_meme.png",
        "images/wink_meme.png",
    ]
    gr.Examples(
        examples=example_images,
        inputs=image,
        outputs=[video, seed],
        fn=sample,
        cache_examples=True,
    )
# Script entry point: enable the request queue (at most 20 waiting requests)
# and start the app with a public share link requested.
if __name__ == "__main__":
    demo.queue(max_size=20)
    demo.launch(share=True)