|
import spaces |
|
import gradio as gr |
|
import torch |
|
from diffusers import StableDiffusion3Pipeline |
|
from huggingface_hub import snapshot_download,login |
|
from transformers import pipeline |
|
from PIL import Image |
|
import os |
|
|
|
|
|
|
|
# Read the Hugging Face access token from the environment and fail fast when
# it is absent, so the problem surfaces before any download is attempted.
huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
if huggingface_token is None:
    raise ValueError("HUGGINGFACE_TOKEN environment variable is not set.")

# Authenticate this process with the Hugging Face Hub using that token.
login(token=huggingface_token)
|
|
|
|
|
# Choose the compute device once at start-up and log which one was picked.
if torch.cuda.is_available():
    device = "cuda"
    print("CUDA is available. Using GPU.")
else:
    device = "cpu"
    print("CUDA is not available. Using CPU.")
|
|
|
|
|
# Download the Stable Diffusion 3 Medium repository snapshot into a local
# directory and keep the resulting path for loading the pipeline.
model_path = snapshot_download(
    "stabilityai/stable-diffusion-3-medium",
    repo_type="model",
    # NOTE(review): pinned to a pull-request ref rather than the main branch;
    # presumably the PR that carries the weights in the layout this app
    # expects. Confirm it is still required.
    revision="refs/pr/26",
    # Markdown files and .gitattributes are not needed at runtime.
    ignore_patterns=["*.md", "*.gitattributes"],
    local_dir="stable-diffusion-3-medium",
    token=huggingface_token,
)
|
# Build the text-to-image pipeline from the downloaded snapshot.
# text_encoder_3 / tokenizer_3 are passed as None so the pipeline is created
# without its third text encoder (per the diffusers SD3 docs this lowers
# memory use at some cost in prompt fidelity).
image_gen = StableDiffusion3Pipeline.from_pretrained(
    model_path,
    text_encoder_3=None,
    tokenizer_3=None,
    # float16 is only appropriate on GPU; half-precision inference is poorly
    # supported on CPU, so use float32 when falling back to the CPU.
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
)
image_gen = image_gen.to(device)

# Image captioning pipeline (BLIP large) placed on the same device.
caption_image = pipeline(
    "image-to-text",
    model="Salesforce/blip-image-captioning-large",
    device=device,
)
|
|
|
|
|
@spaces.GPU(enable_queue=True)
def generate_image_from_caption(image, num_inference_steps=50, guidance_scale=7.5):
    """Caption an image, then generate a new image from that caption.

    Args:
        image: The input image handed over by the UI (configured there as a
            PIL image). ``None`` means nothing was uploaded.
        num_inference_steps: Number of denoising steps. May arrive as a float
            from a slider widget; it is converted to ``int`` before use.
        guidance_scale: Guidance strength passed to the diffusion pipeline.

    Returns:
        The first image produced by the diffusion pipeline.

    Raises:
        gr.Error: If no image was provided.
    """
    # Fail with a readable UI message instead of an opaque captioner error.
    if image is None:
        raise gr.Error("Please upload an image before generating.")

    # The captioner returns a list of results; use the first result's text.
    caption = caption_image(image)[0]['generated_text']
    print("Generated Caption:", caption)

    result = image_gen(
        prompt=caption,
        # Slider values can be floats, but the step count must be an integer.
        num_inference_steps=int(num_inference_steps),
        guidance_scale=float(guidance_scale),
        negative_prompt="blurred, ugly, watermark, low resolution, blurry",
        height=512,
        width=512,
    )

    return result.images[0]
|
|
|
|
|
# Gradio front end: one image upload plus two sliders feed
# generate_image_from_caption, and the generated image is displayed.
iface = gr.Interface(
    fn=generate_image_from_caption,
    inputs=[
        gr.Image(type="pil", label="Upload an image"),
        # step=1 restricts the slider to whole numbers; without it Gradio
        # derives a fractional increment from the range, and a fractional
        # step count is not meaningful for the diffusion pipeline.
        gr.Slider(label="Number of inference steps", minimum=1, maximum=100, value=50, step=1),
        gr.Slider(label="Guidance scale", minimum=1.0, maximum=20.0, value=7.5),
    ],
    outputs=gr.Image(label="Generated Image"),
    title="Image-to-Image Generator using Caption",
    description="Upload an image to generate a caption, and then use the caption as a prompt to generate a new image using Stable Diffusion.",
)

# Start the web server for the interface.
iface.launch()
|
|