Spaces:

Walid-Ahmed
/

image_gen_from_src_image

Sleeping

App Files Files Community

image_gen_from_src_image / app.py

Walid-Ahmed

Update app.py

f985251 verified 8 months ago

raw

history blame contribute delete

2.58 kB

	import spaces
	import gradio as gr
	import torch
	from diffusers import StableDiffusion3Pipeline
	from huggingface_hub import snapshot_download,login
	from transformers import pipeline
	from PIL import Image
	import os


	# Read the Hugging Face access token from the environment. An empty value is
	# treated the same as a missing one so that a blank secret fails here with a
	# clear message instead of being handed to login().
	huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
	if not huggingface_token:
	    raise ValueError("HUGGINGFACE_TOKEN environment variable is not set.")

	# Authenticate this process with the Hugging Face Hub.
	login(token=huggingface_token)

	# Pick the compute device: the GPU when CUDA is usable, otherwise the CPU.
	device = "cuda" if torch.cuda.is_available() else "cpu"

	# Report which device was selected.
	print(
	    "CUDA is available. Using GPU."
	    if device == "cuda"
	    else "CUDA is not available. Using CPU."
	)

	# Download the Stable Diffusion 3 Medium weights into a local directory.
	# ignore_patterns entries are glob patterns: "*.md" skips Markdown files,
	# whereas a bare ".md" would only match a file literally named ".md".
	model_path = snapshot_download(
	    repo_id="stabilityai/stable-diffusion-3-medium",
	    revision="refs/pr/26",
	    repo_type="model",
	    ignore_patterns=["*.md", ".gitattributes"],
	    local_dir="stable-diffusion-3-medium",
	    token=huggingface_token,
	)

	# Build the text-to-image pipeline without the third text encoder/tokenizer.
	# Half precision is used only on the GPU; on the CPU fall back to float32,
	# since float16 is poorly supported there.
	image_gen = StableDiffusion3Pipeline.from_pretrained(
	    model_path,
	    text_encoder_3=None,
	    tokenizer_3=None,
	    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
	)
	image_gen = image_gen.to(device)

	# Load the image-to-text (captioning) pipeline on the same device.
	caption_image = pipeline(
	    "image-to-text",
	    model="Salesforce/blip-image-captioning-large",
	    device=device,
	)


	@spaces.GPU(enable_queue=True)
	def generate_image_from_caption(image, num_inference_steps=50, guidance_scale=7.5):
	    """Caption the given image, then generate a new image from that caption.

	    Args:
	        image: The source image passed to the captioning pipeline.
	        num_inference_steps: Number of diffusion steps. UI sliders may
	            deliver this as a float, so it is rounded to an integer.
	        guidance_scale: Guidance scale forwarded to the diffusion pipeline.

	    Returns:
	        The first image produced by the diffusion pipeline (512x512).

	    Raises:
	        gr.Error: If no image was provided.
	    """
	    # Fail with a readable message instead of an opaque captioner error.
	    if image is None:
	        raise gr.Error("Please upload an image first.")

	    # Generate the caption
	    caption = caption_image(image)[0]['generated_text']
	    print("Generated Caption:", caption)

	    # Generate the image from the caption. The step count must be a whole
	    # number, so coerce whatever numeric type the caller supplied.
	    result = image_gen(
	        prompt=caption,
	        num_inference_steps=int(round(num_inference_steps)),
	        guidance_scale=guidance_scale,
	        negative_prompt="blurred, ugly, watermark, low resolution, blurry",
	        height=512,
	        width=512,
	    )

	    # Get the generated image
	    return result.images[0]

	# Create the Gradio interface: one image input plus two tuning sliders,
	# producing a single generated image.
	iface = gr.Interface(
	    fn=generate_image_from_caption,
	    inputs=[
	        gr.Image(type="pil", label="Upload an image"),
	        # step=1 keeps the step count a whole number; without it the slider
	        # picks a fractional increment for this range.
	        gr.Slider(label="Number of inference steps", minimum=1, maximum=100, step=1, value=50),
	        gr.Slider(label="Guidance scale", minimum=1.0, maximum=20.0, value=7.5),
	    ],
	    outputs=gr.Image(label="Generated Image"),
	    title="Image-to-Image Generator using Caption",
	    description="Upload an image to generate a caption, and then use the caption as a prompt to generate a new image using Stable Diffusion.",
	)

	# Launch the Gradio app
	iface.launch()