# NOTE(review): removed non-Python residue (file-viewer header, git blob
# hashes, and a rendered line-number column) that preceded the code and
# made the file unparseable.
import gradio as gr
import torch
from PIL import Image
from diffusers import AutoPipelineForText2Image, DDIMScheduler
from transformers import CLIPVisionModelWithProjection
import numpy as np
import spaces

# Initialize the image encoder and pipeline outside the function
# so the (expensive) model downloads/loads happen once at startup,
# not on every Gradio request.
# CLIP vision encoder required by the IP-Adapter "plus" (ViT-H) weights;
# fp16 halves memory and matches the pipeline dtype below.
image_encoder = CLIPVisionModelWithProjection.from_pretrained(
    "h94/IP-Adapter",
    subfolder="models/image_encoder",
    torch_dtype=torch.float16,
)

# SDXL base text-to-image pipeline, wired to the custom image encoder above.
pipeline = AutoPipelineForText2Image.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
    image_encoder=image_encoder,
)
# Swap in DDIM, keeping the pipeline's existing scheduler configuration.
pipeline.scheduler = DDIMScheduler.from_config(pipeline.scheduler.config)
# Two IP-Adapters loaded in order [style, face]; per-adapter scales are set
# later via set_ip_adapter_scale([soy_strength, face_strength]) in the same order.
pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name=["ip-adapter-plus_sdxl_vit-h.safetensors", "ip-adapter-plus-face_sdxl_vit-h.safetensors"])
# Offload idle submodules to CPU to fit SDXL in limited GPU memory.
pipeline.enable_model_cpu_offload()

@spaces.GPU
def transform_image(face_image, soy_strength, face_strength):
    """Generate a soyjak-styled image from an uploaded face.

    Args:
        face_image: Uploaded face as a PIL ``Image`` or numpy array
            (Gradio may deliver either, depending on component config).
        soy_strength: IP-Adapter scale for the style image, 0-1.
        face_strength: IP-Adapter scale for the face image, 0-1.

    Returns:
        The generated PIL ``Image``.

    Raises:
        ValueError: If no image was provided or the type is unsupported.
    """
    # Fixed seed so identical inputs reproduce identical outputs.
    generator = torch.Generator(device="cpu").manual_seed(0)

    if face_image is None:
        # Gradio passes None when the user submits without an upload.
        raise ValueError("No face image provided")
    if isinstance(face_image, Image.Image):
        processed_face_image = face_image
    elif isinstance(face_image, np.ndarray):
        processed_face_image = Image.fromarray(face_image)
    else:
        raise ValueError("Unsupported image format")
    # Normalize to RGB: uploads are often RGBA/palette/grayscale, which the
    # pipeline's CLIP image encoder does not accept.
    processed_face_image = processed_face_image.convert("RGB")

    style_image_path = "examples/soyjak2.jpg"
    style_image = Image.open(style_image_path).convert("RGB")

    # Scales map positionally to the adapters loaded at startup: [style, face].
    pipeline.set_ip_adapter_scale([soy_strength, face_strength])

    image = pipeline(
        prompt="soyjak",
        ip_adapter_image=[style_image, processed_face_image],
        negative_prompt="monochrome, lowres, bad anatomy, worst quality, low quality",
        num_inference_steps=50,
        generator=generator,
    ).images[0]

    return image

# Gradio interface setup with dynamic sliders.
# Inputs map positionally to transform_image(face_image, soy_strength, face_strength).
demo = gr.Interface(
    fn=transform_image,
    inputs=[
        gr.Image(label="Upload your face image"),
        # Slider defaults mirror the IP-Adapter scales: style weaker (0.7),
        # face identity at full strength (1.0).
        gr.Slider(minimum=0, maximum=1, step=0.05, value=0.7, label="Soy Strength"),
        gr.Slider(minimum=0, maximum=1, step=0.05, value=1.0, label="Face Strength")  # Renamed to Face Strength
    ],
    outputs=gr.Image(label="Your Soyjak"),
    title="InstaSoyjak - turn anyone into a Soyjak",
    description="All you need to do is upload an image and adjust the strengths. **Please use responsibly.**",
)

# Queue concurrent requests (single shared GPU pipeline); cap backlog at 20.
demo.queue(max_size=20)
demo.launch()