import spaces
import gradio as gr
import numpy as np
import torch

from toonmage import attention_processor as attention
from toonmage.pipeline import ToonMagePipeline
from toonmage.utils import resize_numpy_image_long, seed_everything

torch.set_grad_enabled(False)

pipeline = ToonMagePipeline()

# other params
DEFAULT_NEGATIVE_PROMPT = (
    'cross-eyed, blurry, deformed eyeballs, deformed, deformed or partially rendered eyes, '
    'partially rendered objects, low resolution, disfigured hands, ugly, mutated, glitch, '
    'watermark, text, artifacts, noise, worst quality, low quality, non-HDRi, lowres, flaws, '
    'flaws in the face, flaws in the eyes, extra limbs, signature'
)


@spaces.GPU
def run(*args):
    id_image = args[0]
    supp_images = args[1:4]
    prompt, neg_prompt, scale, n_samples, seed, steps, H, W, id_scale, mode, id_mix = args[4:]

    pipeline.debug_img_list = []

    # configure the attention processor for the selected mode
    if mode == 'fidelity':
        attention.NUM_ZERO = 8
        attention.ORTHO = False
        attention.ORTHO_v2 = True
    elif mode == 'extremely style':
        attention.NUM_ZERO = 16
        attention.ORTHO = True
        attention.ORTHO_v2 = False
    else:
        raise ValueError(f'unknown mode: {mode}')

    if id_image is not None:
        id_image = resize_numpy_image_long(id_image, 1024)
        id_embeddings = pipeline.get_id_embedding(id_image)
        # append embeddings from the auxiliary ID images; without ID Mix only the first 5 tokens are kept
        for supp_id_image in supp_images:
            if supp_id_image is not None:
                supp_id_image = resize_numpy_image_long(supp_id_image, 1024)
                supp_id_embeddings = pipeline.get_id_embedding(supp_id_image)
                id_embeddings = torch.cat(
                    (id_embeddings, supp_id_embeddings if id_mix else supp_id_embeddings[:, :5]), dim=1
                )
    else:
        id_embeddings = None

    seed_everything(seed)
    ims = []
    for _ in range(n_samples):
        img = pipeline.inference(prompt, (1, H, W), neg_prompt, id_embeddings, id_scale, scale, steps)[0]
        ims.append(np.array(img))

    return ims, pipeline.debug_img_list


_MARKDOWN_ = """
This demo uses the FLUX pipeline for image-to-image translation.

**Tips**
- A smaller value for the timestep at which ID insertion starts gives higher fidelity but reduces editability, and vice versa. The value ranges from 0 to 4: use 0-1 to generate a stylized scene and 4 for a photorealistic image.
- It is recommended to use fake CFG by setting the true CFG scale to 1 and varying only the guidance scale; in a few cases, however, true CFG can yield better results.

Try it out with different prompts on your image and please provide your feedback.

**Demo by [Sunder Ali Khowaja](https://sander-ali.github.io) - [X](https://x.com/SunderAKhowaja) - [Github](https://github.com/sander-ali) - [Hugging Face](https://huggingface.co/SunderAli17)**
"""

theme = gr.themes.Soft(
    font=[gr.themes.GoogleFont('Source Code Pro'), gr.themes.GoogleFont('Public Sans'), 'system-ui', 'sans-serif'],
)

# force the dark theme by rewriting the URL query parameter on load
js_func = """
function refresh() {
    const url = new URL(window.location);
    if (url.searchParams.get('__theme') !== 'dark') {
        url.searchParams.set('__theme', 'dark');
        window.location.href = url.href;
    }
}
"""

with gr.Blocks(title="ToonMagev2", js=js_func, theme=theme) as SAK:
    gr.Markdown(_MARKDOWN_)
    with gr.Row():
        with gr.Column():
            with gr.Row():
                face_image = gr.Image(label="ID image (main)", sources="upload", type="numpy", height=256)
                supp_image1 = gr.Image(
                    label="Additional ID image (auxiliary)", sources="upload", type="numpy", height=256
                )
                supp_image2 = gr.Image(
                    label="Additional ID image (auxiliary)", sources="upload", type="numpy", height=256
                )
                supp_image3 = gr.Image(
                    label="Additional ID image (auxiliary)", sources="upload", type="numpy", height=256
                )
            prompt = gr.Textbox(label="Prompt", value='portrait,cinematic,wolf ears,white hair')
            submit = gr.Button("Generate")
            neg_prompt = gr.Textbox(label="Negative Prompt", value=DEFAULT_NEGATIVE_PROMPT)
            scale = gr.Slider(
                label="CFG (recommended range [1, 1.5]; 1 is faster)",
                value=1.2,
                minimum=1,
                maximum=1.5,
                step=0.1,
            )
            n_samples = gr.Slider(label="Num samples", value=4, minimum=1, maximum=4, step=1)
            seed = gr.Slider(
                label="Seed", value=42, minimum=np.iinfo(np.uint32).min, maximum=np.iinfo(np.uint32).max, step=1
            )
            steps = gr.Slider(label="Steps", value=4, minimum=1, maximum=8, step=1)
            with gr.Row():
                H = gr.Slider(label="Height", value=1024, minimum=512, maximum=1280, step=64)
                W = gr.Slider(label="Width", value=768, minimum=512, maximum=1280, step=64)
            with gr.Row():
                id_scale = gr.Slider(label="ID scale", minimum=0, maximum=5, step=0.05, value=0.8, interactive=True)
                mode = gr.Dropdown(label="mode", choices=['fidelity', 'extremely style'], value='fidelity')
                id_mix = gr.Checkbox(
                    label="ID Mix (turn this on to mix two ID images; otherwise leave it off)",
                    value=False,
                )

            gr.Markdown("## Examples")
            example_inps = [
                [
                    'portrait,cinematic,wolf ears,white hair',
                    'sample_img/sample_img_test24.jpg',
                    'fidelity',
                ]
            ]
            gr.Examples(examples=example_inps, inputs=[prompt, face_image, mode], label='realistic')

            example_inps = [
                [
                    'portrait, impressionist painting, loose brushwork, vibrant color, light and shadow play',
                    'sample_img/sample_img_test1.jpg',
                    'fidelity',
                ]
            ]
            gr.Examples(examples=example_inps, inputs=[prompt, face_image, mode], label='painting style')

            example_inps = [
                [
                    'portrait, flat papercut style, silhouette, clean cuts, paper, sharp edges, minimalist,color block,man',
                    'sample_img/lecun.jpg',
                    'fidelity',
                ]
            ]
            gr.Examples(examples=example_inps, inputs=[prompt, face_image, mode], label='papercut style')

            example_inps = [
                [
                    'woman,cartoon,solo,Popmart Blind Box, Super Mario, 3d',
                    'sample_img/sample_img_test24.jpg',
                    'fidelity',
                ]
            ]
            gr.Examples(examples=example_inps, inputs=[prompt, face_image, mode], label='3d style')

            example_inps = [
                [
                    'portrait, the legend of zelda, anime',
                    'sample_img/image1.png',
                    'extremely style',
                ]
            ]
            gr.Examples(examples=example_inps, inputs=[prompt, face_image, mode], label='anime style')

            example_inps = [
                [
                    'portrait, superman',
                    'sample_img/lecun.jpg',
                    'sample_img/sample_img_test1.jpg',
                    'fidelity',
                    True,
                ]
            ]
            gr.Examples(
                examples=example_inps, inputs=[prompt, face_image, supp_image1, mode, id_mix], label='id mix'
            )

        with gr.Column():
            output = gr.Gallery(label='Output', elem_id="gallery")
            intermediate_output = gr.Gallery(label='DebugImage', elem_id="gallery", visible=False)

    inps = [
        face_image,
        supp_image1,
        supp_image2,
        supp_image3,
        prompt,
        neg_prompt,
        scale,
        n_samples,
        seed,
        steps,
        H,
        W,
        id_scale,
        mode,
        id_mix,
    ]
    submit.click(fn=run, inputs=inps, outputs=[output, intermediate_output])

SAK.launch()
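
# A minimal, illustrative sketch (kept commented out so it does not affect the Space):
# how `run` could be invoked headlessly, bypassing the Gradio UI. It assumes the
# toonmage package is importable, that `sample_img/sample_img_test24.jpg` exists, and
# that calling the `@spaces.GPU`-decorated function outside a request context works in
# your environment. The argument order mirrors the `inps` list above.
#
# from PIL import Image
#
# id_img = np.array(Image.open('sample_img/sample_img_test24.jpg'))
# images, _ = run(
#     id_img, None, None, None,                   # main ID image + three optional auxiliary IDs
#     'portrait,cinematic,wolf ears,white hair',  # prompt
#     DEFAULT_NEGATIVE_PROMPT,                    # negative prompt
#     1.2, 1, 42, 4, 1024, 768,                   # CFG scale, n_samples, seed, steps, H, W
#     0.8, 'fidelity', False,                     # id_scale, mode, id_mix
# )
# Image.fromarray(images[0]).save('output.png')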