img2img-turbo-sketch

Sleeping

App Files Files Community

gaparmar committed on Mar 18

Commit

13ed5cd

•

1 Parent(s): d3864b2

sketch demo

Browse files

Files changed (2) hide show

app.py +4 -8
src/pix2pix_turbo.py +37 -24

app.py CHANGED Viewed

@@ -1,7 +1,3 @@
-"""
-3.43.1
-"""
 import os
 import sys
 import pdb
@@ -78,7 +74,8 @@ def run(image, prompt, prompt_template, style_name, seed, val_r):
     print("sketch updated")
     if image is None:
         ones = Image.new("L", (512, 512), 255)
-        return ones
     prompt = prompt_template.replace("{prompt}", prompt)
     image = image.convert("RGB")
     image_t = TF.to_tensor(image) > 0.5
@@ -234,8 +231,8 @@ with gr.Blocks(css="style.css") as demo:
                 <div class="pad2"> <button href="TODO" download="image" id="my-button-down" onclick='return theSketchDownloadFunction()'></button> </div>
             </div>
             """)
-            gr.Markdown("## Prompt", elem_id="tools_header")
-            prompt = gr.Textbox(label=None, value="", show_label=False)
             with gr.Row():
                 style = gr.Dropdown(label="Style", choices=STYLE_NAMES, value=DEFAULT_STYLE_NAME, scale=1)
                 prompt_temp = gr.Textbox(label="Prompt Style Template", value=styles[DEFAULT_STYLE_NAME], scale=2, max_lines=1)
@@ -269,4 +266,3 @@ with gr.Blocks(css="style.css") as demo:
 if __name__ == "__main__":
     demo.queue().launch(debug=True)

 import os
 import sys
 import pdb
     print("sketch updated")
     if image is None:
         ones = Image.new("L", (512, 512), 255)
+        temp_uri = pil_image_to_data_uri(ones)
+        return ones, gr.update(link=temp_uri), gr.update(link=temp_uri)
     prompt = prompt_template.replace("{prompt}", prompt)
     image = image.convert("RGB")
     image_t = TF.to_tensor(image) > 0.5
                 <div class="pad2"> <button href="TODO" download="image" id="my-button-down" onclick='return theSketchDownloadFunction()'></button> </div>
             </div>
             """)
+            # gr.Markdown("## Prompt", elem_id="tools_header")
+            prompt = gr.Textbox(label="Prompt", value="", show_label=True)
             with gr.Row():
                 style = gr.Dropdown(label="Style", choices=STYLE_NAMES, value=DEFAULT_STYLE_NAME, scale=1)
                 prompt_temp = gr.Textbox(label="Prompt Style Template", value=styles[DEFAULT_STYLE_NAME], scale=2, max_lines=1)
 if __name__ == "__main__":
     demo.queue().launch(debug=True)

src/pix2pix_turbo.py CHANGED Viewed

@@ -1,4 +1,6 @@
-import os, requests
 import pdb
 import copy
 from tqdm import tqdm
@@ -7,11 +9,13 @@ from transformers import AutoTokenizer, PretrainedConfig, CLIPTextModel
 from diffusers import AutoencoderKL, UNet2DConditionModel, DDPMScheduler
 from diffusers.utils.peft_utils import set_weights_and_activate_adapters
 from peft import LoraConfig
-from .model import make_1step_sched
 def my_vae_encoder_fwd(self, sample):
-    r"""The forward method of the `Encoder` class."""
     sample = self.conv_in(sample)
     l_blocks = []
     # down
@@ -27,6 +31,7 @@ def my_vae_encoder_fwd(self, sample):
     return sample
 def my_vae_decoder_fwd(self,sample, latent_embeds = None):
     sample = self.conv_in(sample)
     upscale_dtype = next(iter(self.up_blocks.parameters())).dtype
@@ -76,21 +81,33 @@ class Pix2Pix_Turbo(torch.nn.Module):
         vae = AutoencoderKL.from_pretrained("stabilityai/sd-turbo", subfolder="vae")
         unet = UNet2DConditionModel.from_pretrained("stabilityai/sd-turbo", subfolder="unet")
-        if name=="canny_to_image":
-            lora_rank = 8
-            P_UNET_SD="/home/gparmar/code/single_step_translation/output/paired/canny_canny_midjourney_512_512/sd21_turbo_direct_edge_withskip_opt_lora_8_proj/l2_lpips_gan_vagan_clip_224_patch_multilevel_sigmoid/lr_5e-5_l2_0.25_lpips_1_0.1_CLIPSIM_1.0/1node_8gpu_no_BS_1_GRAD_ACC_2/checkpoint-7501/unet_sd.pkl"
-            P_VAE_ENC_SD="/home/gparmar/code/single_step_translation/output/paired/canny_canny_midjourney_512_512/sd21_turbo_direct_edge_withskip_opt_lora_8_proj/l2_lpips_gan_vagan_clip_224_patch_multilevel_sigmoid/lr_5e-5_l2_0.25_lpips_1_0.1_CLIPSIM_1.0/1node_8gpu_no_BS_1_GRAD_ACC_2/checkpoint-7501/sd_vae_enc.pkl"
-            P_VAE_DEC_SD="/home/gparmar/code/single_step_translation/output/paired/canny_canny_midjourney_512_512/sd21_turbo_direct_edge_withskip_opt_lora_8_proj/l2_lpips_gan_vagan_clip_224_patch_multilevel_sigmoid/lr_5e-5_l2_0.25_lpips_1_0.1_CLIPSIM_1.0/1node_8gpu_no_BS_1_GRAD_ACC_2/checkpoint-7501/sd_vae_dec.pkl"
-            unet_lora_config = LoraConfig(r=lora_rank, init_lora_weights="gaussian", target_modules=[
-                "to_k", "to_q", "to_v", "to_out.0", "conv", "conv1", "conv2", "conv_shortcut", "conv_out",
-                "proj_in", "proj_out", "ff.net.2", "ff.net.0.proj"]
-            )
         if name=="sketch_to_image_stochastic":
             # download from url
-            url = "https://www.cs.cmu.edu/~clean-fid/tmp/img2img_turbo/ckpt/sketch_to_image_stochastic.pkl"
             os.makedirs(ckpt_folder, exist_ok=True)
-            outf = os.path.join(ckpt_folder, "sketch_to_image_stochastic.pkl")
             if not os.path.exists(outf):
                 print(f"Downloading checkpoint to {outf}")
                 response = requests.get(url, stream=True)
@@ -105,7 +122,6 @@ class Pix2Pix_Turbo(torch.nn.Module):
                 if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes:
                     print("ERROR, something went wrong")
                 print(f"Downloaded successfully to {outf}")
-            # p_ckpt = "/home/gparmar/code/img2img-turbo/single_step_translation/notebooks/DEMO/sketch_to_image_stochastic.pkl"
             p_ckpt = outf
             sd = torch.load(p_ckpt, map_location="cpu")
             unet_lora_config = LoraConfig(r=sd["rank_unet"], init_lora_weights="gaussian", target_modules=sd["unet_lora_target_modules"])
@@ -123,15 +139,17 @@ class Pix2Pix_Turbo(torch.nn.Module):
         vae.decoder.ignore_skip = False
         vae.add_adapter(vae_lora_config, adapter_name="vae_skip")
         unet.add_adapter(unet_lora_config)
-        unet.load_state_dict(sd["state_dict_unet"])
         unet.enable_xformers_memory_efficient_attention()
-        vae.load_state_dict(sd["state_dict_vae"])
         unet.to("cuda")
         vae.to("cuda")
         unet.eval()
         vae.eval()
         self.unet, self.vae = unet, vae
         self.timesteps = torch.tensor([999], device="cuda").long()
@@ -141,7 +159,6 @@ class Pix2Pix_Turbo(torch.nn.Module):
         caption_tokens = self.tokenizer(prompt, max_length=self.tokenizer.model_max_length,
                 padding="max_length", truncation=True, return_tensors="pt").input_ids.cuda()
         caption_enc = self.text_encoder(caption_tokens)[0]
         if deterministic:
             encoded_control = self.vae.encode(c_t).latent_dist.sample()*self.vae.config.scaling_factor
             model_pred = self.unet(encoded_control, self.timesteps, encoder_hidden_states=caption_enc,).sample
@@ -161,8 +178,4 @@ class Pix2Pix_Turbo(torch.nn.Module):
             x_denoised = self.sched.step(unet_output, self.timesteps, unet_input, return_dict=True).prev_sample
             self.vae.decoder.incoming_skip_acts = self.vae.encoder.current_down_blocks
             output_image = (self.vae.decode(x_denoised / self.vae.config.scaling_factor ).sample).clamp(-1,1)
         return output_image

+import os
+import requests
+import sys
 import pdb
 import copy
 from tqdm import tqdm
 from diffusers import AutoencoderKL, UNet2DConditionModel, DDPMScheduler
 from diffusers.utils.peft_utils import set_weights_and_activate_adapters
 from peft import LoraConfig
+p = "src/"
+sys.path.append(p)
+from model import make_1step_sched
+"""The forward method of the `Encoder` class."""
 def my_vae_encoder_fwd(self, sample):
     sample = self.conv_in(sample)
     l_blocks = []
     # down
     return sample
+"""The forward method of the `Decoder` class."""
 def my_vae_decoder_fwd(self,sample, latent_embeds = None):
     sample = self.conv_in(sample)
     upscale_dtype = next(iter(self.up_blocks.parameters())).dtype
         vae = AutoencoderKL.from_pretrained("stabilityai/sd-turbo", subfolder="vae")
         unet = UNet2DConditionModel.from_pretrained("stabilityai/sd-turbo", subfolder="unet")
+        if name=="edge_to_image":
+            url = "https://www.cs.cmu.edu/~img2img-turbo/models/edge_to_image_loras.pkl"
+            os.makedirs(ckpt_folder, exist_ok=True)
+            outf = os.path.join(ckpt_folder, "edge_to_image_loras.pkl")
+            if not os.path.exists(outf):
+                print(f"Downloading checkpoint to {outf}")
+                response = requests.get(url, stream=True)
+                total_size_in_bytes= int(response.headers.get('content-length', 0))
+                block_size = 1024  # 1 Kibibyte
+                progress_bar = tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True)
+                with open(outf, 'wb') as file:
+                    for data in response.iter_content(block_size):
+                        progress_bar.update(len(data))
+                        file.write(data)
+                progress_bar.close()
+                if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes:
+                    print("ERROR, something went wrong")
+                print(f"Downloaded successfully to {outf}")
+            p_ckpt = outf
+            sd = torch.load(p_ckpt, map_location="cpu")
+            unet_lora_config = LoraConfig(r=sd["rank_unet"], init_lora_weights="gaussian", target_modules=sd["unet_lora_target_modules"])
         if name=="sketch_to_image_stochastic":
             # download from url
+            url = "https://www.cs.cmu.edu/~img2img-turbo/models/sketch_to_image_stochastic_lora.pkl"
             os.makedirs(ckpt_folder, exist_ok=True)
+            outf = os.path.join(ckpt_folder, "sketch_to_image_stochastic_lora.pkl")
             if not os.path.exists(outf):
                 print(f"Downloading checkpoint to {outf}")
                 response = requests.get(url, stream=True)
                 if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes:
                     print("ERROR, something went wrong")
                 print(f"Downloaded successfully to {outf}")
             p_ckpt = outf
             sd = torch.load(p_ckpt, map_location="cpu")
             unet_lora_config = LoraConfig(r=sd["rank_unet"], init_lora_weights="gaussian", target_modules=sd["unet_lora_target_modules"])
         vae.decoder.ignore_skip = False
         vae.add_adapter(vae_lora_config, adapter_name="vae_skip")
         unet.add_adapter(unet_lora_config)
+        _sd_unet = unet.state_dict()
+        for k in sd["state_dict_unet"]: _sd_unet[k] = sd["state_dict_unet"][k]
+        unet.load_state_dict(_sd_unet)
         unet.enable_xformers_memory_efficient_attention()
+        _sd_vae = vae.state_dict()
+        for k in sd["state_dict_vae"]: _sd_vae[k] = sd["state_dict_vae"][k]
+        vae.load_state_dict(_sd_vae)
         unet.to("cuda")
         vae.to("cuda")
         unet.eval()
         vae.eval()
         self.unet, self.vae = unet, vae
         self.timesteps = torch.tensor([999], device="cuda").long()
         caption_tokens = self.tokenizer(prompt, max_length=self.tokenizer.model_max_length,
                 padding="max_length", truncation=True, return_tensors="pt").input_ids.cuda()
         caption_enc = self.text_encoder(caption_tokens)[0]
         if deterministic:
             encoded_control = self.vae.encode(c_t).latent_dist.sample()*self.vae.config.scaling_factor
             model_pred = self.unet(encoded_control, self.timesteps, encoder_hidden_states=caption_enc,).sample
             x_denoised = self.sched.step(unet_output, self.timesteps, unet_input, return_dict=True).prev_sample
             self.vae.decoder.incoming_skip_acts = self.vae.encoder.current_down_blocks
             output_image = (self.vae.decode(x_denoised / self.vae.config.scaling_factor ).sample).clamp(-1,1)
         return output_image