Spaces:
Runtime error
Runtime error
Prgckwb
committed on
Commit
·
6a91e71
1
Parent(s):
3c9c988
:tada: init
Browse files
- app.py +23 -7
- assets/ramen.jpg +0 -0
app.py
CHANGED
@@ -7,6 +7,7 @@ import gradio as gr
|
|
7 |
import matplotlib.pyplot as plt
|
8 |
import numpy as np
|
9 |
import torch
|
|
|
10 |
import torch.nn.functional as F
|
11 |
from PIL import Image
|
12 |
from pathlib import Path
|
@@ -188,7 +189,11 @@ unet.set_attn_processor(
|
|
188 |
|
189 |
|
190 |
@torch.inference_mode()
|
191 |
-
def inference(
|
|
|
|
|
|
|
|
|
192 |
progress(0, "Initializing...")
|
193 |
image = Image.open(image_path)
|
194 |
image = image.convert("RGB").resize((512, 512))
|
@@ -269,7 +274,11 @@ def inference(image_path: str, prompt: str, progress=gr.Progress(track_tqdm=Fals
|
|
269 |
).squeeze(0) # (77, 512, 512)
|
270 |
|
271 |
# <bos> と <eos> トークンの間に挿入されたトークンのみを取得
|
272 |
-
|
|
|
|
|
|
|
|
|
273 |
cross_attention_probs_list.append(mean_cross_attn_probs)
|
274 |
|
275 |
# list -> torch.Tensor
|
@@ -281,7 +290,10 @@ def inference(image_path: str, prompt: str, progress=gr.Progress(track_tqdm=Fals
|
|
281 |
image_list = []
|
282 |
# 各行ごとに画像を作成し保存
|
283 |
for i in tqdm(range(cross_attention_probs.shape[0]), desc="Saving images..."):
|
284 |
-
|
|
|
|
|
|
|
285 |
|
286 |
for j in range(cross_attention_probs.shape[1]):
|
287 |
# 各クラスのアテンションマップを Min-Max 正規化 (0~1)
|
@@ -297,12 +309,15 @@ def inference(image_path: str, prompt: str, progress=gr.Progress(track_tqdm=Fals
|
|
297 |
# 各行ごとの画像を保存
|
298 |
out_dir = Path("output")
|
299 |
out_dir.mkdir(exist_ok=True)
|
300 |
-
|
|
|
|
|
301 |
plt.savefig(filepath, bbox_inches='tight', pad_inches=0)
|
302 |
plt.close(fig)
|
303 |
|
304 |
# 保存した画像をPILで読み込んでリストに追加
|
305 |
image_list.append(Image.open(filepath))
|
|
|
306 |
return image_list
|
307 |
|
308 |
|
@@ -333,13 +348,14 @@ if __name__ == '__main__':
|
|
333 |
fn=inference,
|
334 |
inputs=[
|
335 |
gr.Image(type="filepath", label="Input", width=512, height=512),
|
336 |
-
gr.Textbox(label="Prompt", placeholder="e.g.) A photo of dog...")
|
|
|
337 |
],
|
338 |
outputs=ca_output,
|
339 |
cache_examples=True,
|
340 |
examples=[
|
341 |
-
["assets/aeroplane.png", "plane background"],
|
342 |
-
["assets/dogcat.png", "a photo of dog
|
343 |
]
|
344 |
)
|
345 |
|
|
|
7 |
import matplotlib.pyplot as plt
|
8 |
import numpy as np
|
9 |
import torch
|
10 |
+
import uuid
|
11 |
import torch.nn.functional as F
|
12 |
from PIL import Image
|
13 |
from pathlib import Path
|
|
|
189 |
|
190 |
|
191 |
@torch.inference_mode()
|
192 |
+
def inference(
|
193 |
+
image_path: str,
|
194 |
+
prompt: str,
|
195 |
+
has_include_special_tokens: bool = False,
|
196 |
+
progress=gr.Progress(track_tqdm=False)):
|
197 |
progress(0, "Initializing...")
|
198 |
image = Image.open(image_path)
|
199 |
image = image.convert("RGB").resize((512, 512))
|
|
|
274 |
).squeeze(0) # (77, 512, 512)
|
275 |
|
276 |
# <bos> と <eos> トークンの間に挿入されたトークンのみを取得
|
277 |
+
if has_include_special_tokens:
|
278 |
+
mean_cross_attn_probs = mean_cross_attn_probs[:n_cond_tokens, ...] # (n_tokens, 512, 512)
|
279 |
+
else:
|
280 |
+
mean_cross_attn_probs = mean_cross_attn_probs[1:n_cond_tokens - 1, ...]
|
281 |
+
|
282 |
cross_attention_probs_list.append(mean_cross_attn_probs)
|
283 |
|
284 |
# list -> torch.Tensor
|
|
|
290 |
image_list = []
|
291 |
# 各行ごとに画像を作成し保存
|
292 |
for i in tqdm(range(cross_attention_probs.shape[0]), desc="Saving images..."):
|
293 |
+
if has_include_special_tokens:
|
294 |
+
fig, ax = plt.subplots(1, n_cond_tokens, figsize=(16, 4))
|
295 |
+
else:
|
296 |
+
fig, ax = plt.subplots(1, n_cond_tokens - 2, figsize=(16, 4))
|
297 |
|
298 |
for j in range(cross_attention_probs.shape[1]):
|
299 |
# 各クラスのアテンションマップを Min-Max 正規化 (0~1)
|
|
|
309 |
# 各行ごとの画像を保存
|
310 |
out_dir = Path("output")
|
311 |
out_dir.mkdir(exist_ok=True)
|
312 |
+
# 一意なランダムファイル名を生成
|
313 |
+
unique_filename = str(uuid.uuid4())
|
314 |
+
filepath = out_dir / f"{unique_filename}.png"
|
315 |
plt.savefig(filepath, bbox_inches='tight', pad_inches=0)
|
316 |
plt.close(fig)
|
317 |
|
318 |
# 保存した画像をPILで読み込んでリストに追加
|
319 |
image_list.append(Image.open(filepath))
|
320 |
+
attn_processor.reset_attention_stores()
|
321 |
return image_list
|
322 |
|
323 |
|
|
|
348 |
fn=inference,
|
349 |
inputs=[
|
350 |
gr.Image(type="filepath", label="Input", width=512, height=512),
|
351 |
+
gr.Textbox(label="Prompt", placeholder="e.g.) A photo of dog..."),
|
352 |
+
gr.Checkbox(label="Include Special Tokens", value=False),
|
353 |
],
|
354 |
outputs=ca_output,
|
355 |
cache_examples=True,
|
356 |
examples=[
|
357 |
+
["assets/aeroplane.png", "plane background", False],
|
358 |
+
["assets/dogcat.png", "a photo of dog", False],
|
359 |
]
|
360 |
)
|
361 |
|
assets/ramen.jpg
ADDED
![]() |