Erasing-Concepts-In-Diffusion

Runtime error

App Files Files Community

Damian Stewart committed on Aug 6, 2023

Commit

6067469

1 Parent(s): c8aa68b

save every N steps and loss logging

Browse files

Files changed (4) hide show

StableDiffuser.py +3 -3
app.py +40 -18
isolate_rng.py +73 -0
train.py +22 -2

StableDiffuser.py CHANGED Viewed

@@ -95,8 +95,8 @@ class StableDiffuser(torch.nn.Module):
     def set_scheduler_timesteps(self, n_steps):
         self.scheduler.set_timesteps(n_steps, device=self.unet.device)
-    def get_initial_latents(self, n_imgs, width, height, n_prompts, generator=None):
-        noise = self.get_noise(n_imgs, width, height, generator=generator).repeat(n_prompts, 1, 1, 1)
         latents = noise * self.scheduler.init_noise_sigma
         return latents
@@ -199,7 +199,7 @@ class StableDiffuser(torch.nn.Module):
             prompts = [prompts]
         self.set_scheduler_timesteps(n_steps)
-        latents = self.get_initial_latents(n_imgs, width, height, len(prompts), generator=generator)
         text_embeddings = self.get_text_embeddings(prompts,negative_prompts,n_imgs=n_imgs)
         end_iteration = end_iteration or n_steps
         latents_steps, trace_steps = self.diffusion(

     def set_scheduler_timesteps(self, n_steps):
         self.scheduler.set_timesteps(n_steps, device=self.unet.device)
+    def get_initial_latents(self, n_imgs, height, width, n_prompts, generator=None):
+        noise = self.get_noise(n_imgs, height, width, generator=generator).repeat(n_prompts, 1, 1, 1)
         latents = noise * self.scheduler.init_noise_sigma
         return latents
             prompts = [prompts]
         self.set_scheduler_timesteps(n_steps)
+        latents = self.get_initial_latents(n_imgs, height, width, len(prompts), generator=generator)
         text_embeddings = self.get_text_embeddings(prompts,negative_prompts,n_imgs=n_imgs)
         end_iteration = end_iteration or n_steps
         latents_steps, trace_steps = self.diffusion(

app.py CHANGED Viewed

@@ -10,22 +10,16 @@ from memory_efficiency import MemoryEfficiencyWrapper
 from train import train
 import os
-model_map = {'Van Gogh': 'models/vangogh.pt',
-             'Pablo Picasso': 'models/pablopicasso.pt',
-             'Car': 'models/car.pt',
-             'Garbage Truck': 'models/garbagetruck.pt',
-             'French Horn': 'models/frenchhorn.pt',
-             'Kilian Eng': 'models/kilianeng.pt',
-             'Thomas Kinkade': 'models/thomaskinkade.pt',
-             'Tyler Edlin': 'models/tyleredlin.pt',
-             'Kelly McKernan': 'models/kellymckernan.pt',
-             'Rembrandt': 'models/rembrandt.pt' }
-for model_file in os.listdir('models'):
-    path = 'models/' + model_file
-    if any([existing_path == path for existing_path in model_map.values()]):
-        continue
-    model_map[model_file] = path
 ORIGINAL_SPACE_ID = 'baulab/Erasing-Concepts-In-Diffusion'
 SPACE_ID = os.getenv('SPACE_ID')
@@ -85,6 +79,10 @@ class Demo:
                                 value='Van Gogh',
                                 interactive=True
                             )
                             self.seed_infr = gr.Number(
                                 label="Seed",
@@ -196,6 +194,11 @@ class Demo:
                             label="Seed",
                             info="Set to a fixed number for reproducible training results, or use -1 to pick randomly"
                         )
                         with gr.Column():
                             self.train_memory_options = gr.Markdown(interactive=False,
@@ -215,6 +218,10 @@ class Demo:
                             value="Train",
                         )
                         self.download = gr.Files()
             with gr.Tab("Export") as export_column:
@@ -268,7 +275,10 @@ class Demo:
                 self.image_orig
             ]
         )
-        self.train_button.click(self.train, inputs = [
             self.train_model_input,
             self.train_img_size_input,
             self.prompt_input,
@@ -281,9 +291,12 @@ class Demo:
             self.train_use_amp_input,
             self.train_use_gradient_checkpointing_input,
             self.train_seed_input,
         ],
         outputs=[self.train_button, self.train_status, self.download, self.model_dropdown]
         )
         self.export_button.click(self.export, inputs = [
             self.model_dropdown_export,
             self.base_repo_id_or_path_input_export,
@@ -293,9 +306,15 @@ class Demo:
         outputs=[self.export_status]
         )
     def train(self, repo_id_or_path, img_size, prompt, train_method, neg_guidance, iterations, lr,
               use_adamw8bit=True, use_xformers=False, use_amp=False, use_gradient_checkpointing=False,
-              seed=-1,
               pbar = gr.Progress(track_tqdm=True)):
         if self.training:
@@ -331,10 +350,13 @@ class Demo:
         try:
             self.training = True
             train(repo_id_or_path, img_size, prompt, modules, frozen, iterations, neg_guidance, lr, save_path,
-                use_adamw8bit, use_xformers, use_amp, use_gradient_checkpointing, seed=int(seed))
         finally:
             self.training = False
         torch.cuda.empty_cache()

 from train import train
 import os
+def populate_model_map():
+    """Build the model-name -> checkpoint-path map from the ``models`` directory.
+
+    Returns:
+        A dict mapping each entry name found in ``models`` to
+        ``'models/<name>'``, ordered by name. Empty when the directory
+        does not exist.
+    """
+    try:
+        # os.listdir returns entries in arbitrary order; sort so that the
+        # resulting map (and anything built from its keys) is stable.
+        model_files = sorted(os.listdir('models'))
+    except FileNotFoundError:
+        # No models directory yet: there is simply nothing to list.
+        return {}
+    # listdir never yields the same name twice, so every path is unique and
+    # no duplicate-path check is needed.
+    return {model_file: 'models/' + model_file for model_file in model_files}
+model_map = populate_model_map()
 ORIGINAL_SPACE_ID = 'baulab/Erasing-Concepts-In-Diffusion'
 SPACE_ID = os.getenv('SPACE_ID')
                                 value='Van Gogh',
                                 interactive=True
                             )
+                            self.model_reload_button = gr.Button(
+                                value="🔄",
+                                interactive=True
+                            )
                             self.seed_infr = gr.Number(
                                 label="Seed",
                             label="Seed",
                             info="Set to a fixed number for reproducible training results, or use -1 to pick randomly"
                         )
+                        self.train_save_every_input = gr.Number(
+                            value=-1,
+                            label="Save every N steps",
+                            info="If >0, save the model throughout training at the given step interval."
+                        )
                         with gr.Column():
                             self.train_memory_options = gr.Markdown(interactive=False,
                             value="Train",
                         )
+                        self.train_cancel_button = gr.Button(
+                            value="Cancel training"
+                        )
                         self.download = gr.Files()
             with gr.Tab("Export") as export_column:
                 self.image_orig
             ]
         )
+        self.model_reload_button.click(self.reload_models,
+                                       inputs=[self.model_dropdown],
+                                       outputs=[self.model_dropdown])
+        train_event = self.train_button.click(self.train, inputs = [
             self.train_model_input,
             self.train_img_size_input,
             self.prompt_input,
             self.train_use_amp_input,
             self.train_use_gradient_checkpointing_input,
             self.train_seed_input,
+            self.train_save_every_input,
         ],
         outputs=[self.train_button, self.train_status, self.download, self.model_dropdown]
         )
+        self.train_cancel_button.click(lambda x: print("cancel pressed"), cancels=[train_event])
         self.export_button.click(self.export, inputs = [
             self.model_dropdown_export,
             self.base_repo_id_or_path_input_export,
         outputs=[self.export_status]
         )
+    def reload_models(self, model_dropdown):
+        """Rescan the model files and refresh the model dropdown's choices.
+
+        Args:
+            model_dropdown: the name currently selected in the dropdown.
+
+        Returns:
+            A one-element list holding the dropdown update: the new choices,
+            plus the previous selection if it still exists (otherwise the
+            selection is cleared).
+        """
+        global model_map
+        model_map = populate_model_map()
+        # Only keep the selection if it survived the rescan; pushing back a
+        # name that is no longer a key of model_map would leave the dropdown
+        # pointing at a model that cannot be looked up.
+        selected = model_dropdown if model_dropdown in model_map else None
+        return [gr.Dropdown.update(choices=list(model_map.keys()), value=selected)]
     def train(self, repo_id_or_path, img_size, prompt, train_method, neg_guidance, iterations, lr,
               use_adamw8bit=True, use_xformers=False, use_amp=False, use_gradient_checkpointing=False,
+              seed=-1, save_every=-1,
               pbar = gr.Progress(track_tqdm=True)):
         if self.training:
         try:
             self.training = True
+            self.train_cancel_button.update(interactive=True)
             train(repo_id_or_path, img_size, prompt, modules, frozen, iterations, neg_guidance, lr, save_path,
+                  use_adamw8bit, use_xformers, use_amp, use_gradient_checkpointing,
+                  seed=int(seed), save_every=int(save_every))
         finally:
             self.training = False
+            self.train_cancel_button.update(interactive=False)
         torch.cuda.empty_cache()

isolate_rng.py ADDED Viewed

	@@ -0,0 +1,73 @@

+# copy/pasted from pytorch lightning
+# https://github.com/Lightning-AI/lightning/blob/0d52f4577310b5a1624bed4d23d49e37fb05af9e/src/lightning_fabric/utilities/seed.py
+# and
+# https://github.com/Lightning-AI/lightning/blob/98f7696d1681974d34fad59c03b4b58d9524ed13/src/pytorch_lightning/utilities/seed.py
+# Copyright The Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from contextlib import contextmanager
+from typing import Generator, Dict, Any
+import torch
+import numpy as np
+from random import getstate as python_get_rng_state
+from random import setstate as python_set_rng_state
+def _collect_rng_states(include_cuda: bool = True) -> Dict[str, Any]:
+    """Snapshot the global RNG state of :mod:`torch`, :mod:`numpy` and Python's :mod:`random`.
+
+    The :mod:`torch.cuda` state is added under ``"torch.cuda"`` only when ``include_cuda`` is true.
+    """
+    snapshot: Dict[str, Any] = {}
+    snapshot["torch"] = torch.get_rng_state()
+    snapshot["numpy"] = np.random.get_state()
+    snapshot["python"] = python_get_rng_state()
+    if not include_cuda:
+        return snapshot
+    snapshot["torch.cuda"] = torch.cuda.get_rng_state_all()
+    return snapshot
+def _set_rng_states(rng_state_dict: Dict[str, Any]) -> None:
+    """Restore the global RNG state of :mod:`torch`, :mod:`numpy` and Python's :mod:`random` in this process.
+
+    ``rng_state_dict`` must hold the ``"torch"``, ``"numpy"`` and ``"python"`` entries; a ``"torch.cuda"``
+    entry is applied only when present.
+    """
+    torch.set_rng_state(rng_state_dict["torch"])
+    has_cuda_entry = "torch.cuda" in rng_state_dict
+    if has_cuda_entry:
+        # Optional entry: a snapshot may have been taken without the CUDA generators.
+        torch.cuda.set_rng_state_all(rng_state_dict["torch.cuda"])
+    np.random.set_state(rng_state_dict["numpy"])
+    py_version, py_internal, py_gauss = rng_state_dict["python"]
+    # The internal state is coerced to a tuple before being handed to random.setstate,
+    # so a snapshot whose state arrives as a list is still accepted.
+    restored_python_state = (py_version, tuple(py_internal), py_gauss)
+    python_set_rng_state(restored_python_state)
+@contextmanager
+def isolate_rng(include_cuda: bool = True) -> Generator[None, None, None]:
+    """A context manager that resets the global random state on exit to what it was before entering.
+    It supports isolating the states for PyTorch, Numpy, and Python built-in random number generators.
+    The state is restored even when the body of the ``with`` block raises.
+    Args:
+        include_cuda: Whether to allow this function to also control the `torch.cuda` random number generator.
+            Set this to ``False`` when using the function in a forked process where CUDA re-initialization is
+            prohibited.
+    Example:
+        >>> import torch
+        >>> torch.manual_seed(1)  # doctest: +ELLIPSIS
+        <torch._C.Generator object at ...>
+        >>> with isolate_rng():
+        ...     [torch.rand(1) for _ in range(3)]
+        [tensor([0.7576]), tensor([0.2793]), tensor([0.4031])]
+        >>> torch.rand(1)
+        tensor([0.7576])
+    """
+    states = _collect_rng_states(include_cuda)
+    try:
+        yield
+    finally:
+        # An exception raised inside the with-block is re-raised at the yield;
+        # without the finally the saved state would never be put back.
+        _set_rng_states(states)

train.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from random import random
 from accelerate.utils import set_seed
 from torch.cuda.amp import autocast
@@ -8,11 +8,12 @@ from finetuning import FineTunedModel
 import torch
 from tqdm import tqdm
 from memory_efficiency import MemoryEfficiencyWrapper
 def train(repo_id_or_path, img_size, prompt, modules, freeze_modules, iterations, negative_guidance, lr, save_path,
-          use_adamw8bit=True, use_xformers=True, use_amp=True, use_gradient_checkpointing=False, seed=-1):
     nsteps = 50
     diffuser = StableDiffuser(scheduler='DDIM', repo_id_or_path=repo_id_or_path).to('cuda')
@@ -54,6 +55,9 @@ def train(repo_id_or_path, img_size, prompt, modules, freeze_modules, iterations
             seed = random.randint(0, 2 ** 30)
         set_seed(int(seed))
         for i in pbar:
             with torch.no_grad():
                 diffuser.set_scheduler_timesteps(nsteps)
@@ -92,6 +96,22 @@ def train(repo_id_or_path, img_size, prompt, modules, freeze_modules, iterations
             memory_efficiency_wrapper.step(optimizer, loss)
             optimizer.zero_grad()
     torch.save(finetuner.state_dict(), save_path)
     del diffuser, loss, optimizer, finetuner, negative_latents, neutral_latents, positive_latents, latents_steps, latents

+import random
 from accelerate.utils import set_seed
 from torch.cuda.amp import autocast
 import torch
 from tqdm import tqdm
+from isolate_rng import isolate_rng
 from memory_efficiency import MemoryEfficiencyWrapper
 def train(repo_id_or_path, img_size, prompt, modules, freeze_modules, iterations, negative_guidance, lr, save_path,
+          use_adamw8bit=True, use_xformers=True, use_amp=True, use_gradient_checkpointing=False, seed=-1, save_every=-1):
     nsteps = 50
     diffuser = StableDiffuser(scheduler='DDIM', repo_id_or_path=repo_id_or_path).to('cuda')
             seed = random.randint(0, 2 ** 30)
         set_seed(int(seed))
+        prev_losses = []
+        start_loss = None
+        max_prev_loss_count = 10
         for i in pbar:
             with torch.no_grad():
                 diffuser.set_scheduler_timesteps(nsteps)
             memory_efficiency_wrapper.step(optimizer, loss)
             optimizer.zero_grad()
+            # print moving average loss
+            prev_losses.append(loss.detach().clone())
+            if len(prev_losses) > max_prev_loss_count:
+                prev_losses.pop(0)
+            if start_loss is None:
+                start_loss = prev_losses[-1]
+            if len(prev_losses) >= max_prev_loss_count:
+                moving_average_loss = sum(prev_losses) / len(prev_losses)
+                print(
+                    f"step {i}: loss={loss.item()} (avg={moving_average_loss.item()}, start ∆={(moving_average_loss - start_loss).item()}")
+            else:
+                print(f"step {i}: loss={loss.item()}")
+            if save_every > 0 and ((i % save_every) == (save_every-1)):
+                torch.save(finetuner.state_dict(), save_path + f"__step_{i}.pt")
     torch.save(finetuner.state_dict(), save_path)
     del diffuser, loss, optimizer, finetuner, negative_latents, neutral_latents, positive_latents, latents_steps, latents