poipiii
committed on
Commit
•
372d15d
1
Parent(s):
fa64057
test latent fix
Browse files — pipeline.py (+5, −14)
pipeline.py
CHANGED
@@ -617,7 +617,7 @@ class StableDiffusionLongPromptWeightingPipeline(StableDiffusionPipeline):
|
|
617 |
extra_step_kwargs["generator"] = generator
|
618 |
return extra_step_kwargs
|
619 |
|
620 |
-
def prepare_latents(self, image, timestep, batch_size, height, width, dtype, device, generator,
|
621 |
if image is None:
|
622 |
shape = (
|
623 |
batch_size,
|
@@ -633,17 +633,13 @@ class StableDiffusionLongPromptWeightingPipeline(StableDiffusionPipeline):
|
|
633 |
else:
|
634 |
latents = torch.randn(shape, generator=generator, device=device, dtype=dtype)
|
635 |
else:
|
636 |
-
print(latents)
|
637 |
if latents.shape != shape:
|
638 |
raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {shape}")
|
639 |
latents = latents.to(device)
|
640 |
|
641 |
-
# if upscale:
|
642 |
-
# return latents, None, None
|
643 |
# scale the initial noise by the standard deviation required by the scheduler
|
644 |
-
|
645 |
-
|
646 |
-
return latents, None, None
|
647 |
else:
|
648 |
init_latent_dist = self.vae.encode(image).latent_dist
|
649 |
init_latents = init_latent_dist.sample(generator=generator)
|
@@ -677,7 +673,6 @@ class StableDiffusionLongPromptWeightingPipeline(StableDiffusionPipeline):
|
|
677 |
eta: float = 0.0,
|
678 |
generator: Optional[torch.Generator] = None,
|
679 |
latents: Optional[torch.FloatTensor] = None,
|
680 |
-
upscale: bool = False,
|
681 |
return_latents: bool = False,
|
682 |
max_embeddings_multiples: Optional[int] = 3,
|
683 |
output_type: Optional[str] = "pil",
|
@@ -812,9 +807,7 @@ class StableDiffusionLongPromptWeightingPipeline(StableDiffusionPipeline):
|
|
812 |
dtype,
|
813 |
device,
|
814 |
generator,
|
815 |
-
|
816 |
-
latents
|
817 |
-
|
818 |
)
|
819 |
|
820 |
# 7. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
|
@@ -1030,7 +1023,6 @@ class StableDiffusionLongPromptWeightingPipeline(StableDiffusionPipeline):
|
|
1030 |
generator=generator,
|
1031 |
latents=latents,
|
1032 |
return_latents=True,
|
1033 |
-
upscale=False,
|
1034 |
max_embeddings_multiples=max_embeddings_multiples,
|
1035 |
output_type=output_type,
|
1036 |
return_dict=return_dict,
|
@@ -1040,7 +1032,7 @@ class StableDiffusionLongPromptWeightingPipeline(StableDiffusionPipeline):
|
|
1040 |
)
|
1041 |
|
1042 |
latents = torch.nn.functional.interpolate(latents, size=(int(height*resize_scale)//8, int(width*resize_scale)//8))
|
1043 |
-
|
1044 |
return self.__call__(
|
1045 |
prompt=prompt,
|
1046 |
negative_prompt=negative_prompt,
|
@@ -1054,7 +1046,6 @@ class StableDiffusionLongPromptWeightingPipeline(StableDiffusionPipeline):
|
|
1054 |
generator=generator,
|
1055 |
latents=latents,
|
1056 |
return_latents=False,
|
1057 |
-
upscale=True,
|
1058 |
max_embeddings_multiples=max_embeddings_multiples,
|
1059 |
output_type=output_type,
|
1060 |
return_dict=return_dict,
|
|
|
617 |
extra_step_kwargs["generator"] = generator
|
618 |
return extra_step_kwargs
|
619 |
|
620 |
+
def prepare_latents(self, image, timestep, batch_size, height, width, dtype, device, generator, latents=None):
|
621 |
if image is None:
|
622 |
shape = (
|
623 |
batch_size,
|
|
|
633 |
else:
|
634 |
latents = torch.randn(shape, generator=generator, device=device, dtype=dtype)
|
635 |
else:
|
|
|
636 |
if latents.shape != shape:
|
637 |
raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {shape}")
|
638 |
latents = latents.to(device)
|
639 |
|
|
|
|
|
640 |
# scale the initial noise by the standard deviation required by the scheduler
|
641 |
+
latents = latents * self.scheduler.init_noise_sigma
|
642 |
+
return latents, None, None
|
|
|
643 |
else:
|
644 |
init_latent_dist = self.vae.encode(image).latent_dist
|
645 |
init_latents = init_latent_dist.sample(generator=generator)
|
|
|
673 |
eta: float = 0.0,
|
674 |
generator: Optional[torch.Generator] = None,
|
675 |
latents: Optional[torch.FloatTensor] = None,
|
|
|
676 |
return_latents: bool = False,
|
677 |
max_embeddings_multiples: Optional[int] = 3,
|
678 |
output_type: Optional[str] = "pil",
|
|
|
807 |
dtype,
|
808 |
device,
|
809 |
generator,
|
810 |
+
latents,
|
|
|
|
|
811 |
)
|
812 |
|
813 |
# 7. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
|
|
|
1023 |
generator=generator,
|
1024 |
latents=latents,
|
1025 |
return_latents=True,
|
|
|
1026 |
max_embeddings_multiples=max_embeddings_multiples,
|
1027 |
output_type=output_type,
|
1028 |
return_dict=return_dict,
|
|
|
1032 |
)
|
1033 |
|
1034 |
latents = torch.nn.functional.interpolate(latents, size=(int(height*resize_scale)//8, int(width*resize_scale)//8))
|
1035 |
+
|
1036 |
return self.__call__(
|
1037 |
prompt=prompt,
|
1038 |
negative_prompt=negative_prompt,
|
|
|
1046 |
generator=generator,
|
1047 |
latents=latents,
|
1048 |
return_latents=False,
|
|
|
1049 |
max_embeddings_multiples=max_embeddings_multiples,
|
1050 |
output_type=output_type,
|
1051 |
return_dict=return_dict,
|