Update pipline_StableDiffusion_ConsistentID.py
pipline_StableDiffusion_ConsistentID.py
CHANGED
@@ -15,8 +15,8 @@ from diffusers.utils import _get_model_file
 from functions import process_text_with_markers, masks_for_unique_values, fetch_mask_raw_image, tokenize_and_mask_noun_phrases_ends, prepare_image_token_idx
 from functions import ProjPlusModel, masks_for_unique_values
 from attention import Consistent_IPAttProcessor, Consistent_AttProcessor, FacialEncoder
-from modelscope.outputs import OutputKeys
-from modelscope.pipelines import pipeline
+# from modelscope.outputs import OutputKeys
+# from modelscope.pipelines import pipeline
 
 #TODO
 import sys
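Note: this hunk drops the hard ModelScope dependency by commenting the imports out. If the retouching feature should stay available wherever ModelScope happens to be installed, a guarded import is a common alternative; a minimal sketch, where MODELSCOPE_AVAILABLE is a hypothetical flag not present in the original file:

# Sketch: optional ModelScope import instead of deletion.
# MODELSCOPE_AVAILABLE is a hypothetical helper flag.
try:
    from modelscope.outputs import OutputKeys
    from modelscope.pipelines import pipeline
    MODELSCOPE_AVAILABLE = True
except ImportError:
    OutputKeys, pipeline = None, None
    MODELSCOPE_AVAILABLE = False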
@@ -43,7 +43,7 @@ class ConsistentIDStableDiffusionPipeline(StableDiffusionPipeline):
         subfolder: str = '',
         trigger_word_ID: str = '<|image|>',
         trigger_word_facial: str = '<|facial|>',
-        image_encoder_path: str = 'CLIP-ViT-H-14-laion2B-s32B-b79K', # TODO
+        image_encoder_path: str = 'laion/CLIP-ViT-H-14-laion2B-s32B-b79K', # TODO
         torch_dtype = torch.float16,
         num_tokens = 4,
         lora_rank= 128,
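Note: the old value is a bare model name that the Hugging Face Hub cannot resolve; prefixing the owner namespace gives the full repo id 'laion/CLIP-ViT-H-14-laion2B-s32B-b79K'. A minimal sketch of resolving that id with transformers (the pipeline itself may wire the encoder up differently):

from transformers import CLIPVisionModelWithProjection

# Loads only with the full 'owner/name' repo id; the bare name would fail to resolve.
image_encoder = CLIPVisionModelWithProjection.from_pretrained(
    "laion/CLIP-ViT-H-14-laion2B-s32B-b79K"
)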
@@ -83,7 +83,7 @@ class ConsistentIDStableDiffusionPipeline(StableDiffusionPipeline):
                     [0, 255, 255], [85, 255, 255], [170, 255, 255]]
 
         ### LLVA Optional
-        self.llva_model_path = "llava-
+        self.llva_model_path = "llava-hf/llava-1.5-7b-hf" #TODO
         self.llva_prompt = "Describe this person's facial features for me, including face, ears, eyes, nose, and mouth."
         self.llva_tokenizer, self.llva_model, self.llva_image_processor, self.llva_context_len = None,None,None,None #load_pretrained_model(self.llva_model_path)
 
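Note: the commit points llva_model_path at the Hub repo "llava-hf/llava-1.5-7b-hf", but the loader itself stays stubbed out with None placeholders. If the optional LLaVA captioning were re-enabled, a sketch using plain transformers classes (an assumption; the original comment references a load_pretrained_model helper instead) could look like:

from transformers import AutoProcessor, LlavaForConditionalGeneration

# Assumption: load via transformers rather than the stubbed-out helper.
llva_model = LlavaForConditionalGeneration.from_pretrained("llava-hf/llava-1.5-7b-hf")
llva_processor = AutoProcessor.from_pretrained("llava-hf/llava-1.5-7b-hf")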
@@ -95,7 +95,7 @@ class ConsistentIDStableDiffusionPipeline(StableDiffusionPipeline):
         ).to(self.device, dtype=self.torch_dtype)
         self.FacialEncoder = FacialEncoder(self.image_encoder).to(self.device, dtype=self.torch_dtype)
 
-        self.skin_retouching = pipeline('skin-retouching-torch', model='damo/cv_unet_skin_retouching_torch', model_revision='v1.0.2')
+        # self.skin_retouching = pipeline('skin-retouching-torch', model='damo/cv_unet_skin_retouching_torch', model_revision='v1.0.2')
 
         # Load the main state dict first.
         cache_dir = kwargs.pop("cache_dir", None)
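Note: this mirrors the import change, commenting out the eager construction of the ModelScope skin-retouching pipeline. Conditional construction would keep behavior intact where the dependency exists; a sketch building on the hypothetical MODELSCOPE_AVAILABLE flag from above:

# Sketch: build the retouching pipeline only if ModelScope imported; else None.
self.skin_retouching = pipeline(
    'skin-retouching-torch',
    model='damo/cv_unet_skin_retouching_torch',
    model_revision='v1.0.2',
) if MODELSCOPE_AVAILABLE else None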
@@ -589,10 +589,10 @@ class ConsistentIDStableDiffusionPipeline(StableDiffusionPipeline):
             # 9.3 Convert to PIL list
             image = self.numpy_to_pil(image)
 
-            if retouching:
-                after_retouching = self.skin_retouching(image[0])
-                if OutputKeys.OUTPUT_IMG in after_retouching:
-                    image = [Image.fromarray(cv2.cvtColor(after_retouching[OutputKeys.OUTPUT_IMG], cv2.COLOR_BGR2RGB))]
+            # if retouching:
+            #     after_retouching = self.skin_retouching(image[0])
+            #     if OutputKeys.OUTPUT_IMG in after_retouching:
+            #         image = [Image.fromarray(cv2.cvtColor(after_retouching[OutputKeys.OUTPUT_IMG], cv2.COLOR_BGR2RGB))]
         else:
             # 9.1 Post-processing
             image = self.decode_latents(latents)
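Note: with the whole branch commented out, callers passing retouching=True now silently fall through to the plain PIL output. A guarded variant keeps the flag meaningful, again assuming the hypothetical MODELSCOPE_AVAILABLE flag and the None default sketched above:

import cv2
from PIL import Image

if retouching and MODELSCOPE_AVAILABLE and self.skin_retouching is not None:
    after_retouching = self.skin_retouching(image[0])
    if OutputKeys.OUTPUT_IMG in after_retouching:
        # ModelScope returns a BGR array; convert back to an RGB PIL image.
        image = [Image.fromarray(cv2.cvtColor(after_retouching[OutputKeys.OUTPUT_IMG], cv2.COLOR_BGR2RGB))]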