JackAILab committed on
Commit
278acb0
1 Parent(s): 43bce3d

Update pipline_StableDiffusion_ConsistentID.py

Browse files
pipline_StableDiffusion_ConsistentID.py CHANGED
@@ -15,8 +15,8 @@ from diffusers.utils import _get_model_file
15
  from functions import process_text_with_markers, masks_for_unique_values, fetch_mask_raw_image, tokenize_and_mask_noun_phrases_ends, prepare_image_token_idx
16
  from functions import ProjPlusModel, masks_for_unique_values
17
  from attention import Consistent_IPAttProcessor, Consistent_AttProcessor, FacialEncoder
18
- from modelscope.outputs import OutputKeys
19
- from modelscope.pipelines import pipeline
20
 
21
  #TODO
22
  import sys
@@ -43,7 +43,7 @@ class ConsistentIDStableDiffusionPipeline(StableDiffusionPipeline):
43
  subfolder: str = '',
44
  trigger_word_ID: str = '<|image|>',
45
  trigger_word_facial: str = '<|facial|>',
46
- image_encoder_path: str = 'CLIP-ViT-H-14-laion2B-s32B-b79K', # TODO
47
  torch_dtype = torch.float16,
48
  num_tokens = 4,
49
  lora_rank= 128,
@@ -83,7 +83,7 @@ class ConsistentIDStableDiffusionPipeline(StableDiffusionPipeline):
83
  [0, 255, 255], [85, 255, 255], [170, 255, 255]]
84
 
85
  ### LLVA Optional
86
- self.llva_model_path = "llava-v1.5-7b" #TODO
87
  self.llva_prompt = "Describe this person's facial features for me, including face, ears, eyes, nose, and mouth."
88
  self.llva_tokenizer, self.llva_model, self.llva_image_processor, self.llva_context_len = None,None,None,None #load_pretrained_model(self.llva_model_path)
89
 
@@ -95,7 +95,7 @@ class ConsistentIDStableDiffusionPipeline(StableDiffusionPipeline):
95
  ).to(self.device, dtype=self.torch_dtype)
96
  self.FacialEncoder = FacialEncoder(self.image_encoder).to(self.device, dtype=self.torch_dtype)
97
 
98
- self.skin_retouching = pipeline('skin-retouching-torch', model='damo/cv_unet_skin_retouching_torch', model_revision='v1.0.2')
99
 
100
  # Load the main state dict first.
101
  cache_dir = kwargs.pop("cache_dir", None)
@@ -589,10 +589,10 @@ class ConsistentIDStableDiffusionPipeline(StableDiffusionPipeline):
589
  # 9.3 Convert to PIL list
590
  image = self.numpy_to_pil(image)
591
 
592
- if retouching:
593
- after_retouching = self.skin_retouching(image[0])
594
- if OutputKeys.OUTPUT_IMG in after_retouching:
595
- image = [Image.fromarray(cv2.cvtColor(after_retouching[OutputKeys.OUTPUT_IMG], cv2.COLOR_BGR2RGB))]
596
  else:
597
  # 9.1 Post-processing
598
  image = self.decode_latents(latents)
 
15
  from functions import process_text_with_markers, masks_for_unique_values, fetch_mask_raw_image, tokenize_and_mask_noun_phrases_ends, prepare_image_token_idx
16
  from functions import ProjPlusModel, masks_for_unique_values
17
  from attention import Consistent_IPAttProcessor, Consistent_AttProcessor, FacialEncoder
18
+ # from modelscope.outputs import OutputKeys
19
+ # from modelscope.pipelines import pipeline
20
 
21
  #TODO
22
  import sys
 
43
  subfolder: str = '',
44
  trigger_word_ID: str = '<|image|>',
45
  trigger_word_facial: str = '<|facial|>',
46
+ image_encoder_path: str = 'laion/CLIP-ViT-H-14-laion2B-s32B-b79K', # TODO
47
  torch_dtype = torch.float16,
48
  num_tokens = 4,
49
  lora_rank= 128,
 
83
  [0, 255, 255], [85, 255, 255], [170, 255, 255]]
84
 
85
  ### LLVA Optional
86
+ self.llva_model_path = "llava-hf/llava-1.5-7b-hf" #TODO
87
  self.llva_prompt = "Describe this person's facial features for me, including face, ears, eyes, nose, and mouth."
88
  self.llva_tokenizer, self.llva_model, self.llva_image_processor, self.llva_context_len = None,None,None,None #load_pretrained_model(self.llva_model_path)
89
 
 
95
  ).to(self.device, dtype=self.torch_dtype)
96
  self.FacialEncoder = FacialEncoder(self.image_encoder).to(self.device, dtype=self.torch_dtype)
97
 
98
+ # self.skin_retouching = pipeline('skin-retouching-torch', model='damo/cv_unet_skin_retouching_torch', model_revision='v1.0.2')
99
 
100
  # Load the main state dict first.
101
  cache_dir = kwargs.pop("cache_dir", None)
 
589
  # 9.3 Convert to PIL list
590
  image = self.numpy_to_pil(image)
591
 
592
+ # if retouching:
593
+ # after_retouching = self.skin_retouching(image[0])
594
+ # if OutputKeys.OUTPUT_IMG in after_retouching:
595
+ # image = [Image.fromarray(cv2.cvtColor(after_retouching[OutputKeys.OUTPUT_IMG], cv2.COLOR_BGR2RGB))]
596
  else:
597
  # 9.1 Post-processing
598
  image = self.decode_latents(latents)