Update pipline_StableDiffusion_ConsistentID.py
pipline_StableDiffusion_ConsistentID.py
CHANGED
@@ -15,8 +15,8 @@ from diffusers.utils import _get_model_file
 from functions import process_text_with_markers, masks_for_unique_values, fetch_mask_raw_image, tokenize_and_mask_noun_phrases_ends, prepare_image_token_idx
 from functions import ProjPlusModel, masks_for_unique_values
 from attention import Consistent_IPAttProcessor, Consistent_AttProcessor, FacialEncoder
-from modelscope.outputs import OutputKeys
-from modelscope.pipelines import pipeline
+# from modelscope.outputs import OutputKeys
+# from modelscope.pipelines import pipeline
 
 #TODO
 import sys
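Note: this hunk drops the hard ModelScope dependency by commenting the imports out. If the retouching feature should stay available wherever ModelScope happens to be installed, a guarded import is a common alternative; a minimal sketch, where MODELSCOPE_AVAILABLE is a hypothetical flag not present in the original file:

# Sketch: optional ModelScope import instead of deletion.
# MODELSCOPE_AVAILABLE is a hypothetical helper flag.
try:
    from modelscope.outputs import OutputKeys
    from modelscope.pipelines import pipeline
    MODELSCOPE_AVAILABLE = True
except ImportError:
    OutputKeys, pipeline = None, None
    MODELSCOPE_AVAILABLE = False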
@@ -43,7 +43,7 @@ class ConsistentIDStableDiffusionPipeline(StableDiffusionPipeline):
         subfolder: str = '',
         trigger_word_ID: str = '<|image|>',
         trigger_word_facial: str = '<|facial|>',
-        image_encoder_path: str = 'CLIP-ViT-H-14-laion2B-s32B-b79K', # TODO
+        image_encoder_path: str = 'laion/CLIP-ViT-H-14-laion2B-s32B-b79K', # TODO
         torch_dtype = torch.float16,
         num_tokens = 4,
         lora_rank= 128,
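Note: the old value is a bare model name that the Hugging Face Hub cannot resolve; prefixing the owner namespace gives the full repo id 'laion/CLIP-ViT-H-14-laion2B-s32B-b79K'. A minimal sketch of resolving that id with transformers (the pipeline itself may wire the encoder up differently):

from transformers import CLIPVisionModelWithProjection

# Loads only with the full 'owner/name' repo id; the bare name would fail to resolve.
image_encoder = CLIPVisionModelWithProjection.from_pretrained(
    "laion/CLIP-ViT-H-14-laion2B-s32B-b79K"
)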
@@ -83,7 +83,7 @@ class ConsistentIDStableDiffusionPipeline(StableDiffusionPipeline):
                     [0, 255, 255], [85, 255, 255], [170, 255, 255]]
 
         ### LLVA Optional
-        self.llva_model_path = "llava-
+        self.llva_model_path = "llava-hf/llava-1.5-7b-hf" #TODO
         self.llva_prompt = "Describe this person's facial features for me, including face, ears, eyes, nose, and mouth."
         self.llva_tokenizer, self.llva_model, self.llva_image_processor, self.llva_context_len = None,None,None,None #load_pretrained_model(self.llva_model_path)
 
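Note: the commit points llva_model_path at the Hub repo "llava-hf/llava-1.5-7b-hf", but the loader itself stays stubbed out with None placeholders. If the optional LLaVA captioning were re-enabled, a sketch using plain transformers classes (an assumption; the original comment references a load_pretrained_model helper instead) could look like:

from transformers import AutoProcessor, LlavaForConditionalGeneration

# Assumption: load via transformers rather than the stubbed-out helper.
llva_model = LlavaForConditionalGeneration.from_pretrained("llava-hf/llava-1.5-7b-hf")
llva_processor = AutoProcessor.from_pretrained("llava-hf/llava-1.5-7b-hf")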
@@ -95,7 +95,7 @@ class ConsistentIDStableDiffusionPipeline(StableDiffusionPipeline):
         ).to(self.device, dtype=self.torch_dtype)
         self.FacialEncoder = FacialEncoder(self.image_encoder).to(self.device, dtype=self.torch_dtype)
 
-        self.skin_retouching = pipeline('skin-retouching-torch', model='damo/cv_unet_skin_retouching_torch', model_revision='v1.0.2')
+        # self.skin_retouching = pipeline('skin-retouching-torch', model='damo/cv_unet_skin_retouching_torch', model_revision='v1.0.2')
 
         # Load the main state dict first.
         cache_dir = kwargs.pop("cache_dir", None)
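Note: this mirrors the import change, commenting out the eager construction of the ModelScope skin-retouching pipeline. Conditional construction would keep behavior intact where the dependency exists; a sketch building on the hypothetical MODELSCOPE_AVAILABLE flag from above:

# Sketch: build the retouching pipeline only if ModelScope imported; else None.
self.skin_retouching = pipeline(
    'skin-retouching-torch',
    model='damo/cv_unet_skin_retouching_torch',
    model_revision='v1.0.2',
) if MODELSCOPE_AVAILABLE else None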
@@ -589,10 +589,10 @@ class ConsistentIDStableDiffusionPipeline(StableDiffusionPipeline):
             # 9.3 Convert to PIL list
             image = self.numpy_to_pil(image)
 
-            if retouching:
-                after_retouching = self.skin_retouching(image[0])
-                if OutputKeys.OUTPUT_IMG in after_retouching:
-                    image = [Image.fromarray(cv2.cvtColor(after_retouching[OutputKeys.OUTPUT_IMG], cv2.COLOR_BGR2RGB))]
+            # if retouching:
+            #     after_retouching = self.skin_retouching(image[0])
+            #     if OutputKeys.OUTPUT_IMG in after_retouching:
+            #         image = [Image.fromarray(cv2.cvtColor(after_retouching[OutputKeys.OUTPUT_IMG], cv2.COLOR_BGR2RGB))]
         else:
             # 9.1 Post-processing
             image = self.decode_latents(latents)
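Note: with the whole branch commented out, callers passing retouching=True now silently fall through to the plain PIL output. A guarded variant keeps the flag meaningful, again assuming the hypothetical MODELSCOPE_AVAILABLE flag and the None default sketched above:

import cv2
from PIL import Image

if retouching and MODELSCOPE_AVAILABLE and self.skin_retouching is not None:
    after_retouching = self.skin_retouching(image[0])
    if OutputKeys.OUTPUT_IMG in after_retouching:
        # ModelScope returns a BGR array; convert back to an RGB PIL image.
        image = [Image.fromarray(cv2.cvtColor(after_retouching[OutputKeys.OUTPUT_IMG], cv2.COLOR_BGR2RGB))]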