LanHarmony committed • ceebd56
1 Parent(s): f97efd7

support GroundingDINO and segment-anything
Files changed:
- app.py (+0 -1)
- visual_foundation_models.py (+13 -31)
app.py
CHANGED
@@ -210,7 +210,6 @@ bot = ConversationBot({'Text2Box': 'cuda:0',
                        'Inpainting': 'cuda:0',
                        'Text2Image': 'cuda:0',
                        'ImageCaptioning': 'cuda:0',
-                       'ImageEditing': 'cuda:0',
                        'VisualQuestionAnswering': 'cuda:0',
                        'Image2Canny': 'cpu',
                        'CannyText2Image': 'cuda:0',
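Context for the removed line: with this commit, ImageEditing stops being a directly loaded model with its own device entry; it is rebuilt on top of GroundingDINO boxes (Text2Box), segment-anything masks, and the Inpainting model, and is instantiated as a template model from models that are already loaded. A minimal sketch of that loader pattern, assuming a load_dict like the one above and a registry mapping class names to classes; the helper name load_models and the registry parameter are illustrative, not the exact code in app.py:

import inspect

def load_models(load_dict, registry):
    # Illustrative loader, following the template_model convention in the diff below.
    models = {}
    # Models with their own device entry are instantiated directly.
    for class_name, device in load_dict.items():
        models[class_name] = registry[class_name](device=device)
    # Template models (template_model = True) take no device; they are composed
    # from already-loaded models whose names match their __init__ parameters.
    for class_name, cls in registry.items():
        if getattr(cls, 'template_model', False) and class_name not in models:
            required = [p for p in inspect.signature(cls.__init__).parameters if p != 'self']
            if all(name in models for name in required):
                models[class_name] = cls(**{name: models[name] for name in required})
    return models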
visual_foundation_models.py
CHANGED
@@ -935,19 +935,18 @@ class Inpainting:
         self.inpaint = StableDiffusionInpaintPipeline.from_pretrained(
             "runwayml/stable-diffusion-inpainting", revision=self.revision, torch_dtype=self.torch_dtype).to(device)
 
-    def __call__(self, prompt, image, mask_image, height=512, width=512):
-        update_image = self.inpaint(prompt=prompt, image=image.resize((width, height)),
-                                    mask_image=mask_image.resize((width, height)), height=height, width=width).images[0]
+    def __call__(self, prompt, image, mask_image, height=512, width=512, num_inference_steps=50):
+        update_image = self.inpaint(prompt=prompt, image=image.resize((width, height)),
+                                    mask_image=mask_image.resize((width, height)), height=height, width=width,
+                                    num_inference_steps=num_inference_steps).images[0]
         return update_image
 
 
 class InfinityOutPainting:
-    template_model = True
-
-    def __init__(self, ImageCaptioning, ImageEditing, VisualQuestionAnswering):
-        self.llm = OpenAI(temperature=0)
+    template_model = True # Add this line to show this is a template model.
+    def __init__(self, ImageCaptioning, Inpainting, VisualQuestionAnswering):
         self.ImageCaption = ImageCaptioning
-        self.ImageEditing = ImageEditing
+        self.inpaint = Inpainting
         self.ImageVQA = VisualQuestionAnswering
         self.a_prompt = 'best quality, extremely detailed'
         self.n_prompt = 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, ' \
@@ -963,32 +962,15 @@ class InfinityOutPainting:
 
     def get_BLIP_caption(self, image):
         inputs = self.ImageCaption.processor(image, return_tensors="pt").to(self.ImageCaption.device,
-                                                                            …
+                                                                            self.ImageCaption.torch_dtype)
         out = self.ImageCaption.model.generate(**inputs)
         BLIP_caption = self.ImageCaption.processor.decode(out[0], skip_special_tokens=True)
         return BLIP_caption
 
-    def check_prompt(self, prompt):
-        check = f"Here is a paragraph with adjectives. " \
-                f"{prompt} " \
-                f"Please change all plural forms in the adjectives to singular forms. "
-        return self.llm(check)
-
     def get_imagine_caption(self, image, imagine):
         BLIP_caption = self.get_BLIP_caption(image)
-        background_color = self.get_BLIP_vqa(image, 'what is the background color of this image')
-        style = self.get_BLIP_vqa(image, 'what is the style of this image')
-        imagine_prompt = f"let's pretend you are an excellent painter and now " \
-                         f"there is an incomplete painting with {BLIP_caption} in the center, " \
-                         f"please imagine the complete painting and describe it" \
-                         f"you should consider the background color is {background_color}, the style is {style}" \
-                         f"You should make the painting as vivid and realistic as possible" \
-                         f"You can not use words like painting or picture" \
-                         f"and you should use no more than 50 words to describe it"
-        caption = self.llm(imagine_prompt) if imagine else BLIP_caption
-        caption = self.check_prompt(caption)
-        print(f'BLIP observation: {BLIP_caption}, ChatGPT imagine to {caption}') if imagine else print(
-            f'Prompt: {caption}')
+        caption = BLIP_caption
+        print(f'Prompt: {caption}')
         return caption
 
     def resize_image(self, image, max_size=1000000, multiple=8):
@@ -1014,9 +996,9 @@ class InfinityOutPainting:
             temp_canvas.paste(old_img, (x, y))
             temp_mask.paste(0, (x, y, x + old_img.width, y + old_img.height))
             resized_temp_canvas, resized_temp_mask = self.resize_image(temp_canvas), self.resize_image(temp_mask)
-            image = self.ImageEditing.inpaint(prompt=prompt, image=resized_temp_canvas, mask_image=resized_temp_mask,
+            image = self.inpaint(prompt=prompt, image=resized_temp_canvas, mask_image=resized_temp_mask,
                                  height=resized_temp_canvas.height, width=resized_temp_canvas.width,
-                                 num_inference_steps=50).images[0].resize(
+                                 num_inference_steps=50).resize(
                 (temp_canvas.width, temp_canvas.height), Image.ANTIALIAS)
             image = blend_gt2pt(old_img, image)
             old_img = image
@@ -1119,7 +1101,7 @@ class ImageEditing:
         mask = self.pad_edge(mask, padding=20)  # numpy
         mask_image = Image.fromarray(mask)
 
-        updated_image = self.inpaint(prompt=replace_with_txt, …
+        updated_image = self.inpaint(prompt=replace_with_txt, image=image_pil,
                                      mask_image=mask_image)
         updated_image_path = get_new_image_name(image_path, func_name="replace-something")
         updated_image = updated_image.resize(image_pil.size)
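Taken together, the Inpainting changes expose num_inference_steps and keep returning a plain PIL image (the wrapper unwraps .images[0] itself), which is why the outpainting loop above drops its own .images[0]. A hypothetical usage sketch; the device string, file paths, and prompt are placeholders:

from PIL import Image

inpainter = Inpainting(device='cuda:0')             # wrapper class from the diff above
image = Image.open('scene.png').convert('RGB')      # placeholder source image
mask = Image.open('mask.png').convert('RGB')        # white pixels mark the region to repaint
result = inpainter('a red vintage car', image, mask,
                   height=512, width=512, num_inference_steps=50)
result.save('scene_inpainted.png')                  # already a PIL.Image, no .images[0] needed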