make the pipeline simple

- app.py +10 -8
- models/__pycache__/controlnet_model.cpython-38.pyc +0 -0
- models/__pycache__/image_text_transformation.cpython-38.pyc +0 -0
- models/image_text_transformation.py +2 -1
- models/segment_models/__pycache__/semantic_segment_anything_model.cpython-38.pyc +0 -0
- pretrained_models/blip-image-captioning-large +0 -1
- pretrained_models/blip2-opt-2.7b +0 -1
- pretrained_models/clip-vit-large-patch14 +0 -1
- pretrained_models/clipseg-rd64-refined +0 -1
- pretrained_models/oneformer_ade20k_swin_large +0 -1
- pretrained_models/oneformer_coco_swin_large +0 -1
- pretrained_models/stable-diffusion-v1-5 +0 -1
app.py
CHANGED
@@ -49,7 +49,8 @@ def process_image(image_src, options=None, processor=None):
     print(options)
     if options is None:
         options = []
-    processor.args.semantic_segment = "Semantic Segment" in options
+    # processor.args.semantic_segment = "Semantic Segment" in options
+    processor.args.semantic_segment = False
     image_generation_status = "Image Generation" in options
     image_caption, dense_caption, region_semantic, gen_text = processor.image_to_text(image_src)
     if image_generation_status:
@@ -93,7 +94,7 @@ processor = ImageTextTransformation(args)
 
 # Create Gradio input and output components
 image_input = gr.inputs.Image(type='filepath', label="Input Image")
-semantic_segment_checkbox = gr.inputs.Checkbox(label="Semantic Segment", default=False)
+# semantic_segment_checkbox = gr.inputs.Checkbox(label="Semantic Segment", default=False)
 image_generation_checkbox = gr.inputs.Checkbox(label="Image Generation", default=False)
 
 logo_base64 = add_logo()
@@ -101,7 +102,7 @@ logo_base64 = add_logo()
 title_with_logo = f'<img src="data:image/jpeg;base64,{logo_base64}" width="400" style="vertical-align: middle;"> Understanding Image with Text'
 
 examples = [
-    ["examples/
+    ["examples/test_4.jpg"],
 ]
 
 # Create Gradio interface
@@ -110,17 +111,18 @@ interface = gr.Interface(
     inputs=[image_input,
             gr.CheckboxGroup(
                 label="Options",
-                choices=["
+                choices=["Image Generation"],
             ),
            ],
     outputs=gr.outputs.HTML(),
     title=title_with_logo,
-
+    examples=examples,
     description="""
     This code supports image-to-text transformation. The generated text can then be used for retrieval, question answering, etc., in a zero-shot setting.
-    \n
-    \n
-    \n
+    \n Github: https://github.com/showlab/Image2Paragraph
+    \n Twitter: https://twitter.com/awinyimgprocess/status/1646225454599372800?s=46&t=HvOe9T2n35iFuCHP5aIHpQ
+    \n Since GPUs are expensive, the demo runs on CPU and does not include Semantic Segment Anything. For faster speed, run the code locally on a GPU or use the Google Colab we provide.
+    \n The text-to-image model is ControlNet (very slow on CPU, ~2 min), which uses Canny edges as reference.
     """
 )
 
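Note: the new description says generation goes through ControlNet conditioned on Canny edges. The repository's TextToImage wrapper is not shown in this diff, so below is only a minimal sketch of that pattern using the diffusers library, assuming the public lllyasviel/sd-controlnet-canny and runwayml/stable-diffusion-v1-5 checkpoints and the example image added above.

```python
import cv2
import numpy as np
import torch
from PIL import Image
from diffusers import ControlNetModel, StableDiffusionControlNetPipeline

# Build a Canny edge map from the input image to condition generation on.
image = np.array(Image.open("examples/test_4.jpg").convert("RGB"))
edges = cv2.Canny(image, 100, 200)
control_image = Image.fromarray(np.stack([edges] * 3, axis=-1))  # 1 -> 3 channels

# Load the canny-conditioned ControlNet and attach it to Stable Diffusion 1.5.
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny")
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", controlnet=controlnet
)
pipe = pipe.to("cuda" if torch.cuda.is_available() else "cpu")  # CPU works, but slowly (~2 min)

# Generate an image that follows the edge structure of the input.
result = pipe("a photo matching the caption", image=control_image,
              num_inference_steps=20).images[0]
result.save("generated.png")
```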
models/__pycache__/controlnet_model.cpython-38.pyc
CHANGED
Binary files a/models/__pycache__/controlnet_model.cpython-38.pyc and b/models/__pycache__/controlnet_model.cpython-38.pyc differ

models/__pycache__/image_text_transformation.cpython-38.pyc
CHANGED
Binary files a/models/__pycache__/image_text_transformation.cpython-38.pyc and b/models/__pycache__/image_text_transformation.cpython-38.pyc differ
models/image_text_transformation.py
CHANGED
@@ -33,7 +33,8 @@ class ImageTextTransformation:
         self.dense_caption_model = DenseCaptioning(device=self.args.dense_caption_device)
         self.gpt_model = ImageToText(openai_key)
         self.controlnet_model = TextToImage(device=self.args.contolnet_device)
-
+        # time-consuming on CPU, run locally
+        # self.region_semantic_model = RegionSemantic(device=self.args.semantic_segment_device)
         print('\033[1;32m' + "Model initialization finished!".center(50, '-') + '\033[0m')
 
 
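Commenting the RegionSemantic load out is the whole change here. A hypothetical alternative (not what this commit does) is to defer the expensive load until first use, so the CPU demo never pays for it; a minimal sketch, where LazyModel is an illustrative invention:

```python
class LazyModel:
    """Defer building an expensive model until it is first needed."""

    def __init__(self, factory):
        self._factory = factory  # zero-argument callable that builds the model
        self._model = None

    def get(self):
        if self._model is None:
            self._model = self._factory()  # pay the load cost once, on demand
        return self._model


# Hypothetical usage inside ImageTextTransformation.__init__:
#   self.region_semantic_model = LazyModel(
#       lambda: RegionSemantic(device=self.args.semantic_segment_device))
# The CPU demo never calls .get(), so the model is never loaded.
```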
models/segment_models/__pycache__/semantic_segment_anything_model.cpython-38.pyc
CHANGED
Binary files a/models/segment_models/__pycache__/semantic_segment_anything_model.cpython-38.pyc and b/models/segment_models/__pycache__/semantic_segment_anything_model.cpython-38.pyc differ
pretrained_models/blip-image-captioning-large
DELETED
@@ -1 +0,0 @@
-Subproject commit 293ab01f2dc41c1c214299314f11de635d0937dc

pretrained_models/blip2-opt-2.7b
DELETED
@@ -1 +0,0 @@
-Subproject commit 56e1fe81e7e7c346e95e196ace7b442b3f8ff483

pretrained_models/clip-vit-large-patch14
DELETED
@@ -1 +0,0 @@
-Subproject commit 8d052a0f05efbaefbc9e8786ba291cfdf93e5bff

pretrained_models/clipseg-rd64-refined
DELETED
@@ -1 +0,0 @@
-Subproject commit 583b388deb98a04feb3e1f816dcdb8f3062ee205

pretrained_models/oneformer_ade20k_swin_large
DELETED
@@ -1 +0,0 @@
-Subproject commit 4a5bac8e64f82681a12db2e151a4c2f4ce6092b2

pretrained_models/oneformer_coco_swin_large
DELETED
@@ -1 +0,0 @@
-Subproject commit 3a263017ca5c75adbea145f25f81b118243d4394

pretrained_models/stable-diffusion-v1-5
DELETED
@@ -1 +0,0 @@
-Subproject commit 39593d5650112b4cc580433f6b0435385882d819
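Deleting these submodules means the checkpoints are no longer vendored with the Space. A common replacement pattern (an assumption here, not shown in this diff) is to resolve the same checkpoints from the Hugging Face Hub by repo id, so weights are downloaded to the local cache on first use. A sketch for the BLIP captioner, assuming the standard Salesforce/blip-image-captioning-large repo:

```python
from PIL import Image
from transformers import BlipForConditionalGeneration, BlipProcessor

# Resolve the checkpoint from the Hub instead of a local pretrained_models/
# submodule; the weights are downloaded and cached on first call.
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")

image = Image.open("examples/test_4.jpg").convert("RGB")
inputs = processor(images=image, return_tensors="pt")
caption_ids = model.generate(**inputs, max_new_tokens=30)
print(processor.decode(caption_ids[0], skip_special_tokens=True))
```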