Spaces:

shi-labs
/

VCoder

Runtime error

App Files Files Community

praeclarumjj3 commited on Dec 20, 2023

Commit

2d49497

1 Parent(s): 7015bfd

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -21

app.py CHANGED Viewed

@@ -5,7 +5,6 @@ import os
 import time
 import gradio as gr
-import requests
 import hashlib
 from vcoder_llava.vcoder_conversation import (default_conversation, conv_templates,
@@ -200,7 +199,8 @@ def http_bot(state, model_selector, temperature, top_p, max_new_tokens, request:
                     yield (state, state.to_gradio_chatbot()) + (disable_btn, disable_btn, disable_btn, enable_btn, enable_btn)
                     return
                 time.sleep(0.03)
-    except:
         state.messages[-1][-1] = server_error_msg
         yield (state, state.to_gradio_chatbot()) + (disable_btn, disable_btn, disable_btn, enable_btn, enable_btn)
         return
@@ -225,23 +225,24 @@ def http_bot(state, model_selector, temperature, top_p, max_new_tokens, request:
         }
         fout.write(json.dumps(data) + "\n")
-title_markdown = ("""
-# 🌋 LLaVA: Large Language and Vision Assistant
-[[Project Page]](https://llava-vl.github.io) [[Paper]](https://arxiv.org/abs/2304.08485) [[Code]](https://github.com/haotian-liu/LLaVA) [[Model]](https://github.com/haotian-liu/LLaVA/blob/main/docs/MODEL_ZOO.md)
-""")
 tos_markdown = ("""
 ### Terms of use
 By using this service, users are required to agree to the following terms:
-The service is a research preview intended for non-commercial use only. It only provides limited safety measures and may generate offensive content. It must not be used for any illegal, harmful, violent, racist, or sexual purposes. The service may collect user dialogue data for future research.
-Please click the "Flag" button if you get any inappropriate answer! We will collect those to keep improving our moderator.
-For an optimal experience, please use desktop computers for this demo, as mobile devices may compromise its quality.
 """)
 learn_more_markdown = ("""
 ### License
-The service is a research preview intended for non-commercial use only, subject to the model [License](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) of LLaMA, [Terms of Use](https://openai.com/policies/terms-of-use) of the data generated by OpenAI, and [Privacy Practices](https://chrome.google.com/webstore/detail/sharegpt-share-your-chatg/daiacboceoaocpibfodeljbdfacokfjb) of ShareGPT. Please contact us if you find any potential violation.
 """)
 block_css = """
@@ -259,7 +260,8 @@ def build_demo(embed_mode):
         state = gr.State()
         if not embed_mode:
-            gr.Markdown(title_markdown)
         with gr.Row():
             with gr.Column(scale=3):
@@ -284,15 +286,9 @@ def build_demo(embed_mode):
                     value="Default",
                     label="Preprocess for non-square Seg Map", visible=False)
-                cur_dir = os.path.dirname(os.path.abspath(__file__))
-                gr.Examples(examples=[
-                    [f"{cur_dir}/examples/3.jpg", f"{cur_dir}/examples/3_pan.png", "What objects can be seen in the image?"],
-                    [f"{cur_dir}/examples/3.jpg", f"{cur_dir}/examples/3_ins.png", "What objects can be seen in the image?"],
-                ], inputs=[imagebox, segbox, textbox])
                 with gr.Accordion("Parameters", open=False) as parameter_row:
-                    temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.2, step=0.1, interactive=True, label="Temperature",)
-                    top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, step=0.1, interactive=True, label="Top P",)
                     max_output_tokens = gr.Slider(minimum=0, maximum=1024, value=512, step=64, interactive=True, label="Max output tokens",)
             with gr.Column(scale=8):
@@ -310,6 +306,16 @@ def build_demo(embed_mode):
                     regenerate_btn = gr.Button(value="🔄  Regenerate", interactive=False)
                     clear_btn = gr.Button(value="🗑️  Clear", interactive=False)
         if not embed_mode:
             gr.Markdown(tos_markdown)
             gr.Markdown(learn_more_markdown)
@@ -342,7 +348,7 @@ def build_demo(embed_mode):
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument("--model-path", type=str, default="facebook/opt-350m")
     parser.add_argument("--model-base", type=str, default=None)
     parser.add_argument("--model-name", type=str)
     parser.add_argument("--load-8bit", action="store_true")
@@ -386,4 +392,4 @@ if __name__ == "__main__":
         server_name=args.host,
         server_port=args.port,
         share=args.share
-    )

 import time
 import gradio as gr
 import hashlib
 from vcoder_llava.vcoder_conversation import (default_conversation, conv_templates,
                     yield (state, state.to_gradio_chatbot()) + (disable_btn, disable_btn, disable_btn, enable_btn, enable_btn)
                     return
                 time.sleep(0.03)
+    except Exception:
+        gr.Warning(server_error_msg)
         state.messages[-1][-1] = server_error_msg
         yield (state, state.to_gradio_chatbot()) + (disable_btn, disable_btn, disable_btn, enable_btn, enable_btn)
         return
         }
         fout.write(json.dumps(data) + "\n")
+title = "<h1 style='margin-bottom: -10px; text-align: center'>VCoder: Versatile Vision Encoders for Multimodal Large Language Models</h1>"
+# style='
+description = "<p style='font-size: 16px; margin: 5px; font-weight: w300; text-align: center'> <a href='https://praeclarumjj3.github.io/' style='text-decoration:none' target='_blank'>Jitesh Jain, </a> <a href='https://jwyang.github.io/' style='text-decoration:none' target='_blank'>Jianwei Yang, <a href='https://www.humphreyshi.com/home' style='text-decoration:none' target='_blank'>Humphrey Shi</a></p>" \
+            + "<p style='font-size: 16px; margin: 5px; font-weight: w600; text-align: center'> <a href='https://praeclarumjj3.github.io/vcoder/' target='_blank'>Project Page</a> | <a href='https://praeclarumjj3.github.io/vcoder/' target='_blank'>Video</a> | <a href='https://arxiv.org/abs/2211.06220' target='_blank'>ArXiv Paper</a> | <a href='https://github.com/SHI-Labs/VCoder' target='_blank'>Github Repo</a></p>" \
+            + "<p style='text-align: center; font-size: 16px; margin: 5px; font-weight: w300;'> [Note: Please click on Regenerate button if you are unsatisfied with the generated response. You may find screenshots of our demo trials <a href='https://github.com/SHI-Labs/VCoder/blob/main/images/' style='text-decoration:none' target='_blank'>here</a>.]</p>" \
+            + "<p style='text-align: center; font-size: 16px; margin: 5px; font-weight: w300;'> [Note: You can obtain segmentation maps for your image using the <a href='https://huggingface.co/spaces/shi-labs/OneFormer' style='text-decoration:none' target='_blank'>OneFormer Demo</a>. Please click on Regenerate button if you are unsatisfied with the generated response. You may find screenshots of our demo trials <a href='https://github.com/SHI-Labs/VCoder/blob/main/images/' style='text-decoration:none' target='_blank'>here</a>.]</p>"
 tos_markdown = ("""
 ### Terms of use
 By using this service, users are required to agree to the following terms:
+The service is a research preview intended for non-commercial use only. It only provides limited safety measures and may generate offensive content. It must not be used for any illegal, harmful, violent, racist, or sexual purposes.
 """)
 learn_more_markdown = ("""
 ### License
+The service is a research preview intended for non-commercial use only, subject to the [License](https://huggingface.co/lmsys/vicuna-7b-v1.5) of Vicuna-v1.5, [License](https://github.com/haotian-liu/LLaVA/blob/main/LICENSE) of LLaVA, [Terms of Use](https://cocodataset.org/#termsofuse) of the COCO dataset, [Terms of Use](https://openai.com/policies/terms-of-use) of the data generated by OpenAI, and [Privacy Practices](https://chrome.google.com/webstore/detail/sharegpt-share-your-chatg/daiacboceoaocpibfodeljbdfacokfjb) of ShareGPT. Please contact us if you find any potential violation.
 """)
 block_css = """
         state = gr.State()
         if not embed_mode:
+            gr.Markdown(title)
+            gr.Markdown(description)
         with gr.Row():
             with gr.Column(scale=3):
                     value="Default",
                     label="Preprocess for non-square Seg Map", visible=False)
                 with gr.Accordion("Parameters", open=False) as parameter_row:
+                    temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.8, step=0.1, interactive=True, label="Temperature",)
+                    top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.9, step=0.1, interactive=True, label="Top P",)
                     max_output_tokens = gr.Slider(minimum=0, maximum=1024, value=512, step=64, interactive=True, label="Max output tokens",)
             with gr.Column(scale=8):
                     regenerate_btn = gr.Button(value="🔄  Regenerate", interactive=False)
                     clear_btn = gr.Button(value="🗑️  Clear", interactive=False)
+        cur_dir = os.path.dirname(os.path.abspath(__file__))
+        gr.Examples(examples=[
+            [f"{cur_dir}/examples/people.jpg", f"{cur_dir}/examples/people_pan.png", "What objects can be seen in the image?", "0.9", "1.0"],
+            [f"{cur_dir}/examples/corgi.jpg", f"{cur_dir}/examples/corgi_pan.png", "What objects can be seen in the image?", "0.6", "0.7"],
+            [f"{cur_dir}/examples/friends.jpg", f"{cur_dir}/examples/friends_pan.png", "Can you count the number of people in the image?", "0.8", "0.9"],
+            [f"{cur_dir}/examples/friends.jpg", f"{cur_dir}/examples/friends_pan.png", "What is happening in the image?", "0.8", "0.9"],
+            [f"{cur_dir}/examples/suits.jpg", f"{cur_dir}/examples/suits_pan.png", "What objects can be seen in the image?", "0.5", "0.5"],
+            [f"{cur_dir}/examples/suits.jpg", f"{cur_dir}/examples/suits_ins.png", "What objects can be seen in the image?", "0.5", "0.5"],
+        ], inputs=[imagebox, segbox, textbox, temperature, top_p])
         if not embed_mode:
             gr.Markdown(tos_markdown)
             gr.Markdown(learn_more_markdown)
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
+    parser.add_argument("--model-path", type=str, default="shi-labs/vcoder_ds_llava-v1.5-13b")
     parser.add_argument("--model-base", type=str, default=None)
     parser.add_argument("--model-name", type=str)
     parser.add_argument("--load-8bit", action="store_true")
         server_name=args.host,
         server_port=args.port,
         share=args.share
+    )