Spaces:

fffiloni
/

llm-from-image

Sleeping

App Files Community

fffiloni committed on Feb 4, 2024

Commit

ec771a2

verified ·

1 Parent(s): 8d15222

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -10

app.py CHANGED Viewed

@@ -8,7 +8,7 @@ from gradio_client import Client
 #fuyu_client = Client("https://adept-fuyu-8b-demo.hf.space/")
 kosmos2_client = Client("https://ydshieh-kosmos-2.hf.space/")
-def get_caption(image_in):
     """
     fuyu_result = fuyu_client.predict(
 	    image_in,	# str representing input in 'raw_image' Image component
@@ -106,11 +106,12 @@ Here's another example. If a user types, "In the image, there is a drawing of a
     return outputs
-def infer(image_in):
     gr.Info("Getting image description...")
-    user_prompt = get_caption_from_MD(image_in)
     gr.Info("Building a system according to the image caption ...")
     outputs = get_llm_idea(user_prompt)
@@ -148,6 +149,14 @@ with gr.Blocks(css=css) as demo:
                     type = "filepath",
                     elem_id = "image-in"
                 )
                 submit_btn = gr.Button("Make LLM system from my pic !")
             with gr.Column():
                 caption = gr.Textbox(
@@ -172,15 +181,14 @@ with gr.Blocks(css=css) as demo:
                     ["examples/chicken_adobo.jpeg"]
                 ],
                 fn = infer,
-                inputs = [image_in],
-                outputs = [caption, result],
-                cache_examples = False
             )
     submit_btn.click(
         fn = infer,
         inputs = [
-            image_in
         ],
         outputs =[
             caption,
@@ -188,4 +196,4 @@ with gr.Blocks(css=css) as demo:
         ]
     )
-demo.queue().launch(show_api=False)

 #fuyu_client = Client("https://adept-fuyu-8b-demo.hf.space/")
 kosmos2_client = Client("https://ydshieh-kosmos-2.hf.space/")
+def get_caption_from_kosmos(image_in):
     """
     fuyu_result = fuyu_client.predict(
 	    image_in,	# str representing input in 'raw_image' Image component
     return outputs
+def infer(image_in, cap_type):
     gr.Info("Getting image description...")
+    if cap_type == "Fictional" :
+        user_prompt = get_caption_from_MD(image_in)
+    elif cap_type == "Literal" :
+        user_prompt = get_caption_from_kosmos(image_in)
     gr.Info("Building a system according to the image caption ...")
     outputs = get_llm_idea(user_prompt)
                     type = "filepath",
                     elem_id = "image-in"
                 )
+                cap_type = gr.Radio(
+                    label = "Caption type",
+                    choices = [
+                        "Literal",
+                        "Fictional"
+                    ],
+                    value = "Fictional"
+                )
                 submit_btn = gr.Button("Make LLM system from my pic !")
             with gr.Column():
                 caption = gr.Textbox(
                     ["examples/chicken_adobo.jpeg"]
                 ],
                 fn = infer,
+                inputs = [image_in, cap_type]
             )
     submit_btn.click(
         fn = infer,
         inputs = [
+            image_in,
+            cap_type
         ],
         outputs =[
             caption,
         ]
     )
+demo.queue().launch(show_api=False, show_error=True)