Spaces:

khulnasoft
/

HuggingSpaces

Runtime error

App Files Files Community

khulnasoft committed on Jul 6, 2024

Commit

395e92a

verified ·

1 Parent(s): 3dcc707

Create demos/demo_parse_task.json

Browse files

Files changed (1) hide show

demos/demo_parse_task.json +365 -0

demos/demo_parse_task.json ADDED Viewed

	@@ -0,0 +1,365 @@

+[
+    {
+        "role": "user",
+        "content": "Give you some pictures e1.jpg, e2.png, e3.jpg, help me count the number of sheep?"
+    },
+    {
+        "role": "assistant",
+        "content": [
+            {
+                "task": "image-to-text",
+                "id": 0,
+                "dep": [-1],
+                "args": {
+                    "image": "e1.jpg"
+                }
+            },
+            {
+                "task": "object-detection",
+                "id": 1,
+                "dep": [-1],
+                "args": {
+                    "image": "e1.jpg"
+                }
+            },
+            {
+                "task": "visual-question-answering",
+                "id": 2,
+                "dep": [1],
+                "args": {
+                    "image": "<GENERATED>-1",
+                    "text": "How many sheep in the picture"
+                }
+            },
+            {
+                "task": "image-to-text",
+                "id": 3,
+                "dep": [-1],
+                "args": {
+                    "image": "e2.png"
+                }
+            },
+            {
+                "task": "object-detection",
+                "id": 4,
+                "dep": [-1],
+                "args": {
+                    "image": "e2.png"
+                }
+            },
+            {
+                "task": "visual-question-answering",
+                "id": 5,
+                "dep": [4],
+                "args": {
+                    "image": "<GENERATED>-4",
+                    "text": "How many sheep in the picture"
+                }
+            },
+            {
+                "task": "image-to-text",
+                "id": 6,
+                "dep": [-1],
+                "args": {
+                    "image": "e3.jpg"
+                }
+            },
+            {
+                "task": "object-detection",
+                "id": 7,
+                "dep": [-1],
+                "args": {
+                    "image": "e3.jpg"
+                }
+            },
+            {
+                "task": "visual-question-answering",
+                "id": 8,
+                "dep": [7],
+                "args": {
+                    "image": "<GENERATED>-7",
+                    "text": "How many sheep in the picture"
+                }
+            }
+        ]
+    },
+    {
+        "role": "user",
+        "content": "Look at /e.jpg, can you tell me how many objects in the picture? Give me a picture and video similar to this one."
+    },
+    {
+        "role": "assistant",
+        "content": [
+            {
+                "task": "image-to-text",
+                "id": 0,
+                "dep": [-1],
+                "args": {
+                    "image": "/e.jpg"
+                }
+            },
+            {
+                "task": "object-detection",
+                "id": 1,
+                "dep": [-1],
+                "args": {
+                    "image": "/e.jpg"
+                }
+            },
+            {
+                "task": "visual-question-answering",
+                "id": 2,
+                "dep": [1],
+                "args": {
+                    "image": "<GENERATED>-1",
+                    "text": "how many objects in the picture?"
+                }
+            },
+            {
+                "task": "text-to-image",
+                "id": 3,
+                "dep": [0],
+                "args": {
+                    "text": "<GENERATED>-0"
+                }
+            },
+            {
+                "task": "image-to-image",
+                "id": 4,
+                "dep": [-1],
+                "args": {
+                    "image": "/e.jpg"
+                }
+            },
+            {
+                "task": "text-to-video",
+                "id": 5,
+                "dep": [0],
+                "args": {
+                    "text": "<GENERATED>-0"
+                }
+            }
+        ]
+    },
+    {
+        "role": "user",
+        "content": "given a document /images/e.jpeg, answer me what is the student amount? And describe the image with your voice"
+    },
+    {
+        "role": "assistant",
+        "content": [
+            {
+                "task": "document-question-answering",
+                "id": 0,
+                "dep": [-1],
+                "args": {
+                    "image": "/images/e.jpeg",
+                    "text": "what is the student amount?"
+                }
+            },
+            {
+                "task": "visual-question-answering",
+                "id": 1,
+                "dep": [-1],
+                "args": {
+                    "image": "/images/e.jpeg",
+                    "text": "what is the student amount?"
+                }
+            },
+            {
+                "task": "image-to-text",
+                "id": 2,
+                "dep": [-1],
+                "args": {
+                    "image": "/images/e.jpeg"
+                }
+            },
+            {
+                "task": "text-to-speech",
+                "id": 3,
+                "dep": [2],
+                "args": {
+                    "text": "<GENERATED>-2"
+                }
+            }
+        ]
+    },
+    {
+        "role": "user",
+        "content": "Given an image /example.jpg, first generate a pose image, then based on the pose generate a new image where a girl is reading a book"
+    },
+    {
+        "role": "assistant",
+        "content": [
+            {
+                "task": "openpose-control",
+                "id": 0,
+                "dep": [-1],
+                "args": {
+                    "image": "/example.jpg"
+                }
+            },
+            {
+                "task": "openpose-text-to-image",
+                "id": 1,
+                "dep": [0],
+                "args": {
+                    "text": "a girl is reading a book",
+                    "image": "<GENERATED>-0"
+                }
+            }
+        ]
+    },
+    {
+        "role": "user",
+        "content": "please show me a video and an image of (based on the text) 'a boy is running' and dub it"
+    },
+    {
+        "role": "assistant",
+        "content": [
+            {
+                "task": "text-to-video",
+                "id": 0,
+                "dep": [-1],
+                "args": {
+                    "text": "a boy is running"
+                }
+            },
+            {
+                "task": "text-to-speech",
+                "id": 1,
+                "dep": [-1],
+                "args": {
+                    "text": "a boy is running"
+                }
+            },
+            {
+                "task": "text-to-image",
+                "id": 2,
+                "dep": [-1],
+                "args": {
+                    "text": "a boy is running"
+                }
+            }
+        ]
+    },
+    {
+        "role": "user",
+        "content": "please show me a joke and an image of cat"
+    },
+    {
+        "role": "assistant",
+        "content": [
+            {
+                "task": "conversational",
+                "id": 0,
+                "dep": [-1],
+                "args": {
+                    "text": "please show me a joke of cat"
+                }
+            },
+            {
+                "task": "text-to-image",
+                "id": 1,
+                "dep": [-1],
+                "args": {
+                    "text": "a photo of cat"
+                }
+            }
+        ]
+    },
+    {
+        "role": "user",
+        "content": "give me a picture about a cute dog, then describe the image to me and tell a story about it"
+    },
+    {
+        "role": "assistant",
+        "content": [
+            {
+                "task": "text-to-image",
+                "id": 0,
+                "dep": [-1],
+                "args": {
+                    "text": "a picture of a cute dog"
+                }
+            },
+            {
+                "task": "image-to-text",
+                "id": 1,
+                "dep": [0],
+                "args": {
+                    "image": "<GENERATED>-0"
+                }
+            },
+            {
+                "task": "text-generation",
+                "id": 2,
+                "dep": [1],
+                "args": {
+                    "text": "<GENERATED>-1"
+                }
+            },
+            {
+                "task": "text-to-speech",
+                "id": 3,
+                "dep": [2],
+                "args": {
+                    "text": "<GENERATED>-2"
+                }
+            }
+        ]
+    },
+    {
+        "role": "user",
+        "content": "give you a picture /example.jpg, what's in it and tell me a joke about it"
+    },
+    {
+        "role": "assistant",
+        "content": [
+            {
+                "task": "image-to-text",
+                "id": 0,
+                "dep": [-1],
+                "args": {
+                    "image": "/example.jpg"
+                }
+            },
+            {
+                "task": "object-detection",
+                "id": 1,
+                "dep": [-1],
+                "args": {
+                    "image": "/example.jpg"
+                }
+            },
+            {
+                "task": "conversational",
+                "id": 2,
+                "dep": [0],
+                "args": {
+                    "text": "<GENERATED>-0"
+                }
+            },
+            {
+                "task": "text-to-speech",
+                "id": 3,
+                "dep": [2],
+                "args": {
+                    "text": "<GENERATED>-2"
+                }
+            }
+        ]
+    }
+]