## How to use the model
First, make sure to have `transformers >= 4.35.3`.
The model supports multi-image and multi-prompt generation, meaning that you can pass multiple images in your prompt. Make sure also to follow the correct prompt template and add the token `<image>` at the location where you want to query images:

According to the official code base, it is recommended to use this template:

```bash
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.###Human: <image>\n<prompt>###Assistant:
```

Where `<prompt>` denotes the question asked by the user.
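For a multi-image query, repeat the `<image>` token once per image, in the order the images are passed. Below is a minimal sketch of what such a prompt could look like; `image1` and `image2` are placeholder `PIL` images, and passing them as a list to the processor is an assumption, not code from this card:

```python
# Hypothetical two-image query: one <image> token per image
question = "What is the difference between these two images?"
prompt = f"A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.###Human: <image>\n<image>\n{question}###Assistant:"
# inputs = processor(prompt, [image1, image2], return_tensors="pt")  # assumed multi-image call
```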
### Using `pipeline`:
```python
from transformers import pipeline
from PIL import Image
import requests

model_id = "llava-hf/vip-llava-7b-hf"
pipe = pipeline("image-to-text", model=model_id)

# Example image: a science diagram from the AI2D dataset
url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/ai2d-demo.jpg"
image = Image.open(requests.get(url, stream=True).raw)

# Fill the recommended prompt template with the question
question = "What does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud"
prompt = f"A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.###Human: <image>\n{question}###Assistant:"

outputs = pipe(image, prompt=prompt, generate_kwargs={"max_new_tokens": 200})
print(outputs)
```
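With the `image-to-text` task, `outputs` is typically a list with one dictionary per input, whose `generated_text` field holds the generation; the model's answer is the text following the final `###Assistant:` marker.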
### Using pure `transformers`:

Below is an example of loading the model in half precision on a GPU. The original snippet stops at the `from_pretrained(` call; the loading arguments shown are common defaults rather than prescriptive choices, so adjust them for your hardware:

```python
import requests
import torch
from PIL import Image
from transformers import AutoProcessor, VipLlavaForConditionalGeneration

model_id = "llava-hf/vip-llava-7b-hf"

# Fill the recommended prompt template with the question
question = "What are these?"
prompt = f"A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.###Human: <image>\n{question}###Assistant:"

image_file = "http://images.cocodataset.org/val2017/000000039769.jpg"

model = VipLlavaForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.float16,  # assumed: half precision for GPU inference
    low_cpu_mem_usage=True,     # assumed loading option
).to(0)
```
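From here, generation follows the standard `transformers` API. The continuation below is a sketch under the same assumptions and is not part of the original snippet:

```python
processor = AutoProcessor.from_pretrained(model_id)

raw_image = Image.open(requests.get(image_file, stream=True).raw)
inputs = processor(prompt, raw_image, return_tensors="pt").to(0, torch.float16)

# Greedy decoding, capped at 200 new tokens
output = model.generate(**inputs, max_new_tokens=200, do_sample=False)
# The decoded text echoes the prompt; the answer follows "###Assistant:"
print(processor.decode(output[0], skip_special_tokens=True))
```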