Update app.py
app.py CHANGED
@@ -4,12 +4,14 @@ from threading import Thread
 import gradio as gr
 import torch
 from PIL import Image
-from transformers import AutoProcessor, LlavaForConditionalGeneration
+#from transformers import AutoProcessor, LlavaForConditionalGeneration
 from transformers import TextIteratorStreamer
+from transformers import LlavaNextForConditionalGeneration, LlavaNextProcessor
+from PIL import Image
+import requests
 
 import spaces
 
-
 PLACEHOLDER = """
 <div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
 <img src="https://cdn-uploads.huggingface.co/production/uploads/64ccdc322e592905f922a06e/DDIW0kbWmdOQWwy4XMhwX.png" style="width: 80%; max-width: 550px; height: auto; opacity: 0.55; ">
@@ -17,20 +19,45 @@ PLACEHOLDER = """
 <p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">Llava-Llama-3-8b is a LLaVA model fine-tuned from Meta-Llama-3-8B-Instruct and CLIP-ViT-Large-patch14-336 with ShareGPT4V-PT and InternVL-SFT by XTuner</p>
 </div>
 """
+#####################
+
+'''processor = LlavaNextProcessor.from_pretrained("tiiuae/falcon-11B-vlm", tokenizer_class='PreTrainedTokenizerFast')
+model = LlavaNextForConditionalGeneration.from_pretrained("tiiuae/falcon-11B-vlm", torch_dtype=torch.bfloat16)
+
+url = "http://images.cocodataset.org/val2017/000000039769.jpg"
+cats_image = Image.open(requests.get(url, stream=True).raw)
+instruction = 'Write a long paragraph about this picture.'
+
+prompt = f"""User:<image>\n{instruction} Falcon:"""
+inputs = processor(prompt, images=cats_image, return_tensors="pt", padding=True).to('cuda:0')
+
+model.to('cuda:0')
+output = model.generate(**inputs, max_new_tokens=256)
+
+
+prompt_length = inputs['input_ids'].shape[1]
+generated_captions = processor.decode(output[0], skip_special_tokens=True).strip()
+
+print(generated_captions)
+'''
+#############################
 
+#model_id = "xtuner/llava-llama-3-8b-v1_1-transformers"
+model_id = "tiiuae/falcon-11B-vlm"
 
-model_id = "xtuner/llava-llama-3-8b-v1_1-transformers"
+#processor = AutoProcessor.from_pretrained(model_id)
+processor = LlavaNextProcessor.from_pretrained("tiiuae/falcon-11B-vlm", tokenizer_class='PreTrainedTokenizerFast')
 
-processor = AutoProcessor.from_pretrained(model_id)
+model = LlavaNextForConditionalGeneration.from_pretrained("tiiuae/falcon-11B-vlm", torch_dtype=torch.bfloat16)
 
-model = LlavaForConditionalGeneration.from_pretrained(
-    model_id,
-    torch_dtype=torch.float16,
-    low_cpu_mem_usage=True,
-)
+#model = LlavaForConditionalGeneration.from_pretrained(
+#    model_id,
+#    torch_dtype=torch.float16,
+#    low_cpu_mem_usage=True,
+#)
 
 model.to("cuda:0")
-model.generation_config.eos_token_id = 128009
+#model.generation_config.eos_token_id = 128009
 
 
 @spaces.GPU
@@ -91,7 +118,7 @@ chat_input = gr.MultimodalTextbox(interactive=True, file_types=["image"], placeh
 with gr.Blocks(fill_height=True, ) as demo:
     gr.ChatInterface(
         fn=bot_streaming,
-        title="
+        title="FalconVLM",
         examples=[{"text": "What is on the flower?", "files": ["./bee.jpg"]},
                   {"text": "How to make this pastry?", "files": ["./baklava.png"]}],
         description="Try [LLaVA Llama-3-8B](https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers). Upload an image and start chatting about it, or simply try one of the examples below. If you don't upload an image, you will receive an error.",
|