ysharma (HF staff) committed
Commit 4005ef3
1 Parent(s): c7462b6

Update app.py

Files changed (1): app.py (+38 -11)
app.py CHANGED
@@ -4,12 +4,14 @@ from threading import Thread
 import gradio as gr
 import torch
 from PIL import Image
-from transformers import AutoProcessor, LlavaForConditionalGeneration
+#from transformers import AutoProcessor, LlavaForConditionalGeneration
 from transformers import TextIteratorStreamer
+from transformers import LlavaNextForConditionalGeneration, LlavaNextProcessor
+from PIL import Image
+import requests
 
 import spaces
 
-
 PLACEHOLDER = """
 <div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
 <img src="https://cdn-uploads.huggingface.co/production/uploads/64ccdc322e592905f922a06e/DDIW0kbWmdOQWwy4XMhwX.png" style="width: 80%; max-width: 550px; height: auto; opacity: 0.55; ">
@@ -17,20 +19,45 @@ PLACEHOLDER = """
 <p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">Llava-Llama-3-8b is a LLaVA model fine-tuned from Meta-Llama-3-8B-Instruct and CLIP-ViT-Large-patch14-336 with ShareGPT4V-PT and InternVL-SFT by XTuner</p>
 </div>
 """
+#####################
+
+'''processor = LlavaNextProcessor.from_pretrained("tiiuae/falcon-11B-vlm", tokenizer_class='PreTrainedTokenizerFast')
+model = LlavaNextForConditionalGeneration.from_pretrained("tiiuae/falcon-11B-vlm", torch_dtype=torch.bfloat16)
+
+url = "http://images.cocodataset.org/val2017/000000039769.jpg"
+cats_image = Image.open(requests.get(url, stream=True).raw)
+instruction = 'Write a long paragraph about this picture.'
+
+prompt = f"""User:<image>\n{instruction} Falcon:"""
+inputs = processor(prompt, images=cats_image, return_tensors="pt", padding=True).to('cuda:0')
+
+model.to('cuda:0')
+output = model.generate(**inputs, max_new_tokens=256)
+
+
+prompt_length = inputs['input_ids'].shape[1]
+generated_captions = processor.decode(output[0], skip_special_tokens=True).strip()
+
+print(generated_captions)
+'''
+#############################
 
+#model_id = "xtuner/llava-llama-3-8b-v1_1-transformers"
+model_id = "tiiuae/falcon-11B-vlm"
 
-model_id = "xtuner/llava-llama-3-8b-v1_1-transformers"
+#processor = AutoProcessor.from_pretrained(model_id)
+processor = LlavaNextProcessor.from_pretrained("tiiuae/falcon-11B-vlm", tokenizer_class='PreTrainedTokenizerFast')
 
-processor = AutoProcessor.from_pretrained(model_id)
+model = LlavaNextForConditionalGeneration.from_pretrained("tiiuae/falcon-11B-vlm", torch_dtype=torch.bfloat16)
 
-model = LlavaForConditionalGeneration.from_pretrained(
-    model_id,
-    torch_dtype=torch.float16,
-    low_cpu_mem_usage=True,
-)
+#model = LlavaForConditionalGeneration.from_pretrained(
+#    model_id,
+#    torch_dtype=torch.float16,
+#    low_cpu_mem_usage=True,
+#)
 
 model.to("cuda:0")
-model.generation_config.eos_token_id = 128009
+#model.generation_config.eos_token_id = 128009
 
 
 @spaces.GPU
@@ -91,7 +118,7 @@ chat_input = gr.MultimodalTextbox(interactive=True, file_types=["image"], placeh
 with gr.Blocks(fill_height=True, ) as demo:
     gr.ChatInterface(
         fn=bot_streaming,
-        title="LLaVA Llama-3-8B",
+        title="FalconVLM",
         examples=[{"text": "What is on the flower?", "files": ["./bee.jpg"]},
                   {"text": "How to make this pastry?", "files": ["./baklava.png"]}],
         description="Try [LLaVA Llama-3-8B](https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers). Upload an image and start chatting about it, or simply try one of the examples below. If you don't upload an image, you will receive an error.",