|
import os |
|
import gradio as gr |
|
from transformers import pipeline |
|
|
|
from transformers import AutoProcessor, BlipForQuestionAnswering |
|
|
|
# Single checkpoint identifier shared by the processor and the model so the
# two can never drift apart.
_CHECKPOINT = "Salesforce/blip-vqa-base"

# Both objects are created once, at import time, and reused for every request.
processor = AutoProcessor.from_pretrained(_CHECKPOINT)
model = BlipForQuestionAnswering.from_pretrained(_CHECKPOINT)
|
|
|
def launch(pil_image, question):
    """Answer a free-text question about an image using the BLIP VQA model.

    Args:
        pil_image: The image to inspect. May be ``None`` when the caller
            submits without providing an image.
        question: The question to ask about the image.

    Returns:
        The decoded answer string. If the image is missing or the question
        is empty/whitespace-only, a short prompt asking for the missing
        input is returned instead and the model is not invoked.
    """
    # Guard clauses: fail softly with a readable message rather than letting
    # the processor raise an opaque error on missing input.
    if pil_image is None:
        return "Please upload an image first."
    if question is None or not question.strip():
        return "Please type a question first."

    encoded = processor(pil_image, question, return_tensors="pt")
    generated = model.generate(**encoded)
    # generate() returns a batch; there is exactly one sequence here.
    return processor.decode(generated[0], skip_special_tokens=True)
|
|
|
# Widgets are listed in the same order as the arguments of `launch`.
_image_input = gr.Image(label="Input image", type="pil")
_question_input = gr.Textbox(label="Question", lines=3)
_answer_output = gr.Textbox(label="Answer", lines=3)

iface = gr.Interface(
    fn=launch,
    inputs=[_image_input, _question_input],
    outputs=[_answer_output],
    title="Image Q&A with Salesforce BLIP",
    description="1. Upload an image.\n2. Type a question.\n3. Press submit.\n4. Get an answer.",
    allow_flagging="never",
)

# Start the Gradio app.
iface.launch()