noamrot committed
Commit 6c77aaa
1 Parent(s): 85e9f46

demo fix commit

Files changed (1)
  1. app.py +51 -0
app.py ADDED
@@ -0,0 +1,51 @@
+ import gradio as gr
+ import torch
+ from torchvision import transforms
+ from torchvision.transforms.functional import InterpolationMode
+
+ from BLIP.models.blip import blip_decoder
+
+ # Run on GPU when available, otherwise fall back to CPU.
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+ # Preprocess to the checkpoint's input resolution, normalizing with the
+ # mean/std used for BLIP training.
+ image_size = 384
+ transform = transforms.Compose([
+     transforms.Resize((image_size, image_size), interpolation=InterpolationMode.BICUBIC),
+     transforms.ToTensor(),
+     transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711))
+ ])
+
+ # Load the BLIP captioning model trained with FuseCap.
+ model_url = "https://technionmail-my.sharepoint.com/personal/snoamr_campus_technion_ac_il/_layouts/15/download.aspx?share=EZxgXQaBXGREgDsQiaTcwAAB0z8jQA_hgAnwwPQDt8Dgew"
+ model = blip_decoder(pretrained=model_url, image_size=image_size, vit='base')
+ model.eval()
+ model = model.to(device)
+
+ def inference(raw_image):
+     # Caption a single PIL image using greedy decoding (num_beams=1).
+     image = transform(raw_image).unsqueeze(0).to(device)
+     with torch.no_grad():
+         caption = model.generate(image, sample=False, num_beams=1, max_length=60, min_length=5)
+     return caption[0]
+
+ inputs = [gr.Image(type='pil', interactive=False)]
+ outputs = gr.Textbox(label="Caption")
+
+ title = "FuseCap"
+ description = "Gradio demo for FuseCap: Leveraging Large Language Models to Fuse Visual Data into Enriched Image Captions. This demo features a BLIP-based model trained using FuseCap."
+
+ article = "placeholder"
+ gr.Interface(inference, inputs, outputs, title=title, description=description, article=article, examples=[['birthday_dog.jpeg']]).launch(enable_queue=True)
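
For a quick sanity check without the browser UI, the demo's inference function can be called directly on a PIL image. A minimal sketch, not part of the commit, assuming the final .launch(...) call in app.py is commented out so the import does not block, and that the example image birthday_dog.jpeg from the examples list is present in the repo root:

# Minimal local smoke test for the captioning pipeline (illustration only).
from PIL import Image

import app  # downloads the checkpoint on import; assumes .launch(...) is disabled

raw_image = Image.open('birthday_dog.jpeg').convert('RGB')  # demo example image
print(app.inference(raw_image))  # prints the generated caption string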