SSahas committed
Commit b3fb035 · 1 Parent(s): 7a9a8e9

Create app.py

Files changed (1)
  1. app.py +29 -0
app.py ADDED
@@ -0,0 +1,29 @@
+ import gradio as gr
+ from PIL import Image
+ from transformers import BlipProcessor, BlipForConditionalGeneration
+
+ # Load the BLIP captioning model and its processor once at startup.
+ processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+ model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
+
+ def caption_generator(image, num_captions):
+     num_captions = int(num_captions)
+     raw_image = Image.fromarray(image).convert("RGB")
+     inputs = processor(raw_image, return_tensors="pt")
+     out = model.generate(
+         **inputs,
+         num_beams=num_captions,            # number of beams for beam search
+         num_return_sequences=num_captions, # return one caption per beam
+         max_length=32,                     # maximum length of generated captions
+         early_stopping=True,               # stop once all beam hypotheses have finished
+         no_repeat_ngram_size=2,            # avoid repeating any 2-gram
+         length_penalty=0.8,                # values below 1.0 encourage shorter captions
+     )
+     captions = [processor.decode(seq, skip_special_tokens=True) for seq in out]
+     return ", ".join(captions)
+
+ gr.Interface(
+     caption_generator,
+     inputs=[gr.Image(), gr.Dropdown([1, 2, 3, 4, 5], value=2, label="Select no. of captions to generate")],
+     outputs=gr.Textbox(), live=True,
+ ).launch()
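
For reference, a minimal sketch of exercising the same BLIP generate call outside the Gradio UI, assuming transformers, torch, and Pillow are installed; the file name example.jpg and the fixed beam count of 3 are illustrative assumptions, not part of the commit:

# Hypothetical standalone check of the captioning logic above.
# example.jpg stands in for any local RGB image; it is not part of the commit.
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

raw_image = Image.open("example.jpg").convert("RGB")
inputs = processor(raw_image, return_tensors="pt")
# Three beams and three returned sequences mirror the app's num_captions=3 case.
out = model.generate(**inputs, num_beams=3, num_return_sequences=3, max_length=32)
for seq in out:
    print(processor.decode(seq, skip_special_tokens=True))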