marcellohalfeld committed on
Commit
fdbf104
1 Parent(s): f8ad3f6

Update app.py

Files changed (1)
  1. app.py +17 -3
app.py CHANGED
@@ -3,20 +3,34 @@ import numpy as np
  from PIL import Image
  from transformers import AutoProcessor, BlipForConditionalGeneration
 
- processor = # write your code here
- model = # write your code here
+ # Load the BLIP processor and captioning model from the Hugging Face Hub.
+ # BlipForConditionalGeneration (imported above) is the class that matches this checkpoint;
+ # AutoModelForSeq2SeqLM does not cover the BLIP architecture.
+ processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+ model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
 
  def caption_image(input_image: np.ndarray):
      # Convert numpy array to PIL Image and convert to RGB
      raw_image = Image.fromarray(input_image).convert('RGB')
 
      # Process the image
+     # No question is needed for plain captioning; the text serves as an optional prefix for the caption
+     text = "the image of"
+     inputs = processor(images=raw_image, text=text, return_tensors="pt")
 
      # Generate a caption for the image
+     outputs = model.generate(**inputs, max_length=50)
 
      # Decode the generated tokens to text and store it into `caption`
+     caption = processor.decode(outputs[0], skip_special_tokens=True)
 
      return caption
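
The hunk above covers only the captioning logic; the rest of app.py is not shown in this diff. Since caption_image takes a NumPy array and returns a string, it is presumably exposed through a Gradio interface in the Space. The wiring below is a minimal sketch under that assumption (the Gradio front end, labels, and title are not part of this commit) and would sit at the bottom of app.py, after caption_image is defined:

import gradio as gr

# Assumed wiring, not from this commit: gr.Image() passes the uploaded picture
# to caption_image as an np.ndarray (matching its signature), and the returned
# string is shown in a text box.
iface = gr.Interface(
    fn=caption_image,
    inputs=gr.Image(),
    outputs=gr.Textbox(label="Caption"),
    title="BLIP Image Captioning",
)

iface.launch()

With that in place, uploading an image in the Space UI runs the processor → generate → decode pipeline from the diff and displays the decoded caption.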