marcellohalfeld committed on
Commit
fdbf104
1 Parent(s): f8ad3f6

Update app.py

Files changed (1)
  1. app.py +17 -3
app.py CHANGED
@@ -3,20 +3,34 @@ import numpy as np
  from PIL import Image
  from transformers import AutoProcessor, BlipForConditionalGeneration
 
- processor = # write your code here
- model = # write your code here
+ # Load the BLIP processor and captioning model from the Hugging Face Hub.
+ # BlipForConditionalGeneration (imported above) is the class that matches this checkpoint;
+ # AutoModelForSeq2SeqLM does not cover the BLIP architecture.
+ processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+ model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
 
  def caption_image(input_image: np.ndarray):
      # Convert numpy array to PIL Image and convert to RGB
      raw_image = Image.fromarray(input_image).convert('RGB')
 
      # Process the image
+     # No question is needed for plain captioning; the text serves as an optional prefix for the caption
+     text = "the image of"
+     inputs = processor(images=raw_image, text=text, return_tensors="pt")
 
      # Generate a caption for the image
+     outputs = model.generate(**inputs, max_length=50)
 
      # Decode the generated tokens to text and store it into `caption`
+     caption = processor.decode(outputs[0], skip_special_tokens=True)
 
      return caption
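
The hunk above covers only the captioning logic; the rest of app.py is not shown in this diff. Since caption_image takes a NumPy array and returns a string, it is presumably exposed through a Gradio interface in the Space. The wiring below is a minimal sketch under that assumption (the Gradio front end, labels, and title are not part of this commit) and would sit at the bottom of app.py, after caption_image is defined:

import gradio as gr

# Assumed wiring, not from this commit: gr.Image() passes the uploaded picture
# to caption_image as an np.ndarray (matching its signature), and the returned
# string is shown in a text box.
iface = gr.Interface(
    fn=caption_image,
    inputs=gr.Image(),
    outputs=gr.Textbox(label="Caption"),
    title="BLIP Image Captioning",
)

iface.launch()

With that in place, uploading an image in the Space UI runs the processor → generate → decode pipeline from the diff and displays the decoded caption.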