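# Streamlit app: image captioning on CPU with Microsoft Florence-2.
# A user uploads an image and the app generates three captions at increasing
# levels of detail, with an option to save them to a text file.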
import streamlit as st
from transformers import AutoProcessor, AutoModelForCausalLM
from PIL import Image
import torch
# Load the Florence model and processor
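# st.cache_resource keeps the loaded model and processor in memory across
# Streamlit reruns, so the weights are loaded only once per process.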
@st.cache_resource
def load_model():
    model_id = 'microsoft/Florence-2-large'
    model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True).eval().to(torch.float32)
    processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
    return model, processor
# Load the model and processor globally
model, processor = load_model()
# Function to run the model
def run_example(task_prompt, image, text_input=None):
    if text_input is None:
        prompt = task_prompt
    else:
        prompt = task_prompt + text_input

    # Prepare inputs
    inputs = processor(text=prompt, images=image, return_tensors="pt")
    # input_ids are token indices and must stay integer tensors; only the pixel
    # values need to match the model's float32 weights for CPU inference.
    inputs["pixel_values"] = inputs["pixel_values"].to(torch.float32)
    # Generate predictions
    generated_ids = model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=1024,
        early_stopping=False,
        do_sample=False,
        num_beams=3,
    )
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    parsed_answer = processor.post_process_generation(
        generated_text,
        task=task_prompt,
        image_size=(image.width, image.height)
    )
    return parsed_answer
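# For the caption tasks used here, run_example returns a dict keyed by the task
# prompt, e.g. (illustrative only) {'<CAPTION>': 'A dog running on a beach.'}.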
# Streamlit UI
st.title("Microsoft Florence Image Captioning (CPU)")
# File uploader
uploaded_file = st.file_uploader("Upload an image (PNG or JPG)", type=["png", "jpg", "jpeg"])
if uploaded_file is not None:
    # Convert and display the image
    image = Image.open(uploaded_file).convert("RGB")
    st.image(image, caption="Uploaded Image", use_column_width=True)

    # Generate captions
    st.subheader("Generated Captions")
    with st.spinner("Generating caption..."):
        try:
            # Index each parsed result by its task prompt to get the caption text.
            caption = run_example('<CAPTION>', image)['<CAPTION>']
            detailed_caption = run_example('<DETAILED_CAPTION>', image)['<DETAILED_CAPTION>']
            more_detailed_caption = run_example('<MORE_DETAILED_CAPTION>', image)['<MORE_DETAILED_CAPTION>']

            st.write("**Caption:**", caption)
            st.write("**Detailed Caption:**", detailed_caption)
            st.write("**More Detailed Caption:**", more_detailed_caption)
            # Option to save the output
            if st.button("Save Captions"):
                output_path = "captions.txt"
                with open(output_path, "w") as file:
                    file.write(f"Caption: {caption}\n")
                    file.write(f"Detailed Caption: {detailed_caption}\n")
                    file.write(f"More Detailed Caption: {more_detailed_caption}\n")
                st.success(f"Captions saved to {output_path}!")
        except Exception as e:
            st.error(f"Error: {e}")
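# Run locally with: streamlit run app.py
# The first run downloads the Florence-2 weights from the Hugging Face Hub.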