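# Streamlit app: captions an uploaded image with Microsoft Florence-2 on CPU,
# producing a short, a detailed, and a more detailed caption, with an option to save them.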
import streamlit as st
from transformers import AutoProcessor, AutoModelForCausalLM
from PIL import Image
import torch
# Load the Florence-2 model and processor once, cached across Streamlit reruns
@st.cache_resource
def load_model():
    model_id = 'microsoft/Florence-2-large'
    # Run on CPU in float32; trust_remote_code is required for Florence-2's custom modeling code
    model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True).eval().to(torch.float32)
    processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
    return model, processor

# Load the model and processor globally
model, processor = load_model()
# Run a Florence-2 task prompt against an image and return the parsed result
def run_example(task_prompt, image, text_input=None):
    if text_input is None:
        prompt = task_prompt
    else:
        prompt = task_prompt + text_input

    # Prepare inputs; input_ids must stay integer token ids, only pixel_values is cast to float32
    inputs = processor(text=prompt, images=image, return_tensors="pt")
    inputs["pixel_values"] = inputs["pixel_values"].to(torch.float32)

    # Generate predictions (the model itself was already converted to float32 at load time)
    generated_ids = model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=1024,
        early_stopping=False,
        do_sample=False,
        num_beams=3,
    )
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    parsed_answer = processor.post_process_generation(
        generated_text,
        task=task_prompt,
        image_size=(image.width, image.height)
    )
    return parsed_answer
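# Note: for the caption tasks used below, post_process_generation typically returns a dict
# keyed by the task token, e.g. {'<CAPTION>': 'A photo of ...'}; the UI displays that dict as-is.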
# Streamlit UI
st.title("Microsoft Florence Image Captioning (CPU)")

# File uploader
uploaded_file = st.file_uploader("Upload an image (PNG or JPG)", type=["png", "jpg", "jpeg"])

if uploaded_file is not None:
    # Convert and display the image
    image = Image.open(uploaded_file).convert("RGB")
    st.image(image, caption="Uploaded Image", use_column_width=True)
    # Generate captions
    st.subheader("Generated Captions")
    with st.spinner("Generating captions..."):
        try:
            caption = run_example('<CAPTION>', image)
            detailed_caption = run_example('<DETAILED_CAPTION>', image)
            more_detailed_caption = run_example('<MORE_DETAILED_CAPTION>', image)

            st.write("**Caption:**", caption)
            st.write("**Detailed Caption:**", detailed_caption)
            st.write("**More Detailed Caption:**", more_detailed_caption)

            # Option to save the output
            if st.button("Save Captions"):
                output_path = "captions.txt"
                with open(output_path, "w") as file:
                    file.write(f"Caption: {caption}\n")
                    file.write(f"Detailed Caption: {detailed_caption}\n")
                    file.write(f"More Detailed Caption: {more_detailed_caption}\n")
                st.success(f"Captions saved to {output_path}!")
        except Exception as e:
            st.error(f"Error: {e}")
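# To run the app locally (assuming this file is saved as app.py):
#   streamlit run app.py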