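# Streamlit app: image captioning on CPU with Microsoft Florence-2.
# A user uploads an image and the app generates three captions at increasing
# levels of detail, with an option to save them to a text file.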
import streamlit as st
from transformers import AutoProcessor, AutoModelForCausalLM
from PIL import Image
import torch
# Load the Florence model and processor
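# st.cache_resource keeps the loaded model and processor in memory across
# Streamlit reruns, so the weights are loaded only once per process.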
@st.cache_resource
def load_model():
    model_id = 'microsoft/Florence-2-large'
    model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True).eval().to(torch.float32)
    processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
    return model, processor
# Load the model and processor globally
model, processor = load_model()
# Function to run the model
def run_example(task_prompt, image, text_input=None):
    if text_input is None:
        prompt = task_prompt
    else:
        prompt = task_prompt + text_input

    # Prepare inputs
    inputs = processor(text=prompt, images=image, return_tensors="pt")
    # input_ids are token indices and must stay integer tensors; only the pixel
    # values need to match the model's float32 weights for CPU inference.
    inputs["pixel_values"] = inputs["pixel_values"].to(torch.float32)
    # Generate predictions
    generated_ids = model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=1024,
        early_stopping=False,
        do_sample=False,
        num_beams=3,
    )
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    parsed_answer = processor.post_process_generation(
        generated_text,
        task=task_prompt,
        image_size=(image.width, image.height)
    )
    return parsed_answer
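# For the caption tasks used here, run_example returns a dict keyed by the task
# prompt, e.g. (illustrative only) {'<CAPTION>': 'A dog running on a beach.'}.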
# Streamlit UI
st.title("Microsoft Florence Image Captioning (CPU)")
# File uploader
uploaded_file = st.file_uploader("Upload an image (PNG or JPG)", type=["png", "jpg", "jpeg"])
if uploaded_file is not None:
    # Convert and display the image
    image = Image.open(uploaded_file).convert("RGB")
    st.image(image, caption="Uploaded Image", use_column_width=True)

    # Generate captions
    st.subheader("Generated Captions")
    with st.spinner("Generating caption..."):
        try:
            # Index each parsed result by its task prompt to get the caption text.
            caption = run_example('<CAPTION>', image)['<CAPTION>']
            detailed_caption = run_example('<DETAILED_CAPTION>', image)['<DETAILED_CAPTION>']
            more_detailed_caption = run_example('<MORE_DETAILED_CAPTION>', image)['<MORE_DETAILED_CAPTION>']

            st.write("**Caption:**", caption)
            st.write("**Detailed Caption:**", detailed_caption)
            st.write("**More Detailed Caption:**", more_detailed_caption)
            # Option to save the output
            if st.button("Save Captions"):
                output_path = "captions.txt"
                with open(output_path, "w") as file:
                    file.write(f"Caption: {caption}\n")
                    file.write(f"Detailed Caption: {detailed_caption}\n")
                    file.write(f"More Detailed Caption: {more_detailed_caption}\n")
                st.success(f"Captions saved to {output_path}!")
        except Exception as e:
            st.error(f"Error: {e}")
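# Run locally with: streamlit run app.py
# The first run downloads the Florence-2 weights from the Hugging Face Hub.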