"""Gradio demo that asks a LLaVA vision-language model to describe emotions in an image."""

import io
import os

import gradio as gr
from PIL import Image
from transformers import (  # noqa: F401 - first two kept for backward compatibility
    AutoTokenizer,
    VisionEncoderDecoderModel,
    AutoProcessor,
    LlavaForConditionalGeneration,
)

# Transformers-format LLaVA 1.5 checkpoint. The original "liuhaotian/..." weights
# use the LLaVA repository's own format and cannot be loaded via from_pretrained.
MODEL_ID = "llava-hf/llava-1.5-7b-hf"

# Instruction sent to the model together with the image.
QUESTION = "Analyze the emotions in this image"

# LLaVA 1.5 chat template; "<image>" marks where the image features are inserted.
PROMPT = f"USER: <image>\n{QUESTION} ASSISTANT:"

# Upper bound on the length of the generated answer.
MAX_NEW_TOKENS = 200

# Load model (done once at import time; this downloads/loads several GB of weights).
model = LlavaForConditionalGeneration.from_pretrained(MODEL_ID)
processor = AutoProcessor.from_pretrained(MODEL_ID)
# Kept as a module-level name for code that referenced the original `tokenizer`.
tokenizer = processor.tokenizer


def _load_image(image_blob):
    """Return *image_blob* as an RGB ``PIL.Image.Image``.

    Accepts a PIL image, raw encoded bytes, a filesystem path, or a file-like
    object (such as the temp-file wrapper some Gradio versions pass for
    ``"file"`` inputs).

    Raises:
        TypeError: If *image_blob* is none of the supported kinds.
    """
    if isinstance(image_blob, Image.Image):
        # Already decoded; convert() returns a new image and leaves the input intact.
        return image_blob.convert("RGB")

    if isinstance(image_blob, (bytes, bytearray, memoryview)):
        source = io.BytesIO(image_blob)
    elif isinstance(image_blob, (str, os.PathLike)):
        source = image_blob
    else:
        # File-like objects: prefer re-opening by path, because an uploaded
        # temp file may already be closed or positioned at EOF.
        path = getattr(image_blob, "name", None)
        if isinstance(path, (str, os.PathLike)) and os.path.exists(path):
            source = path
        elif hasattr(image_blob, "read"):
            source = image_blob
        else:
            raise TypeError(
                f"Unsupported image input type: {type(image_blob).__name__}"
            )

    # Image.open is lazy; convert() forces the decode before the handle is closed.
    with Image.open(source) as opened:
        return opened.convert("RGB")


# Function to analyze the image
def analyze_image(image_blob):
    """Describe the emotions shown in an image using the LLaVA model.

    Args:
        image_blob: The image as a PIL image, encoded bytes, a path, or a
            file-like object.

    Returns:
        The model's answer as plain text, without the prompt or special tokens.

    Raises:
        ValueError: If no image was provided.
        TypeError: If *image_blob* is of an unsupported type.
    """
    if image_blob is None:
        raise ValueError("No image was provided.")

    image = _load_image(image_blob)

    # The processor tokenizes the prompt AND turns the image into pixel_values;
    # a bare tokenizer cannot prepare the image for the vision tower.
    inputs = processor(images=image, text=PROMPT, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=MAX_NEW_TOKENS)

    # generate() returns prompt + continuation; keep only the newly generated part.
    prompt_length = inputs["input_ids"].shape[1]
    answer_ids = outputs[0][prompt_length:]
    return processor.decode(answer_ids, skip_special_tokens=True).strip()


# Set up the Gradio interface. An image component with type="pil" hands the
# callback a decoded PIL image instead of a temp-file handle.
iface = gr.Interface(fn=analyze_image, inputs=gr.Image(type="pil"), outputs="text")

if __name__ == "__main__":
    iface.launch()