"""Gradio tool that compares two images.

For a pair of uploaded images the app produces:

* derived images (sketch, edge map, pixel difference) shown in a gallery,
* a BLIP caption for each image,
* a textual comparison written by a DeepSeek chat model from the captions,
* a bar chart and a pie chart of the absolute CLIP feature differences.
"""

import gradio as gr
import matplotlib

# Charts are rendered to PNG files from Gradio worker threads; the
# non-interactive Agg backend avoids GUI-backend problems off the main thread.
matplotlib.use("Agg")

import matplotlib.pyplot as plt
import numpy as np
import torch
from openai import OpenAI, OpenAIError
from PIL import Image, ImageChops, ImageFilter
from transformers import (
    BlipForConditionalGeneration,
    BlipProcessor,
    CLIPModel,
    CLIPProcessor,
)

# Load the models once at import time so every request reuses them.
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
blip_processor = BlipProcessor.from_pretrained(
    "Salesforce/blip-image-captioning-base"
)
blip_model = BlipForConditionalGeneration.from_pretrained(
    "Salesforce/blip-image-captioning-base"
)


def _extract_sketch(image):
    """Return a sketch-like image: grayscale screen-blended with its inverse."""
    grayscale = image.convert("L")
    inverted = ImageChops.invert(grayscale)
    return ImageChops.screen(grayscale, inverted)


def _compute_normal_map(image):
    """Return an edge-enhanced image used as a stand-in for a normal map."""
    return image.filter(ImageFilter.FIND_EDGES)


def compute_difference_images(img_a, img_b):
    """Build the derived images used to compare ``img_a`` and ``img_b``.

    Args:
        img_a: First PIL image.
        img_b: Second PIL image; must have the same mode as ``img_a`` for the
            pixel difference (the caller converts both to RGB).

    Returns:
        Dict mapping a name (``original_a``, ``original_b``, ``sketch_a``,
        ``sketch_b``, ``normal_a``, ``normal_b``, ``diff_overlay``) to a PIL
        image.
    """
    return {
        "original_a": img_a,
        "original_b": img_b,
        "sketch_a": _extract_sketch(img_a),
        "sketch_b": _extract_sketch(img_b),
        "normal_a": _compute_normal_map(img_a),
        "normal_b": _compute_normal_map(img_b),
        "diff_overlay": ImageChops.difference(img_a, img_b),
    }


def save_images(images):
    """Save each image as ``<key>.png`` in the current working directory.

    Args:
        images: Dict mapping a name to a PIL image.

    Returns:
        List of ``(path, caption)`` tuples, where the caption is the key with
        underscores replaced by spaces and the first letter capitalised.
    """
    paths = []
    for key, img in images.items():
        path = f"{key}.png"
        img.save(path)
        paths.append((path, key.replace("_", " ").capitalize()))
    return paths


def generate_detailed_caption(image):
    """Generate a caption for ``image`` with BLIP using beam search."""
    inputs = blip_processor(image, return_tensors="pt")
    caption = blip_model.generate(
        **inputs, max_length=128, num_beams=5, no_repeat_ngram_size=2
    )
    return blip_processor.decode(caption[0], skip_special_tokens=True)


def plot_feature_differences(latent_diff, top_k=10):
    """Plot the per-feature differences as a bar chart and a pie chart.

    Args:
        latent_diff: Nested sequence whose first row holds the per-feature
            differences.
        top_k: Number of largest differences shown in the pie chart.

    Returns:
        Tuple ``(bar_chart_path, pie_chart_path)`` of the saved PNG files.
    """
    magnitudes = np.abs(np.asarray(latent_diff[0], dtype=float))

    # Bar chart over every feature.
    fig, ax = plt.subplots(figsize=(8, 4))
    ax.bar(np.arange(magnitudes.size), magnitudes, alpha=0.7)
    ax.set_xlabel("Feature Index")
    ax.set_ylabel("Magnitude of Difference")
    ax.set_title("Feature Differences (Bar Chart)")
    bar_chart_path = "bar_chart.png"
    fig.savefig(bar_chart_path)
    plt.close(fig)

    # Pie chart of the largest differences, labelled with the real feature
    # index (the chart title promises the top features, not the first ones).
    top_k = min(top_k, magnitudes.size)
    top_indices = np.argsort(magnitudes)[::-1][:top_k]
    top_values = magnitudes[top_indices]

    fig, ax = plt.subplots(figsize=(6, 6))
    if top_values.sum() > 0:
        ax.pie(
            top_values,
            labels=[str(i) for i in top_indices],
            autopct="%1.1f%%",
            startangle=140,
        )
    else:
        # Identical features sum to zero, which a pie chart cannot normalise.
        ax.text(0.5, 0.5, "No feature differences", ha="center", va="center")
        ax.axis("off")
    ax.set_title(f"Top {top_k} Feature Differences (Pie Chart)")
    pie_chart_path = "pie_chart.png"
    fig.savefig(pie_chart_path)
    plt.close(fig)

    return bar_chart_path, pie_chart_path


def _extract_clip_features(image):
    """Return the CLIP image embedding of ``image`` as a NumPy array."""
    inputs = clip_processor(images=image, return_tensors="pt")
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        features = clip_model.get_image_features(**inputs)
    return features.detach().cpu().numpy()


def analyze_images(image_a, image_b, api_key):
    """Run the full comparison pipeline on two images.

    Args:
        image_a: First PIL image.
        image_b: Second PIL image.
        api_key: DeepSeek API key used for the text analysis.

    Returns:
        Dict with keys ``caption_a``, ``caption_b``, ``text_analysis``,
        ``saved_images``, ``bar_chart`` and ``pie_chart``.

    Raises:
        ValueError: If either image or the API key is missing.
    """
    # Validate before doing any expensive model work.
    if image_a is None or image_b is None:
        raise ValueError("请同时上传图片A和图片B。")
    if not api_key or not api_key.strip():
        raise ValueError("请输入 DeepSeek API Key。")

    # DeepSeek exposes an OpenAI-compatible endpoint.
    client = OpenAI(api_key=api_key.strip(), base_url="https://api.deepseek.com")

    # Derived comparison images.
    img_a = image_a.convert("RGB")
    img_b = image_b.convert("RGB")
    images_diff = compute_difference_images(img_a, img_b)
    saved_images = save_images(images_diff)

    # BLIP captions.
    caption_a = generate_detailed_caption(img_a)
    caption_b = generate_detailed_caption(img_b)

    # Absolute difference of the CLIP embeddings.
    features_a = _extract_clip_features(img_a)
    features_b = _extract_clip_features(img_b)
    latent_diff = np.abs(features_a - features_b).tolist()

    # Ask the chat model for a written comparison of the two captions.
    gpt_response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {
                "role": "user",
                "content": f"图片A的描述为:{caption_a}。图片B的描述为:{caption_b}。\n请对两张图片的内容和潜在特征区别进行详细分析,并输出一个简洁但富有条理的总结。",
            },
        ],
        stream=False,
    )
    # The message content may be None; fall back to an empty string.
    text_analysis = (gpt_response.choices[0].message.content or "").strip()

    # Charts of the feature differences.
    bar_chart_path, pie_chart_path = plot_feature_differences(latent_diff)

    return {
        "caption_a": caption_a,
        "caption_b": caption_b,
        "text_analysis": text_analysis,
        "saved_images": saved_images,
        "bar_chart": bar_chart_path,
        "pie_chart": pie_chart_path,
    }


# Gradio interface.
# NOTE(review): the row/column nesting was reconstructed from a flattened
# source; confirm it matches the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# 图像对比分析工具")
    api_key_input = gr.Textbox(
        label="API Key",
        placeholder="输入您的 DeepSeek API Key",
        type="password",
    )

    with gr.Row():
        with gr.Column():
            image_a = gr.Image(label="图片A", type="pil")
        with gr.Column():
            image_b = gr.Image(label="图片B", type="pil")

    analyze_button = gr.Button("分析图片")

    with gr.Row():
        gr.Markdown("## 图像差异")
        result_diff = gr.Gallery(label="混合差异图像")

    with gr.Row():
        result_caption_a = gr.Textbox(label="图片A描述", interactive=False)
        result_caption_b = gr.Textbox(label="图片B描述", interactive=False)

    with gr.Row():
        gr.Markdown("## 差异分析")
        result_text_analysis = gr.Textbox(
            label="详细分析", interactive=False, lines=5
        )
        result_bar_chart = gr.Image(label="特征差异柱状图")
        result_pie_chart = gr.Image(label="特征差异饼图")

    def process_analysis(img_a, img_b, api_key):
        """Run the analysis and unpack the results in output-component order."""
        try:
            results = analyze_images(img_a, img_b, api_key)
        except (ValueError, OpenAIError) as exc:
            # Show input and API failures in the UI instead of a bare traceback.
            raise gr.Error(str(exc)) from exc
        return (
            results["saved_images"],
            results["caption_a"],
            results["caption_b"],
            results["text_analysis"],
            results["bar_chart"],
            results["pie_chart"],
        )

    analyze_button.click(
        fn=process_analysis,
        inputs=[image_a, image_b, api_key_input],
        outputs=[
            result_diff,
            result_caption_a,
            result_caption_b,
            result_text_analysis,
            result_bar_chart,
            result_pie_chart,
        ],
    )

demo.launch()