Spaces:

kendrickfff
/

ask-me-anything

Running

App Files Files Community

kendrickfff committed on Dec 3, 2024

Commit

b05e484

verified ·

1 Parent(s): 61993c7

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -114

app.py CHANGED Viewed

@@ -1,35 +1,12 @@
 import os
 import gradio as gr
 from transformers import DetrImageProcessor, DetrForObjectDetection
-from langchain_google_genai.chat_models import ChatGoogleGenerativeAI  # Import Gemini
 from PIL import Image
-import torch
-import json
 import requests
-# Load credentials (stringified JSON) from environment variable for Gemini
-credentials_string = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
-if not credentials_string:
-    raise ValueError("GOOGLE_APPLICATION_CREDENTIALS is not set in the environment!")
-# Parse the stringified JSON back to a Python dictionary
-credentials = json.loads(credentials_string)
-# Save the credentials to a temporary JSON file (required by Google SDKs)
-with open("service_account.json", "w") as f:
-    json.dump(credentials, f)
-# Set the GOOGLE_APPLICATION_CREDENTIALS environment variable to the temporary file
-os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "service_account.json"
-# Initialize Gemini model (chatbot)
-llm = ChatGoogleGenerativeAI(model='gemini-1.5-pro')
-# Initialize DETR model and processor for object detection
-processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
-model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
-# Load COCO class label
 COCO_CLASSES = [
     'airplane', 'apple', 'backpack', 'banana', 'baseball hat', 'baseball glove', 'bear', 'bed', 'bench', 'bicycle',
     'bird', 'boat', 'book', 'bottle', 'bowl', 'broccoli', 'bus', 'cake', 'car', 'carrot', 'cat', 'cell phone', 'chair',
@@ -41,105 +18,64 @@ COCO_CLASSES = [
     'traffic light', 'train', 'truck', 'tv', 'umbrella', 'vase', 'wine glass'
 ]
-# Global chat history variable
-chat_history = []
-# Function for chatting with Gemini
-def chat_with_gemini(message):
-    global chat_history
-    bot_response = llm.predict(message)  # This will interact with the Gemini model
-    chat_history.append((message, bot_response))
-    return chat_history
-# Function for analyzing the uploaded image
 def analyze_image(image_path):
-    global chat_history
     try:
-        # Open and preprocess the image
-        image = Image.open(image_path).convert("RGB")
-        inputs = processor(images=image, return_tensors="pt")
-        # Perform inference
-        with torch.no_grad():
-            outputs = model(**inputs)
-        # Set a target size for post-processing
-        target_sizes = torch.tensor([image.size[::-1]])  # (height, width)
-        results = processor.post_process_object_detection(outputs, target_sizes=target_sizes)[0]
-        # Collect detected objects
-        detected_objects = []
-        for idx, label in enumerate(results["labels"]):
-            # Get the object label based on label index
-            object_name = COCO_CLASSES[label.item()]  # Assuming COCO_CLASSES is available
-            detected_objects.append(object_name)
-        if detected_objects:
-            bot_response = f"Objects detected: {', '.join(detected_objects)}."
-        else:
-            bot_response = "No objects detected."
-        chat_history.append(("Uploaded an image for analysis", bot_response))
-        return chat_history
     except Exception as e:
-        error_msg = f"Error processing the image: {str(e)}"
-        chat_history.append(("Error during image analysis", error_msg))
-        return chat_history
-# Build the Gradio interface
-with gr.Blocks() as demo:
-    gr.Markdown("# Ken Chatbot")
-    gr.Markdown("Ask me anything or upload an image for analysis!")
-    # Chatbot display without "User" or "Bot" labels
-    chatbot = gr.Chatbot(elem_id="chatbot")
     # User input components
-    msg = gr.Textbox(label="Type your message here...", placeholder="Enter your message...", show_label=False)
-    send_btn = gr.Button("Send")
-    img_upload = gr.Image(type="filepath", label="Upload an image for analysis (Only detect 80 types of images recognized from COCO dataset. Check the list on https://blog.roboflow.com/microsoft-coco-classes/")
-    # Define interactions
-    def handle_text_message(message):
-        return chat_with_gemini(message)
-    def handle_image_upload(image_path):
-        return analyze_image(image_path)
-    # Set up Gradio components with Enter key for sending
-    msg.submit(handle_text_message, msg, chatbot)
-    send_btn.click(handle_text_message, msg, chatbot)
-    send_btn.click(lambda: "", None, msg)  # Clear input field
-    img_upload.change(handle_image_upload, img_upload, chatbot)
-    # Custom CSS for styling without usernames
-    gr.HTML("""
-    <style>
-    #chatbot .message-container {
-        display: flex;
-        flex-direction: column;
-        margin-bottom: 10px;
-        max-width: 70%;
-    }
-    #chatbot .message {
-        border-radius: 15px;
-        padding: 10px;
-        margin: 5px 0;
-        word-wrap: break-word;
-    }
-    #chatbot .message.user {
-        background-color: #DCF8C6;
-        margin-left: auto;
-        text-align: right;
-    }
-    #chatbot .message.bot {
-        background-color: #E1E1E1;
-        margin-right: auto;
-        text-align: left;
-    }
-    </style>
-    """)
-# Launch the Gradio interface
 demo.launch()

 import os
 import gradio as gr
+import torch
 from transformers import DetrImageProcessor, DetrForObjectDetection
 from PIL import Image
 import requests
+import json
+# Custom Object Labels
 COCO_CLASSES = [
     'airplane', 'apple', 'backpack', 'banana', 'baseball hat', 'baseball glove', 'bear', 'bed', 'bench', 'bicycle',
     'bird', 'boat', 'book', 'bottle', 'bowl', 'broccoli', 'bus', 'cake', 'car', 'carrot', 'cat', 'cell phone', 'chair',
     'traffic light', 'train', 'truck', 'tv', 'umbrella', 'vase', 'wine glass'
 ]
+# Load the DETR model and processor
+model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
+processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
+# Object detection callback used by the Gradio interface
 def analyze_image(image_path):
     try:
+        # Open the image
+        image = Image.open(image_path)
+        # Preprocess the image
+        inputs = processor(images=image, return_tensors="pt")
+        # Perform object detection
+        outputs = model(**inputs)
+        # Get the logits (class predictions) and boxes (bounding boxes)
+        logits = outputs.logits
+        boxes = outputs.pred_boxes
+        # Get the predicted labels (class IDs)
+        class_ids = logits.argmax(-1)
+        # Filter out detections with low confidence and map to custom labels
+        results = []
+        for idx, class_id in enumerate(class_ids[0]):
+            confidence = logits[0, idx, class_id].item()
+            if confidence > 0.5:  # Confidence threshold
+                label = COCO_CLASSES[class_id]
+                box = boxes[0, idx].tolist()
+                results.append({
+                    'label': label,
+                    'confidence': confidence,
+                    'box': box
+                })
+        if len(results) == 0:
+            return "No objects detected."
+        # Generate a response with the detected objects
+        detected_objects = "\n".join([f"{result['label']} (confidence: {result['confidence']:.2f})" for result in results])
+        return f"Detected Objects:\n{detected_objects}"
     except Exception as e:
+        return f"Error processing the image: {str(e)}"
+# Gradio Interface Setup
+with gr.Blocks() as demo:
+    gr.Markdown("## Object Detection with Custom Labels")
+    gr.Markdown("Upload an image for analysis!")
     # User input components
+    img_upload = gr.Image(type="filepath", label="Upload an image for analysis")
+    output_text = gr.Textbox(label="Detection Results", interactive=False)
+    # Define the interaction
+    img_upload.change(analyze_image, img_upload, output_text)
+# Launch the interface
 demo.launch()