import os
import gradio as gr
from transformers import DetrImageProcessor, DetrForObjectDetection
from langchain_google_genai.chat_models import ChatGoogleGenerativeAI  # Import Gemini
from PIL import Image
import torch
import json
import requests

# Resolve Google credentials for Gemini from GOOGLE_APPLICATION_CREDENTIALS.
# The variable may hold either the service-account JSON itself (stringified)
# or the path of an existing key file.
credentials_string = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
if not credentials_string:
    raise ValueError("GOOGLE_APPLICATION_CREDENTIALS is not set in the environment!")

if os.path.isfile(credentials_string):
    # Already a key-file path. This is also the state this script leaves
    # behind once it has rewritten the variable below, so running it again in
    # the same environment must not try to parse the path as JSON.
    with open(credentials_string, encoding="utf-8") as f:
        credentials = json.load(f)
else:
    # Parse the stringified JSON back to a Python dictionary
    credentials = json.loads(credentials_string)

    # Google SDKs expect a file path, so persist the key to disk. The file
    # holds a private key: request owner-only permissions (the mode only takes
    # effect when the file is newly created).
    key_fd = os.open(
        "service_account.json",
        os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        0o600,
    )
    with os.fdopen(key_fd, "w") as f:
        json.dump(credentials, f)

    # Point GOOGLE_APPLICATION_CREDENTIALS at the key file just written
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "service_account.json"

# Gemini chat model (the chatbot side of the app)
llm = ChatGoogleGenerativeAI(model='gemini-1.5-pro')

# Object detection: the image processor and the DETR network are both loaded
# from the same pretrained checkpoint
_DETR_CHECKPOINT = "facebook/detr-resnet-50"
processor = DetrImageProcessor.from_pretrained(_DETR_CHECKPOINT)
model = DetrForObjectDetection.from_pretrained(_DETR_CHECKPOINT)

# COCO class names indexed by COCO category id (0-90). The detector's label
# ids are used directly as indices into this list, so the order must follow
# the category ids rather than the alphabet. Ids that COCO leaves unassigned
# are filled with 'N/A' so that every position lines up with its id.
COCO_CLASSES = [
    # 0-9
    'N/A', 'person', 'bicycle', 'car', 'motorcycle',
    'airplane', 'bus', 'train', 'truck', 'boat',
    # 10-19
    'traffic light', 'fire hydrant', 'N/A', 'stop sign', 'parking meter',
    'bench', 'bird', 'cat', 'dog', 'horse',
    # 20-29
    'sheep', 'cow', 'elephant', 'bear', 'zebra',
    'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A',
    # 30-39
    'N/A', 'handbag', 'tie', 'suitcase', 'frisbee',
    'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
    # 40-49
    'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
    'N/A', 'wine glass', 'cup', 'fork', 'knife',
    # 50-59
    'spoon', 'bowl', 'banana', 'apple', 'sandwich',
    'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    # 60-69
    'donut', 'cake', 'chair', 'couch', 'potted plant',
    'bed', 'N/A', 'dining table', 'N/A', 'N/A',
    # 70-79
    'toilet', 'N/A', 'tv', 'laptop', 'mouse',
    'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
    # 80-89
    'toaster', 'sink', 'refrigerator', 'N/A', 'book',
    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
    # 90
    'toothbrush',
]

# Conversation log shown in the chatbot: (user_text, bot_text) pairs in the
# order they were added. It lives at module level, so every handler appends
# to this one list.
chat_history = list()

# Function for chatting with Gemini
def chat_with_gemini(message):
    """Send a text message to Gemini and record the exchange in the chat history.

    Args:
        message: Text typed by the user.

    Returns:
        The global ``chat_history`` list of ``(user_text, bot_text)`` tuples.
        It is returned unchanged when ``message`` is empty or whitespace only;
        otherwise the new exchange (or an error note) has been appended.
    """
    global chat_history

    # Nothing to send: skip the API round-trip and leave the history untouched.
    if not message or not message.strip():
        return chat_history

    try:
        # `invoke` is the supported chat-model entry point; `predict` is the
        # deprecated spelling of the same call.
        reply = llm.invoke(message)
        content = reply.content
        # Message content is normally a plain string; fall back to its string
        # form if the model returns structured content parts instead.
        bot_response = content if isinstance(content, str) else str(content)
    except Exception as e:
        # Same policy as analyze_image: surface the failure in the chat
        # instead of letting the UI callback raise.
        bot_response = f"Error contacting Gemini: {str(e)}"

    chat_history.append((message, bot_response))
    return chat_history

# Function for analyzing the uploaded image
def analyze_image(image_path, score_threshold=0.5):
    """Detect objects in an image with DETR and record the result in the chat history.

    Args:
        image_path: Filesystem path of the uploaded image, or ``None`` when
            the upload widget has been cleared.
        score_threshold: Minimum confidence a detection needs in order to be
            reported. The default of 0.5 matches the post-processor's own
            default threshold.

    Returns:
        The global ``chat_history`` list of ``(user_text, bot_text)`` tuples.
        It is returned unchanged when there is no image; otherwise a summary
        of the detections (or an error note) has been appended.
    """
    global chat_history

    # Clearing the upload widget fires the change event without a file;
    # there is nothing to analyse, so do not log a bogus error.
    if not image_path:
        return chat_history

    try:
        # Open and preprocess the image. The context manager releases the
        # file handle as soon as the in-memory RGB copy exists.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")
        inputs = processor(images=image, return_tensors="pt")

        # Perform inference (no gradients needed)
        with torch.no_grad():
            outputs = model(**inputs)

        # PIL reports (width, height); post-processing expects (height, width)
        target_sizes = torch.tensor([image.size[::-1]])
        results = processor.post_process_object_detection(
            outputs, target_sizes=target_sizes, threshold=score_threshold
        )[0]

        # Resolve names through the checkpoint's own id -> label table so the
        # names always match the ids the model emits.
        id2label = model.config.id2label
        detected_objects = []
        for label, score in zip(results["labels"], results["scores"]):
            label_id = label.item()
            object_name = id2label.get(label_id, f"class {label_id}")
            detected_objects.append(f"{object_name} (score: {score.item():.2f})")

        if detected_objects:
            bot_response = f"Objects detected: {', '.join(detected_objects)}."
        else:
            bot_response = "No objects detected."

        chat_history.append(("Uploaded an image for analysis", bot_response))
        return chat_history
    except Exception as e:
        # Best-effort boundary: report the failure in the chat rather than
        # letting the UI callback raise.
        error_msg = f"Error processing the image: {str(e)}"
        chat_history.append(("Error during image analysis", error_msg))
        return chat_history

# Build the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Ken Chatbot")
    gr.Markdown("Ask me anything or upload an image for analysis!")

    # Chatbot display without "User" or "Bot" labels
    chatbot = gr.Chatbot(elem_id="chatbot")

    # User input components
    msg = gr.Textbox(label="Type your message here...", placeholder="Enter your message...", show_label=False)
    send_btn = gr.Button("Send")
    img_upload = gr.Image(type="filepath", label="Upload an image for analysis")

    # Event handlers
    def handle_text_message(message):
        """Forward the typed message to Gemini and return the updated history."""
        return chat_with_gemini(message)

    def handle_image_upload(image_path):
        """Run object detection on the uploaded image and return the updated history."""
        return analyze_image(image_path)

    def clear_input():
        """Return the empty string used to reset the message box."""
        return ""

    # Pressing Enter and clicking Send behave the same way: the message is
    # sent to the chatbot and the input field is cleared.
    msg.submit(handle_text_message, msg, chatbot)
    msg.submit(clear_input, None, msg)
    send_btn.click(handle_text_message, msg, chatbot)
    send_btn.click(clear_input, None, msg)

    # Analyse an image whenever the upload widget's value changes
    img_upload.change(handle_image_upload, img_upload, chatbot)

    # Custom CSS for styling without usernames
    gr.HTML("""
    <style>
    #chatbot .message-container {
        display: flex;
        flex-direction: column;
        margin-bottom: 10px;
        max-width: 70%;
    }
    #chatbot .message {
        border-radius: 15px;
        padding: 10px;
        margin: 5px 0;
        word-wrap: break-word;
    }
    #chatbot .message.user {
        background-color: #DCF8C6;
        margin-left: auto;
        text-align: right;
    }
    #chatbot .message.bot {
        background-color: #E1E1E1;
        margin-right: auto;
        text-align: left;
    }
    </style>
    """)

# Launch the Gradio interface.
# NOTE(review): there is no `if __name__ == "__main__":` guard, so this call
# also runs whenever the module is imported — confirm that is intended.
demo.launch()