import gradio as gr
from transformers import pipeline
import torch
import numpy as np
from PIL import Image
import gradio as gr
from gradio_client import Client
import os
import spaces
import json

# Build the three depth-estimation pipelines once, at import time, so that
# every request reuses the already-loaded models. Each one is created with
# device=0 (per the transformers docs this selects the first CUDA device).
dpt_beit = pipeline(
    task="depth-estimation",
    model="Intel/dpt-beit-base-384",
    device=0,
)
depth_anything = pipeline(
    task="depth-estimation",
    model="nielsr/depth-anything-small",
    device=0,
)
dpt_large = pipeline(
    task="depth-estimation",
    model="intel/dpt-large",
    device=0,
)

def depth_anything_inference(img):
    """Run the Depth Anything pipeline on *img* and return its ``"depth"`` entry.

    Args:
        img: Input image forwarded unchanged to the ``depth_anything`` pipeline.

    Returns:
        The value stored under the ``"depth"`` key of the pipeline output
        (presumably a PIL image of the depth map -- confirm against the
        transformers depth-estimation pipeline docs).
    """
    prediction = depth_anything(img)
    return prediction["depth"]

def dpt_beit_inference(img):
    """Run the DPT-BEiT pipeline on *img* and return its ``"depth"`` entry.

    Args:
        img: Input image forwarded unchanged to the ``dpt_beit`` pipeline.

    Returns:
        The value stored under the ``"depth"`` key of the pipeline output
        (presumably a PIL image of the depth map -- confirm against the
        transformers depth-estimation pipeline docs).
    """
    prediction = dpt_beit(img)
    return prediction["depth"]


def dpt_large_inference(img):
    """Run the DPT-Large pipeline on *img* and return its ``"depth"`` entry.

    Args:
        img: Input image forwarded unchanged to the ``dpt_large`` pipeline.

    Returns:
        The value stored under the ``"depth"`` key of the pipeline output
        (presumably a PIL image of the depth map -- confirm against the
        transformers depth-estimation pipeline docs).
    """
    prediction = dpt_large(img)
    return prediction["depth"]
    
# NOTE(review): `spaces.GPU` presumably requests a GPU for the duration of the
# call on Hugging Face ZeroGPU Spaces -- confirm against the `spaces` docs.
@spaces.GPU
def infer(img):
    """Run all three depth models on *img*.

    Args:
        img: The input image, or ``None`` when no image is present.

    Returns:
        A 3-tuple ``(dpt_large, dpt_beit, depth_anything)`` of depth outputs,
        in that order, or ``(None, None, None)`` when *img* is ``None``.
    """
    # Nothing to process: return one empty value per output slot.
    if img is None:
        return None, None, None

    # The models are evaluated in the same order as the returned tuple.
    large_depth = dpt_large_inference(img)
    beit_depth = dpt_beit_inference(img)
    anything_depth = depth_anything_inference(img)
    return large_depth, beit_depth, anything_depth


# Custom stylesheet handed to gr.Blocks(css=...). It styles the element with
# id "mkd" as a fixed-height (500px), scrollable box with a light border.
# NOTE(review): no component in the visible code sets elem_id="mkd", so this
# rule may currently be unused -- confirm before removing.
css = """
  #mkd {
    height: 500px; 
    overflow: auto; 
    border: 1px solid #ccc; 
  }
"""
# Assemble the comparison UI: a title and description, one input image, three
# output images (one per model), a set of cached examples, and an event that
# re-runs all models whenever the input image changes.
with gr.Blocks(css=css) as demo:
    # Title markup; the <center> and <h1> tags are closed explicitly so the
    # heading styling does not leak into the content that follows.
    gr.HTML("<h1><center>Compare Depth Estimation Models</center></h1>")
    # The BeiT link targets the base-384 checkpoint, which is the one this app
    # actually loads for the "DPT with BeiT Backbone" output.
    gr.Markdown("In this Space, you can compare different depth estimation models: [DPT-Large](https://huggingface.co/Intel/dpt-large), [DPT with BeiT backbone](https://huggingface.co/Intel/dpt-beit-base-384) and the recent [Depth Anything Model small checkpoint](https://huggingface.co/LiheYoung/depth-anything-small-hf). 🤩")
    gr.Markdown("You can also see how they compare in terms of speed [here](https://huggingface2.notion.site/DPT-Benchmarks-1e516b0ba193460e865c47b3a5681efb?pvs=4).")
    gr.Markdown("Simply upload an image or try one of the examples to see the outputs.")

    with gr.Column():
        with gr.Row():
            input_img = gr.Image(label="Input Image", type="pil")
        with gr.Row():
            # Output order matches the tuple returned by `infer`.
            output_1 = gr.Image(type="pil", label="DPT-Large")
            output_2 = gr.Image(type="pil", label="DPT with BeiT Backbone")
            output_3 = gr.Image(type="pil", label="Depth Anything")

    # Example images are expected next to this script; with cache_examples=True
    # Gradio runs `infer` on them up front and serves the stored results.
    gr.Examples(
        [["bee.jpg"], ["cat.png"], ["cats.png"]],
        inputs=input_img,
        outputs=[output_1, output_2, output_3],
        fn=infer,
        cache_examples=True,
        label='Click on any Examples below to get depth estimation results quickly 👇',
    )

    # Re-run every model whenever the input image changes (upload, clear, or
    # example selection); `infer` returns Nones when the image is cleared.
    input_img.change(infer, [input_img], [output_1, output_2, output_3])


# Start the Gradio app as soon as this module is executed (there is no
# `__main__` guard), with Gradio's debug mode enabled.
demo.launch(debug=True)