import gradio as gr
from transformers import pipeline
import torch
import numpy as np
from PIL import Image
import gradio as gr
from gradio_client import Client
import os
import spaces
import json
# Build the three depth-estimation pipelines once, at import time, so every
# request reuses the already-loaded models instead of reloading them.
# device=0 is passed to each pipeline (the first CUDA device per the
# Transformers pipeline docs), so this module expects a GPU to be present.
dpt_beit = pipeline(
    task="depth-estimation",
    model="Intel/dpt-beit-base-384",
    device=0,
)
depth_anything = pipeline(
    task="depth-estimation",
    model="nielsr/depth-anything-small",
    device=0,
)
dpt_large = pipeline(
    task="depth-estimation",
    model="intel/dpt-large",
    device=0,
)
def depth_anything_inference(img):
    """Run the Depth Anything pipeline on ``img`` and return its depth output.

    Args:
        img: Image handed to the module-level ``depth_anything`` pipeline.

    Returns:
        The value stored under the ``"depth"`` key of the pipeline result.
    """
    prediction = depth_anything(img)
    return prediction["depth"]
def dpt_beit_inference(img):
    """Run the DPT-BeiT pipeline on ``img`` and return its depth output.

    Args:
        img: Image handed to the module-level ``dpt_beit`` pipeline.

    Returns:
        The value stored under the ``"depth"`` key of the pipeline result.
    """
    prediction = dpt_beit(img)
    return prediction["depth"]
def dpt_large_inference(img):
    """Run the DPT-Large pipeline on ``img`` and return its depth output.

    Args:
        img: Image handed to the module-level ``dpt_large`` pipeline.

    Returns:
        The value stored under the ``"depth"`` key of the pipeline result.
    """
    prediction = dpt_large(img)
    return prediction["depth"]
@spaces.GPU
def infer(img):
    """Run all three depth models on ``img`` for side-by-side comparison.

    Args:
        img: The input image, or ``None`` when no image is set.

    Returns:
        A 3-tuple ``(dpt_large, dpt_beit, depth_anything)`` of depth outputs,
        or ``(None, None, None)`` when ``img`` is ``None``.
    """
    # Nothing to process: hand back one empty slot per output.
    if img is None:
        return None, None, None

    # Run the models one after another, in the order the outputs are returned.
    large_depth = dpt_large_inference(img)
    beit_depth = dpt_beit_inference(img)
    anything_depth = depth_anything_inference(img)
    return large_depth, beit_depth, anything_depth
# Custom stylesheet passed to gr.Blocks: gives the element with id "mkd" a
# fixed-height, scrollable, bordered box.
# NOTE(review): no component in the visible layout sets elem_id="mkd", so this
# rule may currently be unused -- confirm before removing.
css = (
    "\n"
    "#mkd {\n"
    "height: 500px;\n"
    "overflow: auto;\n"
    "border: 1px solid #ccc;\n"
    "}\n"
)
# Assemble the comparison UI: a title and intro text, one input image, three
# output images (one per model), clickable examples, and an automatic re-run
# of `infer` whenever the input image changes.
with gr.Blocks(css=css) as demo:
    # The heading must be a single-line string literal: an ordinary
    # double-quoted string cannot span lines. The markup around the title is a
    # reconstruction of what appears to have been stripped from the original.
    gr.HTML("<h1><center>Compare Depth Estimation Models</center></h1>")
    # NOTE(review): the links below point at Intel/dpt-beit-large-512 and
    # LiheYoung/depth-anything-small-hf, while the pipelines in this file load
    # Intel/dpt-beit-base-384 and nielsr/depth-anything-small -- confirm which
    # checkpoints are intended and align the text or the code.
    gr.Markdown("In this Space, you can compare different depth estimation models: [DPT-Large](https://huggingface.co/Intel/dpt-large), [DPT with BeiT backbone](https://huggingface.co/Intel/dpt-beit-large-512) and the recent [Depth Anything Model small checkpoint](https://huggingface.co/LiheYoung/depth-anything-small-hf). 🤩")
    gr.Markdown("You can also see how they compare in terms of speed [here](https://huggingface2.notion.site/DPT-Benchmarks-1e516b0ba193460e865c47b3a5681efb?pvs=4).")
    gr.Markdown("Simply upload an image or try one of the examples to see the outputs.")

    with gr.Column():
        # First row: the image to analyse, delivered to `infer` as a PIL image.
        with gr.Row():
            input_img = gr.Image(label="Input Image", type="pil")
        # Second row: one depth map per model, in the order `infer` returns them.
        with gr.Row():
            output_1 = gr.Image(type="pil", label="DPT-Large")
            output_2 = gr.Image(type="pil", label="DPT with BeiT Backbone")
            output_3 = gr.Image(type="pil", label="Depth Anything")

    # Example images; cache_examples=True asks Gradio to cache the outputs of
    # `infer` for these inputs so clicking one does not need a fresh run.
    gr.Examples(
        [["bee.jpg"], ["cat.png"], ["cats.png"]],
        inputs=input_img,
        outputs=[output_1, output_2, output_3],
        fn=infer,
        cache_examples=True,
        label='Click on any Examples below to get depth estimation results quickly 👇',
    )

    # Re-run all three models whenever the input image changes (including
    # when it is cleared, in which case `infer` returns three Nones).
    input_img.change(infer, [input_img], [output_1, output_2, output_3])

# Start the app; debug=True is forwarded to Gradio's launch().
demo.launch(debug=True)