Spaces:

Pavankunchala
/

Depth-Estimation-App

Runtime error

App Files Files Community

Depth-Estimation-App / app.py

Pavankunchala

Update app.py

71a0caa over 3 years ago

raw

history blame contribute delete

5.81 kB

	import sys
	import time
	from pathlib import Path
	import cv2
	from openvino.inference_engine import IECore
	import matplotlib.cm
	import matplotlib.pyplot as plt
	import numpy as np
	import streamlit as st
	from PIL import Image
	import tempfile
	# Relative paths of the sample inputs used when the user uploads nothing:
	# DEMO_IMAGE in image mode, DEMO_VIDEO in video mode.
	DEMO_IMAGE = 'dog-new.jpg'
	DEMO_VIDEO = 'dance2.mp4'
	@st.cache
	def normalize_minmax(data):
	    """Rescale ``data`` linearly so that its values span the range [0, 1].

	    A constant input (``max == min``) has no range to stretch. It is mapped
	    to an all-zero array instead of dividing by zero, which would fill the
	    result with NaN/inf values.

	    :param data: array-like object exposing ``min()`` and ``max()``.
	    :return: array of the same shape with values between 0 and 1.
	    """
	    lowest = data.min()
	    value_range = data.max() - lowest
	    if value_range == 0:
	        return np.zeros_like(data, dtype=float)
	    return (data - lowest) / value_range
	@st.cache
	def convert_result_to_image(result, colormap="inferno"):
	    """Render a network output as an RGB ``uint8`` image using a colormap.

	    The leading axis of ``result`` is squeezed away, the values are rescaled
	    with ``normalize_minmax``, mapped through the named Matplotlib colormap,
	    and the first three (RGB) channels are scaled to 0-255.

	    :param result: array whose first axis has length 1 (``squeeze(0)`` raises
	        otherwise); the remaining two axes are used as image rows/columns.
	    :param colormap: name of a registered Matplotlib colormap.
	    :return: ``uint8`` array with a trailing axis of three colour channels.
	    """
	    # matplotlib.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in
	    # 3.9; pyplot.get_cmap performs the same lookup on old and new releases.
	    cmap = plt.get_cmap(colormap)
	    normalized = normalize_minmax(result.squeeze(0))
	    # The colormap yields RGBA floats; drop alpha and scale to byte range.
	    rgb = cmap(normalized)[:, :, :3] * 255
	    return rgb.astype(np.uint8)
	@st.cache
	def to_rgb(image_data) -> np.ndarray:
	    """Return ``image_data`` converted with OpenCV's ``COLOR_BGR2RGB`` code.

	    The conversion swaps the first and third colour channels, so it is its
	    own inverse and also turns an RGB image into BGR.
	    """
	    swapped = cv2.cvtColor(image_data, cv2.COLOR_BGR2RGB)
	    return swapped
	# ---- Page header and sidebar headings ----
	st.title("Depth Estimation App")
	st.sidebar.title('Depth Estimation')
	st.sidebar.subheader('Parameters')

	# ---- Load the model and compile it for the target device ----
	DEVICE = "CPU"
	MODEL_FILE = "models/MiDaS_small.xml"
	model_xml_path = Path(MODEL_FILE)
	ie = IECore()
	# The weights file sits next to the .xml file and shares its stem.
	net = ie.read_network(
	    model=model_xml_path,
	    weights=model_xml_path.with_suffix(".bin"),
	)
	exec_net = ie.load_network(network=net, device_name=DEVICE)

	# Use the first input and first output of the network.
	input_key = next(iter(exec_net.input_info))
	output_key = next(iter(exec_net.outputs.keys()))

	# The last two input dimensions are taken as height and width
	# (the code below feeds the network NCHW data).
	network_input_shape = exec_net.input_info[input_key].tensor_desc.dims
	network_image_height, network_image_width = network_input_shape[2:]

	# ---- Mode selector: still image (default) or video ----
	app_mode = st.sidebar.selectbox(
	    'Choose the App mode',
	    ['Run on Image', 'Run on Video'],
	    index=0,
	)
	# Image mode: run depth estimation on an uploaded image (or the demo
	# image) and display the colour-mapped result.
	if app_mode == "Run on Image":
	    st.markdown('Running on Image')
	    st.sidebar.text('Params for Image')
	    # Inject CSS that pins the sidebar width to 400px, expanded or collapsed.
	    st.markdown(
	        """
	<style>
	[data-testid="stSidebar"][aria-expanded="true"] > div:first-child {
	width: 400px;
	}
	[data-testid="stSidebar"][aria-expanded="false"] > div:first-child {
	width: 400px;
	margin-left: -400px;
	}
	</style>
	""",
	        unsafe_allow_html=True,
	    )
	    img_file_buffer = st.sidebar.file_uploader("Upload an image", type=[ "jpg", "jpeg",'png'])
	    # Fall back to the demo image when nothing has been uploaded.
	    image_source = img_file_buffer if img_file_buffer is not None else DEMO_IMAGE
	    # Force three-channel RGB: a grayscale upload would be a 2-D array and
	    # break the (2, 0, 1) transpose below, and an RGBA PNG would carry a
	    # fourth channel (the model presumably expects three - TODO confirm).
	    image = np.array(Image.open(image_source).convert("RGB"))
	    st.sidebar.text('Original Image')
	    st.sidebar.image(image)
	    # cv2.resize takes dsize as (width, height), not (height, width).
	    resized_image = cv2.resize(src=image, dsize=(network_image_width, network_image_height))
	    # reshape image to network input shape NCHW
	    input_image = np.expand_dims(np.transpose(resized_image, (2, 0, 1)), 0)
	    result = exec_net.infer(inputs={input_key: input_image})[output_key]
	    # convert network result of disparity map to an image that shows
	    # distance as colors
	    result_image = convert_result_to_image(result=result)
	    # resize back to original image shape. cv2.resize expects shape
	    # in (width, height), [::-1] reverses the (height, width) shape to match this.
	    result_image = cv2.resize(result_image, image.shape[:2][::-1])
	    st.subheader('Output Image')
	    st.image(result_image,use_column_width= True)
	# Video mode: run depth estimation frame by frame on an uploaded video (or
	# the demo video), show each frame next to its depth map, and optionally
	# save the side-by-side frames to 'output_depth.mp4'.
	if app_mode =='Run on Video':
	    st.markdown('Running on Video')

	    video_file_buffer = st.sidebar.file_uploader("Upload a video", type=[ "mp4", "mov",'avi','asf', 'm4v' ])
	    stop_button = st.sidebar.button('Stop Processing')
	    if stop_button:
	        st.stop()

	    # Path of the temporary copy of an uploaded video; None for the demo.
	    uploaded_video_path = None
	    if not video_file_buffer:
	        vid = cv2.VideoCapture(DEMO_VIDEO)
	    else:
	        # OpenCV opens videos by path, so the upload is copied to disk.
	        # delete=False keeps the file after close(); closing it first makes
	        # sure every byte is flushed before OpenCV reads it.
	        tfflie = tempfile.NamedTemporaryFile(delete=False)
	        tfflie.write(video_file_buffer.read())
	        tfflie.close()
	        uploaded_video_path = tfflie.name
	        vid = cv2.VideoCapture(uploaded_video_path)

	    # Frame rate of the source video, used for the saved file. Kept apart
	    # from `fps`, which is overwritten below with the measured display rate.
	    source_fps = int(vid.get(cv2.CAP_PROP_FPS))
	    fps = source_fps
	    codec = cv2.VideoWriter_fourcc('X','V','I','D')
	    # The writer is created on the first saved frame so that its frame size
	    # matches the stacked (side-by-side) frame; a writer opened with the
	    # single-frame size would not match the frames passed to write().
	    out = None
	    start_time = time.perf_counter()
	    total_inference_duration = 0
	    stframe = st.empty()
	    SCALE_OUTPUT = 1
	    st.markdown("Frame Rate")
	    kpi1_text = st.markdown("0")
	    save_video = st.checkbox('Save video')
	    try:
	        while vid.isOpened():
	            ret, image = vid.read()
	            # Check `ret` before touching `image`: at end of stream the
	            # frame is None and reading `.shape` would raise.
	            if not ret:
	                break
	            new_time = time.time()
	            input_video_frame_height, input_video_frame_width = image.shape[:2]
	            target_frame_height = int(input_video_frame_height * SCALE_OUTPUT)
	            target_frame_width = int(input_video_frame_width * SCALE_OUTPUT)
	            # cv2.resize takes dsize as (width, height).
	            resized_image = cv2.resize(src=image, dsize=(network_image_width, network_image_height))
	            # reshape image to network input shape NCHW
	            input_image = np.expand_dims(np.transpose(resized_image, (2, 0, 1)), 0)
	            inference_start_time = time.perf_counter()
	            result = exec_net.infer(inputs={input_key: input_image})[output_key]
	            inference_stop_time = time.perf_counter()
	            inference_duration = inference_stop_time - inference_start_time
	            total_inference_duration += inference_duration
	            # The colormap output is RGB; the channel swap makes it BGR so it
	            # matches the frames read by OpenCV.
	            result_frame = to_rgb(convert_result_to_image(result))
	            # Resize image and result to target frame shape
	            result_frame = cv2.resize(result_frame, (target_frame_width, target_frame_height))
	            image = cv2.resize(image, (target_frame_width, target_frame_height))
	            # Put image and result side by side
	            stacked_frame = np.hstack((image, result_frame))
	            if save_video:
	                if out is None:
	                    stacked_height, stacked_width = stacked_frame.shape[:2]
	                    out = cv2.VideoWriter('output_depth.mp4', codec, source_fps, (stacked_width, stacked_height))
	                out.write(stacked_frame)
	            stframe.image(stacked_frame,channels = 'BGR',use_column_width=True)
	            elapsed = time.time() - new_time
	            # Guard against a zero interval on coarse clocks.
	            fps = 1.0 / elapsed if elapsed > 0 else 0.0
	            kpi1_text.write(f"<h1 style='text-align: center; color: red;'>{fps:.1f}</h1>", unsafe_allow_html=True)
	    finally:
	        # Release resources even if the script is stopped or rerun mid-loop.
	        vid.release()
	        if out is not None:
	            out.release()
	        if uploaded_video_path is not None:
	            try:
	                Path(uploaded_video_path).unlink()
	            except OSError:
	                # Best effort: the temporary copy may already be gone or
	                # still be locked by the operating system.
	                pass
	    # cv2.destroyAllWindows() is intentionally not called: this app never
	    # opens an OpenCV window, and the call can raise on headless builds.
	    st.success('Video is Processed')
	    st.stop()