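"""YOLOv7 detection script, modified to crop the single highest-confidence
detection from each image and upscale the crop with an EDSR super-resolution
model before saving both the cropped and the upscaled result."""
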
import argparse
import time
from pathlib import Path
from PIL import Image
import numpy as np
import cv2
import torch
import torch.backends.cudnn as cudnn
from numpy import random
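# super_image provides the pretrained EDSR upscaling model (pip install super-image)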
from super_image import EdsrModel, ImageLoader
from models.experimental import attempt_load
from utils.datasets import LoadStreams, LoadImages
from utils.general import check_img_size, check_requirements, check_imshow, non_max_suppression, apply_classifier, \
    scale_coords, xyxy2xywh, strip_optimizer, set_logging, increment_path
from utils.plots import plot_one_box
from utils.torch_utils import select_device, load_classifier, time_synchronized, TracedModel


def detect(save_img=False):
    source, weights, view_img, save_txt, imgsz, trace = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size, not opt.no_trace
    save_img = not opt.nosave and not source.endswith('.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://', 'https://'))

    # Directories
    save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load YOLOv7 model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size

    if trace:
        model = TracedModel(model, device, opt.img_size)

    if half:
        model.half()  # to FP16

    # Load the EDSR super-resolution model (4x) once up front, rather than
    # re-initializing it for every detection inside the inference loop
    edsr_model = EdsrModel.from_pretrained('eugenesiow/edsr-base', scale=4)

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    old_img_w = old_img_h = imgsz
    old_img_b = 1

    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Warmup
        if device.type != 'cpu' and (old_img_b != img.shape[0] or old_img_h != img.shape[2] or old_img_w != img.shape[3]):
            old_img_b = img.shape[0]
            old_img_h = img.shape[2]
            old_img_w = img.shape[3]
            for i in range(3):
                model(img, augment=opt.augment)[0]

        # Inference
        t1 = time_synchronized()
        with torch.no_grad():  # Calculating gradients would cause a GPU memory leak
            pred = model(img, augment=opt.augment)[0]
        t2 = time_synchronized()

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t3 = time_synchronized()
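
        # `pred` is a list with one tensor per image; each tensor has shape
        # (n, 6) with one row per detection: [x1, y1, x2, y2, confidence, class]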
        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Find the box with the maximum confidence score
                max_conf_idx = det[:, 4].argmax().item()
                xyxy_max_conf = det[max_conf_idx][:4]  # coordinates of the max-confidence bbox

                # Crop the image using the max-confidence bbox
                x1, y1, x2, y2 = map(int, xyxy_max_conf)
                cropped_img = im0[y1:y2, x1:x2]

                # Convert the cropped image from BGR to RGB (OpenCV uses BGR by default)
                cropped_img_rgb = cv2.cvtColor(cropped_img, cv2.COLOR_BGR2RGB)

                # Convert the NumPy array (H, W, C) to a PyTorch tensor (C, H, W) and normalize the pixel values
                cropped_img_tensor = torch.from_numpy(cropped_img_rgb).float().permute(2, 0, 1) / 255.0

                # Add a batch dimension since the model expects batches of images
                inputs = cropped_img_tensor.unsqueeze(0)

                # Perform super-resolution on the cropped image
                preds = edsr_model(inputs)

                # Convert the result back to a NumPy array: (C, H, W) -> (H, W, C)
                upscaled_img = preds.squeeze(0).cpu().detach().numpy().transpose(1, 2, 0)

                # The model output is normalized, so rescale the values back to 0-255
                upscaled_img = np.clip(upscaled_img * 255.0, 0, 255).astype(np.uint8)

                # Convert back to BGR for saving (OpenCV saves in BGR format)
                upscaled_img_bgr = cv2.cvtColor(upscaled_img, cv2.COLOR_RGB2BGR)

                # Save the upscaled image
                upscaled_img_save_path = save_dir / f"{p.stem}_upscaled.jpg"
                cv2.imwrite(str(upscaled_img_save_path), upscaled_img_bgr)

                # Save the cropped image
                cropped_img_save_path = save_dir / f"{p.stem}_cropped.jpg"
                cv2.imwrite(str(cropped_img_save_path), cropped_img)

                # Display both the cropped and upscaled images
                if view_img:
                    cv2.imshow("Cropped Image", cropped_img)
                    cv2.imshow("Upscaled Image", upscaled_img_bgr)
                    cv2.waitKey(1)

            # Print time (inference + NMS)
            print(f'{s}Done. ({(1E3 * (t2 - t1)):.1f}ms) Inference, ({(1E3 * (t3 - t2)):.1f}ms) NMS')

    print(f'Done. ({time.time() - t0:.3f}s)')


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', nargs='+', type=str, default='yolov7.pt', help='model.pt path(s)')
    parser.add_argument('--source', type=str, default='inference/images', help='source')  # file/folder, 0 for webcam
    parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='display results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default='runs/detect', help='save results to project/name')
    parser.add_argument('--name', default='exp', help='save results to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--no-trace', action='store_true', help="don't trace model")
    opt = parser.parse_args()
    print(opt)
    # check_requirements(exclude=('pycocotools', 'thop'))

    with torch.no_grad():
        if opt.update:  # update all models (to fix SourceChangeWarning)
            for opt.weights in ['yolov7.pt']:
                detect()
                strip_optimizer(opt.weights)
        else:
            detect()
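
# Example invocations (paths and weights are illustrative; any YOLOv7 .pt
# checkpoint and image/video/stream source should work):
#   python detect.py --weights yolov7.pt --source inference/images
#   python detect.py --weights yolov7.pt --source 0 --view-img          # webcam
#   python detect.py --weights yolov7.pt --source video.mp4 --conf-thres 0.4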