| import numpy as np |
| import cv2 as cv |
| import argparse |
|
|
| |
def opencv_python_version(str_version):
    """Parse a dotted version string (e.g. "4.10.0") into a comparable int tuple."""
    return tuple(map(int, str_version.split(".")))

# This demo relies on DNN features first shipped in opencv-python 4.10.0.
assert opencv_python_version(cv.__version__) >= opencv_python_version("4.10.0"), \
    "Please install latest opencv-python for benchmark: python3 -m pip install --upgrade opencv-python"
|
|
| from nanodet import NanoDet |
|
|
| |
# Supported (DNN backend, target device) combinations; the index into this
# list is selected with the --backend_target command-line option.
backend_target_pairs = [
    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],       # 0: default CPU
    [cv.dnn.DNN_BACKEND_CUDA,   cv.dnn.DNN_TARGET_CUDA],      # 1: NVIDIA GPU
    [cv.dnn.DNN_BACKEND_CUDA,   cv.dnn.DNN_TARGET_CUDA_FP16], # 2: NVIDIA GPU, FP16
    [cv.dnn.DNN_BACKEND_TIMVX,  cv.dnn.DNN_TARGET_NPU],       # 3: TIM-VX NPU
    [cv.dnn.DNN_BACKEND_CANN,   cv.dnn.DNN_TARGET_NPU],       # 4: CANN NPU
]
|
|
# The 80 COCO object category names; index matches the class id predicted
# by the NanoDet model.
classes = (
    'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
    'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
    'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
    'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork',
    'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
    'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
    'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
    'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
    'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
    'scissors', 'teddy bear', 'hair drier', 'toothbrush',
)
|
|
def letterbox(srcimg, target_size=(416, 416)):
    """Resize an image to target_size, preserving aspect ratio via padding.

    Args:
        srcimg: input image as an HxWxC numpy array (left unmodified).
        target_size: (height, width) of the output image.

    Returns:
        (img, letterbox_scale): img is the resized-and-padded image of shape
        target_size; letterbox_scale is [top, left, newh, neww] — the padding
        offsets and the size of the resized content — which unletterbox()
        needs to map detections back to original-image coordinates.
    """
    img = srcimg.copy()

    top, left, newh, neww = 0, 0, target_size[0], target_size[1]
    if img.shape[0] != img.shape[1]:
        hw_scale = img.shape[0] / img.shape[1]
        if hw_scale > 1:
            # Taller than wide: fill the target height, shrink the width,
            # split the horizontal padding between left and right.
            newh, neww = target_size[0], int(target_size[1] / hw_scale)
            img = cv.resize(img, (neww, newh), interpolation=cv.INTER_AREA)
            left = int((target_size[1] - neww) * 0.5)
            img = cv.copyMakeBorder(img, 0, 0, left, target_size[1] - neww - left, cv.BORDER_CONSTANT, value=0)
        else:
            # Wider than tall: fill the target width, shrink the height,
            # split the vertical padding between top and bottom.
            newh, neww = int(target_size[0] * hw_scale), target_size[1]
            img = cv.resize(img, (neww, newh), interpolation=cv.INTER_AREA)
            top = int((target_size[0] - newh) * 0.5)
            img = cv.copyMakeBorder(img, top, target_size[0] - newh - top, 0, 0, cv.BORDER_CONSTANT, value=0)
    else:
        # Square input: plain resize. cv.resize takes dsize as (width, height),
        # so swap from this function's (height, width) convention — passing
        # target_size directly was only correct for square targets.
        img = cv.resize(img, (target_size[1], target_size[0]), interpolation=cv.INTER_AREA)

    letterbox_scale = [top, left, newh, neww]
    return img, letterbox_scale
|
|
def unletterbox(bbox, original_image_shape, letterbox_scale):
    """Map a bounding box from letterboxed coordinates back to the original image.

    Args:
        bbox: [xmin, ymin, xmax, ymax] numpy array in letterboxed coordinates.
        original_image_shape: (height, width) of the original image.
        letterbox_scale: [top, left, newh, neww] as produced by letterbox().

    Returns:
        The box as an int32 numpy array in original-image coordinates,
        clipped to the image bounds.
    """
    ret = bbox.copy()

    h, w = original_image_shape
    top, left, newh, neww = letterbox_scale

    if h == w:
        # Square image: a single uniform scale, no padding offsets to undo.
        # Cast to int32 so callers always get integer pixel coordinates;
        # previously this branch leaked floats, unlike the path below.
        ratio = h / newh
        return (ret * ratio).astype(np.int32)

    # Undo the padding offset, rescale each axis, and clip to the image.
    ratioh, ratiow = h / newh, w / neww
    ret[0] = max((ret[0] - left) * ratiow, 0)
    ret[1] = max((ret[1] - top) * ratioh, 0)
    ret[2] = min((ret[2] - left) * ratiow, w)
    ret[3] = min((ret[3] - top) * ratioh, h)

    return ret.astype(np.int32)
|
|
def vis(preds, res_img, letterbox_scale, fps=None):
    """Render detections (and optionally an FPS read-out) onto a copy of res_img.

    Args:
        preds: iterable of predictions, each laid out as
               [xmin, ymin, xmax, ymax, ..., confidence, classid]
               in letterboxed coordinates.
        res_img: original BGR image to draw on (left unmodified).
        letterbox_scale: [top, left, newh, neww] as produced by letterbox().
        fps: optional frames-per-second value to print in the corner.

    Returns:
        A new image with boxes, labels and (optionally) FPS drawn.
    """
    canvas = res_img.copy()

    # FPS read-out in the top-left corner, red text.
    if fps is not None:
        cv.putText(canvas, "FPS: %.2f" % fps, (10, 25),
                   cv.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

    for det in preds:
        box = det[:4]
        score = det[-2]
        cls_id = det[-1].astype(np.int32)

        # Project the box back to original-image coordinates, then draw it.
        xmin, ymin, xmax, ymax = unletterbox(box, canvas.shape[:2], letterbox_scale)
        cv.rectangle(canvas, (xmin, ymin), (xmax, ymax), (0, 255, 0), thickness=2)

        # Class name and confidence just above the box.
        caption = "{:s}: {:.2f}".format(classes[cls_id], score)
        cv.putText(canvas, caption, (xmin, ymin - 10),
                   cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), thickness=2)

    return canvas
|
|
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Nanodet inference using OpenCV, a contribution by Sri Siddarth Chakaravarthy as part of GSOC_2022')
    parser.add_argument('--input', '-i', type=str,
                        help='Path to the input image. Omit for using default camera.')
    parser.add_argument('--model', '-m', type=str,
                        default='object_detection_nanodet_2022nov.onnx', help="Path to the model")
    parser.add_argument('--backend_target', '-bt', type=int, default=0,
                        help='''Choose one of the backend-target pair to run this demo:
                        {:d}: (default) OpenCV implementation + CPU,
                        {:d}: CUDA + GPU (CUDA),
                        {:d}: CUDA + GPU (CUDA FP16),
                        {:d}: TIM-VX + NPU,
                        {:d}: CANN + NPU
                    '''.format(*range(len(backend_target_pairs))))
    parser.add_argument('--confidence', default=0.35, type=float,
                        help='Class confidence')
    parser.add_argument('--nms', default=0.6, type=float,
                        help='Enter nms IOU threshold')
    parser.add_argument('--save', '-s', action='store_true',
                        help='Specify to save results. This flag is invalid when using camera.')
    parser.add_argument('--vis', '-v', action='store_true',
                        help='Specify to open a window for result visualization. This flag is invalid when using camera.')
    args = parser.parse_args()

    # Resolve the selected (backend, target) pair.
    backend_id = backend_target_pairs[args.backend_target][0]
    target_id = backend_target_pairs[args.backend_target][1]

    # Instantiate the NanoDet detector.
    model = NanoDet(modelPath=args.model,
                    prob_threshold=args.confidence,
                    iou_threshold=args.nms,
                    backend_id=backend_id,
                    target_id=target_id)

    tm = cv.TickMeter()
    tm.reset()
    if args.input is not None:
        # ---- Single-image mode ----
        image = cv.imread(args.input)
        if image is None:
            # cv.imread silently returns None on a bad path; fail loudly instead
            # of crashing later inside cvtColor.
            raise FileNotFoundError('Could not read image: {}'.format(args.input))
        input_blob = cv.cvtColor(image, cv.COLOR_BGR2RGB)  # model expects RGB

        # Letterbox transformation to the model's fixed input size.
        input_blob, letterbox_scale = letterbox(input_blob)

        # Inference.
        tm.start()
        preds = model.infer(input_blob)
        tm.stop()
        print("Inference time: {:.2f} ms".format(tm.getTimeMilli()))

        img = vis(preds, image, letterbox_scale)

        if args.save:
            print('Results saved to result.jpg\n')
            cv.imwrite('result.jpg', img)

        if args.vis:
            cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
            cv.imshow(args.input, img)
            cv.waitKey(0)
            cv.destroyAllWindows()
    else:
        # ---- Camera mode ----
        print("Press any key to stop video capture")
        deviceId = 0
        cap = cv.VideoCapture(deviceId)

        while cv.waitKey(1) < 0:
            hasFrame, frame = cap.read()
            if not hasFrame:
                print('No frames grabbed!')
                break

            input_blob = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
            input_blob, letterbox_scale = letterbox(input_blob)

            # Time only the model call so the FPS overlay reflects network speed.
            tm.start()
            preds = model.infer(input_blob)
            tm.stop()

            img = vis(preds, frame, letterbox_scale, fps=tm.getFPS())

            cv.imshow("NanoDet Demo", img)

            tm.reset()

        # Release the camera and close the preview window on exit.
        cap.release()
        cv.destroyAllWindows()
|