Spatial Detection Network
The example builds a pipeline that performs YOLOv6-Nano spatial object detection using the RGB and stereo depth streams, visualizes the results with bounding boxes and spatial coordinates on both the colorized depth and RGB frames, and uses a custom host-side visualization node.

This example requires the DepthAI v3 API; see the installation instructions.

Pipeline
The RGB camera and the two mono cameras feed the depth source (a StereoDepth node by default, or a NeuralDepth node with --depthSource neural). The depth output and the RGB frames are consumed by a SpatialDetectionNetwork node, whose detections, passthrough RGB, and passthrough depth are linked into the host-side SpatialVisualizer node.
Source code
Python
#!/usr/bin/env python3

import argparse
from pathlib import Path
import cv2
import depthai as dai
import numpy as np

NEURAL_FPS = 8
STEREO_DEFAULT_FPS = 30

parser = argparse.ArgumentParser()
parser.add_argument(
    "--depthSource", type=str, default="stereo", choices=["stereo", "neural"]
)
args = parser.parse_args()

modelDescription = dai.NNModelDescription("yolov6-nano")
size = (640, 400)

if args.depthSource == "stereo":
    fps = STEREO_DEFAULT_FPS
else:
    fps = NEURAL_FPS

class SpatialVisualizer(dai.node.HostNode):
    def __init__(self):
        dai.node.HostNode.__init__(self)
        self.sendProcessingToPipeline(True)

    def build(self, depth: dai.Node.Output, detections: dai.Node.Output, rgb: dai.Node.Output):
        self.link_args(depth, detections, rgb)  # Must match the inputs to the process method

    def process(self, depthPreview, detections, rgbPreview):
        depthPreview = depthPreview.getCvFrame()
        rgbPreview = rgbPreview.getCvFrame()
        depthFrameColor = self.processDepthFrame(depthPreview)
        self.displayResults(rgbPreview, depthFrameColor, detections.detections)

    def processDepthFrame(self, depthFrame):
        # Normalize depth between the 1st and 99th percentile and apply a colormap for display
        depth_downscaled = depthFrame[::4]
        if np.all(depth_downscaled == 0):
            min_depth = 0
        else:
            min_depth = np.percentile(depth_downscaled[depth_downscaled != 0], 1)
        max_depth = np.percentile(depth_downscaled, 99)
        depthFrameColor = np.interp(depthFrame, (min_depth, max_depth), (0, 255)).astype(np.uint8)
        return cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_HOT)

    def displayResults(self, rgbFrame, depthFrameColor, detections):
        height, width, _ = rgbFrame.shape
        for detection in detections:
            self.drawBoundingBoxes(depthFrameColor, detection)
            self.drawDetections(rgbFrame, detection, width, height)

        cv2.imshow("depth", depthFrameColor)
        cv2.imshow("rgb", rgbFrame)
        if cv2.waitKey(1) == ord('q'):
            self.stopPipeline()

    def drawBoundingBoxes(self, depthFrameColor, detection):
        # Draw the depth ROI used for the spatial calculation on the colorized depth frame
        roiData = detection.boundingBoxMapping
        roi = roiData.roi
        roi = roi.denormalize(depthFrameColor.shape[1], depthFrameColor.shape[0])
        topLeft = roi.topLeft()
        bottomRight = roi.bottomRight()
        cv2.rectangle(depthFrameColor, (int(topLeft.x), int(topLeft.y)), (int(bottomRight.x), int(bottomRight.y)), (255, 255, 255), 1)

    def drawDetections(self, frame, detection, frameWidth, frameHeight):
        # Draw bounding box, label, confidence and spatial coordinates (in mm) on the RGB frame
        x1 = int(detection.xmin * frameWidth)
        x2 = int(detection.xmax * frameWidth)
        y1 = int(detection.ymin * frameHeight)
        y2 = int(detection.ymax * frameHeight)
        label = detection.labelName
        color = (255, 255, 255)
        cv2.putText(frame, str(label), (x1 + 10, y1 + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
        cv2.putText(frame, "{:.2f}".format(detection.confidence * 100), (x1 + 10, y1 + 35), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
        cv2.putText(frame, f"X: {int(detection.spatialCoordinates.x)} mm", (x1 + 10, y1 + 50), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
        cv2.putText(frame, f"Y: {int(detection.spatialCoordinates.y)} mm", (x1 + 10, y1 + 65), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
        cv2.putText(frame, f"Z: {int(detection.spatialCoordinates.z)} mm", (x1 + 10, y1 + 80), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 1)

# Creates the pipeline and a default device implicitly
with dai.Pipeline() as p:
    # Define sources and outputs
    platform = p.getDefaultDevice().getPlatform()

    camRgb = p.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_A, sensorFps=fps)
    monoLeft = p.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_B, sensorFps=fps)
    monoRight = p.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_C, sensorFps=fps)
    if args.depthSource == "stereo":
        # Classic stereo depth computed from the left/right mono streams
        depthSource = p.create(dai.node.StereoDepth)
        depthSource.setExtendedDisparity(True)
        if platform == dai.Platform.RVC2:
            depthSource.setOutputSize(640, 400)
        monoLeft.requestOutput(size).link(depthSource.left)
        monoRight.requestOutput(size).link(depthSource.right)
    elif args.depthSource == "neural":
        # Neural depth estimation from the full-resolution mono streams
        depthSource = p.create(dai.node.NeuralDepth).build(
            monoLeft.requestFullResolutionOutput(),
            monoRight.requestFullResolutionOutput(),
            dai.DeviceModelZoo.NEURAL_DEPTH_LARGE,
        )
    else:
        raise ValueError(f"Invalid depth source: {args.depthSource}")

    # Runs YOLOv6-Nano on the RGB frames and augments each detection with spatial (XYZ) coordinates from the depth source
    spatialDetectionNetwork = p.create(dai.node.SpatialDetectionNetwork).build(
        camRgb, depthSource, modelDescription
    )
    # Host-side visualization node defined above
    visualizer = p.create(SpatialVisualizer)

    spatialDetectionNetwork.input.setBlocking(False)
    # Scale the depth ROI to 50% of each bounding box and only use depth values between 100 mm and 5000 mm
    spatialDetectionNetwork.setBoundingBoxScaleFactor(0.5)
    spatialDetectionNetwork.setDepthLowerThreshold(100)
    spatialDetectionNetwork.setDepthUpperThreshold(5000)

    visualizer.build(
        spatialDetectionNetwork.passthroughDepth,
        spatialDetectionNetwork.out,
        spatialDetectionNetwork.passthrough,
    )

    print("Starting pipeline with depth source: ", args.depthSource)

    p.run()
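Run the script with --depthSource stereo (the default) or --depthSource neural to choose how depth is computed; press q in either preview window to stop the pipeline.

If you prefer polling the detections from the main thread instead of subclassing dai.node.HostNode, the sketch below shows the alternative pattern. It is a minimal sketch, not part of the example above, and assumes a DepthAI v3 build that exposes createOutputQueue() on node outputs together with Pipeline.start() and Pipeline.isRunning():

import depthai as dai

with dai.Pipeline() as p:
    camRgb = p.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_A)
    monoLeft = p.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_B)
    monoRight = p.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_C)

    # Classic stereo depth from the two mono streams
    stereo = p.create(dai.node.StereoDepth)
    monoLeft.requestOutput((640, 400)).link(stereo.left)
    monoRight.requestOutput((640, 400)).link(stereo.right)

    sdn = p.create(dai.node.SpatialDetectionNetwork).build(
        camRgb, stereo, dai.NNModelDescription("yolov6-nano")
    )

    # Detections are read from a host-side queue instead of a HostNode's process() callback
    detQueue = sdn.out.createOutputQueue()

    p.start()
    while p.isRunning():
        dets = detQueue.get()  # dai.SpatialImgDetections
        for d in dets.detections:
            # Spatial coordinates are reported in millimeters
            print(d.labelName, f"{d.confidence * 100:.1f}%", int(d.spatialCoordinates.z))

This drops the SpatialVisualizer node entirely, so drawing the boxes and coordinates is left to the caller.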