1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
""" Jetson Orin NX deployment example
YOLOv8l + TensorRT FP16 """
import tensorrt as trt import pycuda.driver as cuda import numpy as np
class YOLOv8TRTInference:
    """TensorRT inference wrapper around a serialized YOLOv8 engine.

    The engine is deserialized once, and one page-locked host buffer plus one
    device buffer are allocated per I/O tensor. ``infer`` then runs a
    synchronous copy-in / execute / copy-out cycle on the first input and the
    first output tensor.

    NOTE: PyCUDA needs a CUDA context to be current on the calling thread
    before this class is constructed (for example via ``import
    pycuda.autoinit``); this class does not create one.
    """

    # Fallback network input size as (height, width); replaced in __init__
    # when the engine reports a static spatial input shape.
    input_hw = (640, 640)

    def __init__(self, engine_path: str):
        """Load the engine and allocate host/device buffers.

        Args:
            engine_path: Path to a serialized TensorRT engine file.

        Raises:
            OSError: If the engine file cannot be opened.
            RuntimeError: If the engine cannot be deserialized or exposes no
                input or no output tensor.
        """
        self.logger = trt.Logger(trt.Logger.WARNING)
        # Hold a reference to the runtime so it is not garbage-collected
        # while the engine created from it is still in use.
        self.runtime = trt.Runtime(self.logger)
        with open(engine_path, 'rb') as f:
            self.engine = self.runtime.deserialize_cuda_engine(f.read())
        if self.engine is None:
            # deserialize_cuda_engine reports failure by returning None.
            raise RuntimeError(
                f"Failed to deserialize TensorRT engine: {engine_path}"
            )
        self.context = self.engine.create_execution_context()

        self.inputs = []
        self.outputs = []
        self.bindings = []
        for i in range(self.engine.num_io_tensors):
            name = self.engine.get_tensor_name(i)
            dtype = trt.nptype(self.engine.get_tensor_dtype(name))
            shape = self.engine.get_tensor_shape(name)
            size = trt.volume(shape)
            host_mem = cuda.pagelocked_empty(size, dtype)
            device_mem = cuda.mem_alloc(host_mem.nbytes)
            # execute_v2 expects the device addresses in tensor order.
            self.bindings.append(int(device_mem))
            buffers = {'host': host_mem, 'device': device_mem, 'shape': shape}
            if self.engine.get_tensor_mode(name) == trt.TensorIOMode.INPUT:
                self.inputs.append(buffers)
            else:
                self.outputs.append(buffers)

        if not self.inputs or not self.outputs:
            raise RuntimeError(
                "TensorRT engine must expose at least one input and one output"
            )

        # preprocess() produces a CHW tensor, so the last two input dims are
        # taken as (H, W). Non-positive (dynamic) dims keep the fallback.
        in_shape = tuple(self.inputs[0]['shape'])
        if len(in_shape) >= 2 and in_shape[-2] > 0 and in_shape[-1] > 0:
            self.input_hw = (int(in_shape[-2]), int(in_shape[-1]))

    def infer(self, image: np.ndarray) -> np.ndarray:
        """Run one synchronous inference pass on a single image.

        Args:
            image: (H, W, 3) uint8 image.

        Returns:
            detections: (N, 6) float32 array of
            [x1, y1, x2, y2, conf, class]. Box coordinates are rescaled from
            the network input size to the size of ``image``. The array is a
            fresh copy and stays valid across later ``infer`` calls.

        Raises:
            ValueError: If ``image`` is None or empty.
            RuntimeError: If TensorRT reports that execution failed.
        """
        if image is None or image.size == 0:
            raise ValueError("image must be a non-empty (H, W, 3) array")

        input_tensor = self.preprocess(image)
        np.copyto(self.inputs[0]['host'], input_tensor.ravel())
        cuda.memcpy_htod(self.inputs[0]['device'], self.inputs[0]['host'])
        if not self.context.execute_v2(self.bindings):
            raise RuntimeError("TensorRT execution failed")
        cuda.memcpy_dtoh(self.outputs[0]['host'], self.outputs[0]['device'])

        detections = self.postprocess(self.outputs[0]['host'])
        # The host buffer is reused by every call, and the frame was resized
        # without letterboxing, so return a rescaled copy instead of a view.
        return self._rescale_boxes(detections, self.input_hw, image.shape[:2])

    def preprocess(self, image: np.ndarray) -> np.ndarray:
        """Resize, normalize to [0, 1] and convert HWC -> CHW.

        Args:
            image: (H, W, 3) uint8 image.

        Returns:
            Contiguous float32 array of shape (3, input_h, input_w).
        """
        import cv2

        height, width = self.input_hw
        # NOTE(review): channel order is passed through unchanged. Frames from
        # cv2 are BGR; confirm the engine was exported for that order.
        resized = cv2.resize(image, (width, height))  # cv2 takes (W, H)
        normalized = resized.astype(np.float32) / 255.0
        transposed = np.transpose(normalized, (2, 0, 1))
        return np.ascontiguousarray(transposed)

    def postprocess(self, output: np.ndarray) -> np.ndarray:
        """Reshape the flat engine output into rows of six values.

        No NMS or confidence filtering is performed here.
        NOTE(review): this presumably relies on the engine emitting final
        [x1, y1, x2, y2, conf, class] rows itself - confirm against the
        export settings.

        Args:
            output: Flat output buffer copied back from the device.

        Returns:
            View of ``output`` with shape (N, 6).

        Raises:
            ValueError: If the buffer size is not a multiple of 6.
        """
        if output.size % 6:
            raise ValueError(
                f"engine output of size {output.size} cannot be viewed as (N, 6)"
            )
        return output.reshape(-1, 6)

    @staticmethod
    def _rescale_boxes(detections: np.ndarray, from_hw, to_hw) -> np.ndarray:
        """Return a float32 copy of ``detections`` with boxes mapped between sizes.

        Columns 0/2 (x) are scaled by ``to_w / from_w`` and columns 1/3 (y) by
        ``to_h / from_h``; the remaining columns are left unchanged.

        Args:
            detections: (N, 6) array of [x1, y1, x2, y2, conf, class].
            from_hw: (height, width) the boxes are currently expressed in.
            to_hw: (height, width) the boxes should be mapped to.
        """
        scaled = detections.astype(np.float32, copy=True)
        scale_x = to_hw[1] / from_hw[1]
        scale_y = to_hw[0] / from_hw[0]
        scaled[:, [0, 2]] *= scale_x
        scaled[:, [1, 3]] *= scale_y
        return scaled
def main():
    """Run live detection on camera 0 and draw boxes until 'q' is pressed.

    Loads ``yolov8l_fp16.engine``, reads frames from the default camera,
    draws every detection with confidence above 0.5 and shows the frame in a
    window. The camera and the window are released even if inference raises.

    Raises:
        RuntimeError: If the camera cannot be opened.
    """
    import cv2
    # PyCUDA calls need a current CUDA context; importing autoinit creates one
    # on the first device and tears it down at interpreter exit.
    import pycuda.autoinit  # noqa: F401

    detector = YOLOv8TRTInference("yolov8l_fp16.engine")
    cap = cv2.VideoCapture(0)
    try:
        if not cap.isOpened():
            raise RuntimeError("Could not open video capture device 0")

        while True:
            ret, frame = cap.read()
            if not ret:
                # Stream ended or the camera stopped delivering frames.
                break

            detections = detector.infer(frame)
            for det in detections:
                x1, y1, x2, y2, conf, cls = det
                if conf > 0.5:
                    cv2.rectangle(
                        frame,
                        (int(x1), int(y1)),
                        (int(x2), int(y2)),
                        (0, 255, 0),
                        2,
                    )

            cv2.imshow("DMS Detection", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always give the camera and the GUI window back, also on errors.
        cap.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    main()
|