IR 摄像头 DMS 疲劳分心检测论文解读与代码复现

IR 摄像头 DMS 疲劳分心检测论文解读与代码复现

发布时间: 2026-06-14
标签: 论文解读, DMS, IR 摄像头, 疲劳检测, 分心检测, YOLOv7
来源: PMC (Scientific Reports, 2023), CANLAB


论文信息

  • 标题: Real-time driver monitoring system with facial landmark-based eye closure detection and head pose recognition
  • 作者: D. Jung et al., CANLAB
  • 期刊: Scientific Reports (Nature), 2023
  • 链接: https://pmc.ncbi.nlm.nih.gov/articles/PMC10600215/

核心创新

本文提出一种基于 IR 摄像头 的实时驾驶员监控系统(DMS),核心创新:

  1. IR 摄像头优势: 不受光照变化影响,适合隧道、夜间等场景
  2. 轻量化设计: 眼睑闭合检测和头部姿态估计仅使用 CPU 运行
  3. 实时性能: 嵌入式平台(Xavier)达到 10 FPS

系统架构

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
┌─────────────────────────────────────────────────────────┐
│ IR 摄像头 DMS 架构 │
├─────────────────────────────────────────────────────────┤
│ │
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐│
│ │ IR 摄像头 │───▶│ YOLOv7 │───▶│ 面部关键点 ││
│ │ 1280×800 │ │ 人脸检测 │ │ 提取 ││
│ └─────────────┘ └─────────────┘ └─────────────┘│
│ │ │
│ ┌───────────────────────┴────┐ │
│ │ │ │
│ ┌──────▼──────┐ ┌──────▼───┐│
│ │ 头部姿态估计 │ │ 眼睑检测 ││
│ │ solvePnP │ │ 自适应阈值││
│ └──────┬──────┘ └──────┬───┘│
│ │ │ │
│ ┌──────▼──────┐ ┌──────▼───┐│
│ │ 分心检测 │ │ 疲劳检测 ││
│ │ >50帧不看前方│ │ >50帧闭眼││
│ └─────────────┘ └──────────┘│
│ │
└─────────────────────────────────────────────────────────┘

方法详解

1. IR 摄像头硬件规格

参数 规格
分辨率 1280 × 800
帧率 30 FPS
动态范围 63.9 dB
信噪比 MAX 39 dB
工作温度 -40°C ~ +85°C
传输接口 GMSL2

IR 摄像头优势:

  • 不受环境光影响
  • 隧道、夜间表现一致
  • 隐私风险相对较低(非 RGB 彩色图像,但仍包含人脸信息)

2. 人脸检测:YOLOv7

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import torch
import cv2
import numpy as np

class YOLOv7FaceDetector:
    """Face detector backed by a custom YOLOv7 checkpoint loaded via torch.hub.

    Accepts single-channel (H, W) IR frames as well as 3-channel images.
    """

    def __init__(self, model_path: str, conf_threshold: float = 0.5):
        """Load the detection model.

        Args:
            model_path: Path to the YOLOv7 weights file.
            conf_threshold: Detections whose confidence is not strictly
                greater than this value are discarded.
        """
        self.model = torch.hub.load('WongKinYiu/yolov7', 'custom',
                                    model_path, trust_repo=True)
        self.conf_threshold = conf_threshold

    def detect(self, image: np.ndarray) -> list:
        """Detect faces in a single frame.

        Args:
            image: IR image of shape (H, W) or (H, W, 3).

        Returns:
            List of ``(x1, y1, x2, y2, conf)`` tuples. Coordinates are Python
            ints clamped to the image bounds and ``conf`` is a Python float.
            Boxes that end up with no area after clamping are dropped.

        Raises:
            ValueError: If ``image`` is None (e.g. a failed ``cv2.imread``).
        """
        if image is None:
            raise ValueError("image is None; was the frame loaded correctly?")

        # The model is fed 3-channel input, so replicate the single IR channel.
        if image.ndim == 2:
            image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
        height, width = image.shape[:2]

        results = self.model(image)

        faces = []
        for det in results.xyxy[0]:
            x1, y1, x2, y2, conf, _cls = det.cpu().numpy()
            if conf <= self.conf_threshold:
                continue

            # Predicted boxes may extend past the frame. A negative coordinate
            # would wrap around when used as a slice index, so clamp here.
            x1 = min(max(int(x1), 0), width)
            y1 = min(max(int(y1), 0), height)
            x2 = min(max(int(x2), 0), width)
            y2 = min(max(int(y2), 0), height)
            if x2 <= x1 or y2 <= y1:
                continue

            faces.append((x1, y1, x2, y2, float(conf)))

        return faces

    def detect_with_landmarks(self, image: np.ndarray) -> tuple:
        """Detect faces and also return the cropped face images.

        Despite the name, no landmarks are computed here; only bounding boxes
        and the corresponding crops of ``image`` are returned.

        Returns:
            ``(faces, face_images)`` where ``faces`` is the output of
            :meth:`detect` and ``face_images[i]`` is the crop for ``faces[i]``.
        """
        faces = self.detect(image)
        face_images = [image[y1:y2, x1:x2] for (x1, y1, x2, y2, _conf) in faces]
        return faces, face_images


# Usage example
if __name__ == "__main__":
    detector = YOLOv7FaceDetector('yolov7-face.pt', conf_threshold=0.7)

    # Load the IR frame as a single-channel image.
    ir_image = cv2.imread('ir_driver.jpg', cv2.IMREAD_GRAYSCALE)
    if ir_image is None:
        # cv2.imread reports a missing or unreadable file by returning None
        # instead of raising, so fail here with a clear message.
        raise SystemExit("无法读取图像: ir_driver.jpg")

    # Run face detection.
    faces, face_images = detector.detect_with_landmarks(ir_image)

    print(f"检测到 {len(faces)} 张人脸")
    for i, (x1, y1, x2, y2, conf) in enumerate(faces):
        print(f" 人脸 {i+1}: ({x1}, {y1}) - ({x2}, {y2}), 置信度: {conf:.2f}")

3. 面部关键点提取

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import dlib
import numpy as np
from typing import List, Tuple

class FacialLandmarkExtractor:
    """Extract 68 facial landmarks with dlib's shape predictor.

    dlib's ``shape_predictor`` is an ensemble of regression trees, which is
    fast and runs on CPU.
    """

    # Index ranges into the 68-point landmark array. "Left"/"right" follow the
    # naming used throughout this file: indices 36-41 are the "left" eye and
    # 42-47 the "right" eye.
    LEFT_EYE_INDICES = list(range(36, 42))
    RIGHT_EYE_INDICES = list(range(42, 48))
    NOSE_INDICES = list(range(27, 36))
    MOUTH_INDICES = list(range(48, 68))

    def __init__(self, predictor_path: str = 'shape_predictor_68_face_landmarks.dat'):
        """Load the landmark model.

        Args:
            predictor_path: Path to the pretrained dlib shape-predictor file.
        """
        self.predictor = dlib.shape_predictor(predictor_path)

    def extract(self, image: np.ndarray, face_bbox: tuple) -> np.ndarray:
        """Extract facial landmarks inside a face bounding box.

        Args:
            image: IR image.
            face_bbox: ``(x1, y1, x2, y2)`` face box. Extra trailing items
                (such as a confidence score) are ignored, so detector output
                tuples can be passed directly.

        Returns:
            ``(68, 2)`` int32 array of ``(x, y)`` landmark coordinates.
        """
        # Coerce to plain ints before handing the values to dlib.
        x1, y1, x2, y2 = (int(v) for v in face_bbox[:4])
        rect = dlib.rectangle(x1, y1, x2, y2)

        shape = self.predictor(image, rect)

        return np.array(
            [(shape.part(i).x, shape.part(i).y) for i in range(68)],
            dtype=np.int32,
        )

    def get_eye_regions(self, landmarks: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Return the landmark subsets for both eyes.

        Returns:
            ``(left_eye, right_eye)``, each a ``(6, 2)`` array.
        """
        points = np.asarray(landmarks)
        return points[self.LEFT_EYE_INDICES], points[self.RIGHT_EYE_INDICES]

    def get_eye_centers(self, landmarks: np.ndarray) -> Tuple[Tuple[int, int], Tuple[int, int]]:
        """Return the centre of each eye as the mean of its six landmarks.

        Returns:
            ``((left_x, left_y), (right_x, right_y))`` as plain Python ints.
            The fractional part of the mean is truncated.
        """
        left_eye, right_eye = self.get_eye_regions(landmarks)

        def _center(points: np.ndarray) -> Tuple[int, int]:
            cx, cy = points.mean(axis=0)
            # Return Python ints, as the annotation promises, rather than
            # numpy scalars.
            return int(cx), int(cy)

        return _center(left_eye), _center(right_eye)

4. 头部姿态估计(solvePnP)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import cv2
import numpy as np
from typing import Tuple

class HeadPoseEstimator:
    """Head-pose estimator based on ``cv2.solvePnP``.

    Six 2D landmarks are matched against a generic 3D face model and the
    resulting rotation is reported as pitch/yaw/roll in degrees.
    """

    def __init__(self, camera_matrix: np.ndarray = None, dist_coeffs: np.ndarray = None):
        """Set up camera intrinsics and the 3D reference model.

        Args:
            camera_matrix: (3, 3) camera intrinsic matrix. When omitted, a
                placeholder with focal length 800 and principal point
                (640, 400) is used; replace it with real calibration data.
            dist_coeffs: Lens distortion coefficients. Defaults to zeros
                (no distortion).
        """
        if camera_matrix is None:
            self.camera_matrix = np.array([
                [800, 0, 640],
                [0, 800, 400],
                [0, 0, 1]
            ], dtype=np.float64)
        else:
            self.camera_matrix = camera_matrix

        self.dist_coeffs = dist_coeffs if dist_coeffs is not None else np.zeros((4, 1))

        # Generic 3D face model. Its axes are y-up with the nose tip at the
        # origin and the rest of the face at negative z.
        self.model_points = np.array([
            (0.0, 0.0, 0.0),           # nose tip
            (0.0, -330.0, -65.0),      # chin
            (-225.0, 170.0, -135.0),   # left eye outer corner
            (225.0, 170.0, -135.0),    # right eye outer corner
            (-150.0, -150.0, -125.0),  # left mouth corner
            (150.0, -150.0, -125.0)    # right mouth corner
        ], dtype=np.float64)

        # Landmark indices matching model_points, in the same order.
        self.landmark_indices = [30, 8, 36, 45, 48, 54]

    @staticmethod
    def _euler_from_rotation_matrix(rotation_matrix: np.ndarray) -> dict:
        """Convert a model-to-camera rotation matrix into pitch/yaw/roll degrees."""
        # The face model is y-up with the nose towards -z, whereas the OpenCV
        # camera frame is y-down with z pointing away from the camera. A face
        # looking straight into the camera therefore solves to a 180 degree
        # rotation about x, not identity. Re-base the rotation so that a
        # frontal face gives pitch = yaw = roll = 0.
        flip = np.diag([1.0, -1.0, -1.0])
        r = np.asarray(rotation_matrix, dtype=np.float64) @ flip

        sy = np.sqrt(r[0, 0] ** 2 + r[1, 0] ** 2)

        if sy < 1e-6:
            # Gimbal lock: yaw is near +/-90 degrees, so roll is fixed at 0.
            pitch = np.arctan2(-r[1, 2], r[1, 1])
            yaw = np.arctan2(-r[2, 0], sy)
            roll = 0.0
        else:
            pitch = np.arctan2(r[2, 1], r[2, 2])
            yaw = np.arctan2(-r[2, 0], sy)
            roll = np.arctan2(r[1, 0], r[0, 0])

        return {
            'pitch': float(np.degrees(pitch)),  # nodding up/down
            'yaw': float(np.degrees(yaw)),      # turning left/right
            'roll': float(np.degrees(roll))     # tilting sideways
        }

    def estimate(self, landmarks: np.ndarray) -> Tuple[np.ndarray, np.ndarray, dict]:
        """Estimate the head pose from facial landmarks.

        Args:
            landmarks: (68, 2) facial landmark coordinates.

        Returns:
            ``(rotation_vector, translation_vector, euler_angles)``. The
            vectors are the raw ``cv2.solvePnP`` output, and ``euler_angles``
            is a dict with ``'pitch'``, ``'yaw'`` and ``'roll'`` in degrees,
            all zero for a face looking straight at the camera. Returns
            ``(None, None, None)`` when solvePnP reports failure.
        """
        image_points = np.asarray(landmarks)[self.landmark_indices].astype(np.float64)

        success, rotation_vector, translation_vector = cv2.solvePnP(
            self.model_points,
            image_points,
            self.camera_matrix,
            self.dist_coeffs,
            flags=cv2.SOLVEPNP_ITERATIVE
        )

        if not success:
            return None, None, None

        rotation_matrix, _ = cv2.Rodrigues(rotation_vector)
        euler_angles = self._euler_from_rotation_matrix(rotation_matrix)

        return rotation_vector, translation_vector, euler_angles

    def is_looking_forward(self, euler_angles: dict,
                           yaw_threshold: float = 30.0,
                           pitch_threshold: float = 20.0) -> bool:
        """Decide whether the head is oriented towards the road.

        Args:
            euler_angles: Dict produced by :meth:`estimate`, or None.
            yaw_threshold: Maximum absolute yaw in degrees.
            pitch_threshold: Maximum absolute pitch in degrees.

        Returns:
            True when both absolute yaw and absolute pitch are below their
            thresholds. False otherwise, including when ``euler_angles`` is
            None.
        """
        if euler_angles is None:
            return False

        yaw_ok = abs(euler_angles['yaw']) < yaw_threshold
        pitch_ok = abs(euler_angles['pitch']) < pitch_threshold

        return bool(yaw_ok and pitch_ok)


# Usage example
if __name__ == "__main__":
    estimator = HeadPoseEstimator()

    # Random stand-in landmarks: they exercise the code path only and do not
    # describe a real face, so the printed angles are not meaningful.
    landmarks = np.random.randint(200, 400, (68, 2))

    # Estimate the head pose.
    rot_vec, trans_vec, euler = estimator.estimate(landmarks)

    # estimate() returns None for the angles when solvePnP fails, so test for
    # None explicitly instead of relying on dict truthiness.
    if euler is not None:
        print(f"Pitch (俯仰): {euler['pitch']:.1f}°")
        print(f"Yaw (偏航): {euler['yaw']:.1f}°")
        print(f"Roll (翻滚): {euler['roll']:.1f}°")

        is_forward = estimator.is_looking_forward(euler)
        print(f"看向前方: {is_forward}")

5. 眼睑闭合检测(自适应阈值)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
import numpy as np
import cv2
from typing import Tuple

class EyeClosureDetector:
    """Eyelid-closure detector based on a per-crop adaptive threshold.

    Keeps a running count of consecutive frames in which both eyes are
    classified as closed.
    """

    def __init__(self,
                 closure_threshold: int = 40,
                 frame_threshold: int = 50):
        """
        Args:
            closure_threshold: Number of white pixels in the central eye
                region above which the eye is considered closed.
            frame_threshold: Number of consecutive closed-eye frames that
                triggers a drowsiness result.
        """
        self.closure_threshold = closure_threshold
        self.frame_threshold = frame_threshold

        self.closed_frame_count = 0

    def extract_eye_region(self, image: np.ndarray,
                           eye_landmarks: np.ndarray,
                           padding: int = 5) -> np.ndarray:
        """Crop the padded bounding box of one eye out of the image.

        Args:
            image: IR image.
            eye_landmarks: (6, 2) eye landmark coordinates as ``(x, y)``.
            padding: Extra pixels added on every side of the bounding box.

        Returns:
            The cropped eye region. It may be empty when the landmarks lie
            outside the image.
        """
        points = np.asarray(eye_landmarks)
        height, width = image.shape[:2]

        x_min = max(0, int(points[:, 0].min()) - padding)
        y_min = max(0, int(points[:, 1].min()) - padding)
        # Keep the upper bounds >= the lower bounds so that a negative value
        # can never wrap around as a slice index.
        x_max = max(x_min, min(width, int(points[:, 0].max()) + padding))
        y_max = max(y_min, min(height, int(points[:, 1].max()) + padding))

        return image[y_min:y_max, x_min:x_max]

    @staticmethod
    def _adaptive_threshold(eye_image: np.ndarray) -> int:
        """Return the midpoint between the darkest and brightest pixel."""
        # Convert to Python ints first: adding two uint8 scalars wraps around
        # at 256 and would produce a far too low threshold.
        return (int(eye_image.min()) + int(eye_image.max())) // 2

    def detect_closure(self, eye_image: np.ndarray) -> Tuple[bool, int]:
        """Classify a single eye crop as open or closed.

        Steps:
            1. Adaptive threshold T = (min + max) // 2.
            2. Binarise: pixels > T become white, the rest black.
            3. Morphological closing with a 5x5 rectangular kernel.
            4. Count white pixels in the central part of the crop.

        Args:
            eye_image: Eye region image. A 3-channel crop is converted to
                grayscale first.

        Returns:
            ``(is_closed, white_pixel_count)``. An empty or missing crop
            yields ``(False, 0)``.
        """
        if eye_image is None or eye_image.size == 0:
            return False, 0

        if eye_image.ndim == 3:
            eye_image = cv2.cvtColor(eye_image, cv2.COLOR_BGR2GRAY)

        threshold = self._adaptive_threshold(eye_image)

        _, binary = cv2.threshold(eye_image, threshold, 255, cv2.THRESH_BINARY)

        # Closing fills small dark gaps; the article's rationale is that a
        # closed eyelid produces a uniformly filled region.
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
        closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)

        # Only the central window (middle third vertically, middle half
        # horizontally) is counted.
        h, w = closed.shape
        center_region = closed[h // 3:2 * h // 3, w // 4:3 * w // 4]
        white_pixels = int(np.count_nonzero(center_region == 255))

        is_closed = white_pixels > self.closure_threshold

        return is_closed, white_pixels

    def detect_drowsiness(self, left_eye_image: np.ndarray,
                          right_eye_image: np.ndarray) -> Tuple[bool, str]:
        """Update the closed-eye streak and report drowsiness.

        Drowsiness is reported once both eyes have been closed for at least
        ``frame_threshold`` consecutive calls. Any frame in which either eye
        is open resets the streak.

        Returns:
            ``(is_drowsy, status_message)``.
        """
        left_closed, _ = self.detect_closure(left_eye_image)
        right_closed, _ = self.detect_closure(right_eye_image)

        if not (left_closed and right_closed):
            self.closed_frame_count = 0
            return False, "正常"

        self.closed_frame_count += 1

        if self.closed_frame_count >= self.frame_threshold:
            return True, f"疲劳检测:闭眼 {self.closed_frame_count} 帧"
        return False, f"监控中:闭眼 {self.closed_frame_count} 帧"


# Full DMS pipeline integration
class IRDriverMonitoringSystem:
    """End-to-end IR-camera driver monitoring pipeline.

    Chains face detection, landmark extraction, head-pose estimation and
    eye-closure detection, and tracks how many consecutive frames the driver
    has not been looking forward.
    """

    def __init__(self,
                 face_model_path: str,
                 landmark_model_path: str,
                 distraction_threshold: int = 50):
        """
        Args:
            face_model_path: Path to the YOLOv7 face-detection weights.
            landmark_model_path: Path to the dlib landmark model.
            distraction_threshold: Number of consecutive not-looking-forward
                frames that triggers a distraction warning.
        """
        self.face_detector = YOLOv7FaceDetector(face_model_path)
        self.landmark_extractor = FacialLandmarkExtractor(landmark_model_path)
        self.head_pose_estimator = HeadPoseEstimator()
        self.eye_closure_detector = EyeClosureDetector()

        # Distraction tracking state.
        self.distraction_frame_count = 0
        self.distraction_threshold = distraction_threshold

    def process_frame(self, frame: np.ndarray) -> dict:
        """Run the full pipeline on one frame.

        Args:
            frame: IR image, either (H, W) or (H, W, 3).

        Returns:
            Dict with keys ``'face_detected'``, ``'drowsiness'``,
            ``'distraction'``, ``'euler_angles'`` and ``'status'``. When both
            warnings fire on the same frame, ``'status'`` carries the
            drowsiness message.
        """
        result = {
            'face_detected': False,
            'drowsiness': False,
            'distraction': False,
            'euler_angles': None,
            'status': '正常'
        }

        # 1. Face detection (only boxes are needed, not crops).
        faces = self.face_detector.detect(frame)

        if not faces:
            result['status'] = '未检测到人脸'
            return result

        result['face_detected'] = True

        # Use the largest face, on the assumption that it is the driver.
        main_face = max(faces, key=lambda f: (f[2] - f[0]) * (f[3] - f[1]))
        x1, y1, x2, y2 = main_face[:4]

        # 2. Landmark extraction
        landmarks = self.landmark_extractor.extract(frame, (x1, y1, x2, y2))

        # 3. Head-pose estimation
        _, _, euler = self.head_pose_estimator.estimate(landmarks)
        result['euler_angles'] = euler

        # 4. Distraction detection. A failed pose estimate (euler is None)
        #    counts as not looking forward.
        is_forward = self.head_pose_estimator.is_looking_forward(euler)

        if is_forward:
            self.distraction_frame_count = 0
        else:
            self.distraction_frame_count += 1
            if self.distraction_frame_count >= self.distraction_threshold:
                result['distraction'] = True
                result['status'] = f'分心警告:未看前方 {self.distraction_frame_count} 帧'

        # 5. Drowsiness detection. The eye-closure stage works on
        #    single-channel crops, so convert colour frames first.
        gray = frame if frame.ndim == 2 else cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        left_eye, right_eye = self.landmark_extractor.get_eye_regions(landmarks)
        left_eye_img = self.eye_closure_detector.extract_eye_region(gray, left_eye)
        right_eye_img = self.eye_closure_detector.extract_eye_region(gray, right_eye)

        # Landmarks outside the frame give empty crops; skip the eye check for
        # this frame instead of failing on them.
        if left_eye_img.size == 0 or right_eye_img.size == 0:
            return result

        is_drowsy, status = self.eye_closure_detector.detect_drowsiness(
            left_eye_img, right_eye_img
        )
        result['drowsiness'] = is_drowsy
        if is_drowsy:
            result['status'] = status

        return result


# Usage example
if __name__ == "__main__":
    dms = IRDriverMonitoringSystem('yolov7-face.pt', 'shape_predictor_68_face_landmarks.dat')

    # Simulated video stream
    for i in range(100):
        # Simulated IR frame. randint's upper bound is exclusive, so 256 is
        # needed to cover the full uint8 range.
        frame = np.random.randint(0, 256, (800, 1280), dtype=np.uint8)

        result = dms.process_frame(frame)

        if result['drowsiness']:
            print(f"[帧 {i}] 疲劳警告!")
        elif result['distraction']:
            print(f"[帧 {i}] 分心警告!")
        elif result['face_detected']:
            euler = result['euler_angles']
            # Pose estimation can fail even when a face was found.
            if euler is not None:
                print(f"[帧 {i}] Pitch: {euler['pitch']:.1f}°, Yaw: {euler['yaw']:.1f}°")

实验结果

人脸检测性能

条件 帧数 检测率 精度 召回率
正常光照 22,379 98.4% 100% 98.4%
低光照 17,996 99.0% 100% 99.0%

疲劳检测性能

指标 结果
准确率 >99%
精度 99.3%
帧率(CPU) 20-25 FPS
帧率(Xavier) 10 FPS

对 IMS 开发的启示

1. IR 摄像头选型

参数 推荐值
分辨率 1280×800 或更高
帧率 ≥30 FPS
工作温度 -40°C ~ +85°C
传输接口 GMSL2 或 MIPI CSI

2. 算法选择

功能 推荐方法 原因
人脸检测 YOLOv7-Tiny 速度与精度平衡
关键点提取 dlib (回归树集成) 快速、CPU 可运行
头部姿态 solvePnP 数值稳定、可解释
眼睑检测 自适应阈值 轻量、无需训练

3. 部署优化

  • 仅人脸检测使用 GPU,其余模块 CPU 运行
  • 关键点数量优化:68 点可精简为 36 点
  • 帧率优化:跳帧检测(每 3 帧检测 1 次)

参考资料

  1. 论文原文: https://pmc.ncbi.nlm.nih.gov/articles/PMC10600215/
  2. 数据集: https://github.com/kdh6126/IR-Carmera-Datasets/
  3. YOLOv7: https://github.com/WongKinYiu/yolov7

总结

本文提出的 IR 摄像头 DMS 方案:

  1. 硬件优势: IR 不受光照影响,适合全天候监控
  2. 算法轻量: 仅人脸检测使用深度学习,其余传统方法
  3. 实时性能: 嵌入式平台可达 10 FPS
  4. 部署友好: CPU 即可运行大部分模块

对 IMS 开发,IR 摄像头 + 轻量化算法是推荐技术路线。


IR 摄像头 DMS 疲劳分心检测论文解读与代码复现
https://dapalm.com/2026/06/14/2026-06-14-IR-Camera-DMS-Paper-Eye-Closure-Head-Pose/
作者
Mars
发布于
2026年6月14日
许可协议