眼动追踪鲁棒性优化:墨镜、口罩、低光照场景应对方案

前言

眼动追踪(Eye Tracking)是 DMS 的核心技术,但实际场景面临诸多挑战:

  • 墨镜遮挡眼睛
  • 口罩遮挡下半脸
  • 低光照/逆光条件
  • 驾驶员头部大幅运动

本文系统分析这些挑战并提供工程解决方案。


一、挑战场景分析

1.1 场景分类

场景 问题 影响程度
墨镜 眼睛完全遮挡 严重
口罩 下半脸遮挡 中等
低光照 特征不明显 中等
逆光 面部阴影 严重
头部运动 部分遮挡 中等
化妆/美瞳 特征变化 轻微

1.2 性能影响

场景 正常准确率 挑战场景准确率 下降幅度
正常 95% - -
墨镜(透光) 95% 70% -25%
墨镜(不透光) 95% 20% -75%
口罩 95% 85% -10%
低光照 95% 75% -20%
逆光 95% 60% -35%

二、墨镜场景应对

2.1 技术方案

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
┌────────────────────────────────────────────┐
│ 墨镜场景应对方案 │
├────────────────────────────────────────────┤
│ │
│ 方案 1:红外补光 │
│ └─> 940nm IR 穿透部分墨镜 │
│ │
│ 方案 2:反射光分析 │
│ └─> 分析墨镜反射的眼动模式 │
│ │
│ 方案 3:多模态融合 │
│ └─> 头部姿态 + 方向盘输入 + 踏板输入 │
│ │
│ 方案 4:降级检测 │
│ └─> 基于头部姿态的注意力估计 │
│ │
└────────────────────────────────────────────┘

2.2 红外穿透方案

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
"""
红外墨镜穿透方案

940nm 红外光可以穿透部分墨镜
"""

import numpy as np
import cv2
from typing import Tuple, Optional

class IRSunglassesPenetration:
    """Infrared sunglasses-penetration eye detector.

    Principle:
    - Some sunglasses (notably polarized lenses) transmit near-infrared light
      reasonably well.
    - A 940 nm IR LED provides fill light; an IR camera captures the eye image.
    """

    def __init__(
        self,
        ir_threshold: int = 50,
        eye_region_size: Tuple[int, int] = (64, 32)
    ):
        # Minimum usable IR brightness (kept for callers; not used below).
        self.ir_threshold = ir_threshold
        # Target (width, height) of the resized eye crop.
        self.eye_region_size = eye_region_size

    def detect_eye_through_sunglasses(
        self,
        ir_image: np.ndarray,
        face_bbox: Tuple[int, int, int, int]
    ) -> Tuple[Optional[np.ndarray], float]:
        """Detect the eyes through sunglasses in an IR frame.

        Args:
            ir_image: single-channel infrared image.
            face_bbox: face bounding box (x1, y1, x2, y2).

        Returns:
            (eye_region, confidence): resized eye crop if detected (else
            None) and a confidence score in [0, 1].
        """
        x1, y1, x2, y2 = face_bbox
        face_width = x2 - x1
        face_height = y2 - y1

        # Eye band: middle portion of the upper half of the face.
        eye_y1 = int(y1 + face_height * 0.2)
        eye_y2 = int(y1 + face_height * 0.5)
        eye_x1 = int(x1 + face_width * 0.1)
        eye_x2 = int(x1 + face_width * 0.9)

        eye_region = ir_image[eye_y1:eye_y2, eye_x1:eye_x2]

        # Degenerate bbox -> nothing to analyse.
        if eye_region.size == 0:
            return None, 0.0

        # Adaptive threshold emphasises dark structures (pupil / iris)
        # against the lens background.
        binary = cv2.adaptiveThreshold(
            eye_region,
            255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY_INV,
            11,
            2
        )

        # Look for circular structures (eyeballs) in the denoised mask.
        circles = cv2.HoughCircles(
            cv2.medianBlur(binary, 5),
            cv2.HOUGH_GRADIENT,
            1,
            minDist=20,
            param1=50,
            param2=30,
            minRadius=5,
            maxRadius=20
        )

        if circles is not None and len(circles[0]) >= 2:
            # At least two circles -> both eyes plausibly visible.
            confidence = min(len(circles[0]) / 2.0, 1.0)

            eye_region_resized = cv2.resize(
                eye_region,
                self.eye_region_size
            )

            return eye_region_resized, confidence

        return None, 0.0

    def estimate_gaze_from_reflection(
        self,
        ir_image: np.ndarray,
        face_bbox: Tuple[int, int, int, int]
    ) -> Tuple[float, float]:
        """Estimate gaze direction from the sunglasses' reflection.

        Principle:
        - The lenses reflect the scene in front of the driver.
        - The brightness distribution of that reflection shifts with
          eyeball position, giving a coarse gaze estimate.

        Returns:
            (gaze_x, gaze_y), each clipped to [-1, 1]; (0, 0) when the
            lens region is empty.
        """
        # Crop the sunglasses band (same region as the eye band above).
        x1, y1, x2, y2 = face_bbox
        sunglasses_region = ir_image[
            int(y1 + (y2 - y1) * 0.2):int(y1 + (y2 - y1) * 0.5),
            int(x1 + (x2 - x1) * 0.1):int(x1 + (x2 - x1) * 0.9)
        ]

        if sunglasses_region.size == 0:
            return 0.0, 0.0

        # Horizontal gaze from left/right brightness asymmetry.
        h, w = sunglasses_region.shape
        left_half = sunglasses_region[:, :w // 2]
        right_half = sunglasses_region[:, w // 2:]

        left_brightness = np.mean(left_half)
        right_brightness = np.mean(right_half)

        gaze_x = (right_brightness - left_brightness) / 255.0
        gaze_x = np.clip(gaze_x, -1, 1)

        # Vertical gaze from top/bottom brightness asymmetry.
        top_half = sunglasses_region[:h // 2, :]
        bottom_half = sunglasses_region[h // 2:, :]

        top_brightness = np.mean(top_half)
        bottom_brightness = np.mean(bottom_half)

        gaze_y = (bottom_brightness - top_brightness) / 255.0
        gaze_y = np.clip(gaze_y, -1, 1)

        return gaze_x, gaze_y


# 测试
if __name__ == "__main__":
    # Smoke test with a synthetic IR frame (random noise stands in for a
    # real camera capture).
    detector = IRSunglassesPenetration()

    ir_image = np.random.randint(0, 255, (480, 640), dtype=np.uint8)
    face_bbox = (200, 100, 400, 350)

    eye_region, confidence = detector.detect_eye_through_sunglasses(
        ir_image, face_bbox
    )

    gaze_x, gaze_y = detector.estimate_gaze_from_reflection(
        ir_image, face_bbox
    )

    print(f"眼部检测置信度: {confidence:.2f}")
    print(f"估计视线: ({gaze_x:.2f}, {gaze_y:.2f})")

2.3 多模态融合方案

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
"""
多模态融合方案

当眼睛检测不可靠时,融合其他信号
"""

import numpy as np
from typing import Dict, Tuple
from dataclasses import dataclass

@dataclass
class MultimodalFeatures:
    """Multimodal feature vector fed to the attention estimator."""
    head_yaw: float           # head yaw angle (degrees)
    head_pitch: float         # head pitch angle (degrees)
    steering_angle: float     # steering-wheel angle
    steering_velocity: float  # steering-wheel angular velocity
    lane_position: float      # lateral lane-position deviation
    vehicle_speed: float      # vehicle speed


class MultimodalAttentionEstimator:
    """Multimodal attention estimator.

    Estimates driver attention from head pose, steering and lane-keeping
    signals when eye tracking is unreliable (e.g. opaque sunglasses).
    """

    def __init__(self):
        # Fusion weights; they sum to 1.0 so the score stays in [0, 1].
        self.weights = {
            'head_pose': 0.4,
            'steering': 0.3,
            'lane_keeping': 0.3
        }

        # Thresholds for per-signal scoring.
        self.head_yaw_threshold = 20.0    # degrees
        self.head_pitch_threshold = 15.0  # degrees
        self.steering_variance_threshold = 5.0
        self.lane_deviation_threshold = 0.3  # metres

    def estimate_attention(
        self,
        features: MultimodalFeatures
    ) -> Tuple[float, str]:
        """Estimate the attention state from multimodal features.

        Args:
            features: multimodal feature vector.

        Returns:
            (attention_score, attention_state): score in [0, 1] and one of
            "normal", "mild_distraction", "distraction",
            "severe_distraction".
        """
        scores = {}

        # 1. Head-pose score.
        scores['head_pose'] = self._evaluate_head_pose(
            features.head_yaw,
            features.head_pitch
        )

        # 2. Steering score.
        scores['steering'] = self._evaluate_steering(
            features.steering_angle,
            features.steering_velocity
        )

        # 3. Lane-keeping score.
        scores['lane_keeping'] = self._evaluate_lane_keeping(
            features.lane_position
        )

        # Weighted fusion of the three sub-scores.
        attention_score = sum(
            scores[k] * self.weights[k]
            for k in scores
        )

        # Map the score to a discrete state.
        if attention_score > 0.8:
            attention_state = "normal"
        elif attention_score > 0.5:
            attention_state = "mild_distraction"
        elif attention_score > 0.3:
            attention_state = "distraction"
        else:
            attention_state = "severe_distraction"

        return attention_score, attention_state

    def _evaluate_head_pose(
        self,
        yaw: float,
        pitch: float
    ) -> float:
        """Score head pose; 1.0 when facing straight ahead."""
        # Normal driving: head oriented towards the road.
        yaw_deviation = abs(yaw) / self.head_yaw_threshold
        pitch_deviation = abs(pitch) / self.head_pitch_threshold

        # The worse of the two axes drives the penalty.
        score = 1.0 - min(max(yaw_deviation, pitch_deviation), 1.0)

        return max(score, 0.0)

    def _evaluate_steering(
        self,
        angle: float,
        velocity: float
    ) -> float:
        """Score steering input; regular micro-corrections are normal."""
        # Both a frozen wheel and abrupt steering are suspicious.
        if abs(velocity) < 0.1:   # wheel untouched for a while
            return 0.3
        elif abs(velocity) > 30:  # abrupt steering
            return 0.5
        else:
            return 0.8

    def _evaluate_lane_keeping(
        self,
        lane_position: float
    ) -> float:
        """Score lane keeping from the lateral deviation magnitude."""
        deviation = abs(lane_position)

        if deviation < self.lane_deviation_threshold * 0.5:
            return 1.0
        elif deviation < self.lane_deviation_threshold:
            return 0.7
        else:
            return 0.3


# 测试
if __name__ == "__main__":
    estimator = MultimodalAttentionEstimator()

    # Simulated attentive driving.
    normal_features = MultimodalFeatures(
        head_yaw=5.0,
        head_pitch=3.0,
        steering_angle=2.0,
        steering_velocity=1.5,
        lane_position=0.1,
        vehicle_speed=25.0
    )

    score, state = estimator.estimate_attention(normal_features)
    print(f"正常驾驶 - 注意力分数: {score:.2f}, 状态: {state}")

    # Simulated distracted driving.
    distracted_features = MultimodalFeatures(
        head_yaw=35.0,           # head turned away
        head_pitch=10.0,
        steering_angle=0.0,
        steering_velocity=0.0,   # no steering input
        lane_position=0.5,       # large lane deviation
        vehicle_speed=25.0
    )

    score, state = estimator.estimate_attention(distracted_features)
    print(f"分心驾驶 - 注意力分数: {score:.2f}, 状态: {state}")

三、口罩场景应对

3.1 问题分析

口罩遮挡影响:

  • 下巴、嘴巴、鼻子被遮挡
  • 人脸关键点检测受影响
  • 眼睛区域通常可见

3.2 解决方案

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
"""
口罩场景眼动追踪方案

关键:只使用眼睛区域
"""

import cv2
import numpy as np
from typing import List, Tuple, Optional

class MaskedFaceEyeTracker:
    """Eye tracker for mask-wearing faces.

    Strategy:
    1. Detect whether a mask is worn.
    2. Use eye-region features only (eyes are usually still visible).
    3. Adapt the landmark-based processing accordingly.
    """

    def __init__(self):
        # Eye landmark indices of the 68-point face model.
        self.left_eye_indices = list(range(36, 42))
        self.right_eye_indices = list(range(42, 48))

        # Buffer of past EAR values (reserved for temporal smoothing).
        self.ear_history = []

    def detect_mask(
        self,
        face_image: np.ndarray,
        landmarks: np.ndarray
    ) -> bool:
        """Detect whether the face is wearing a mask.

        Args:
            face_image: BGR face crop.
            landmarks: 68-point facial landmarks, (x, y) rows.

        Returns:
            True if a mask is likely present.
        """
        # Approach 1 (not used): the masked lower face lacks lip/chin
        # structure.
        # Approach 2 (used here): colour analysis — masks are typically
        # blue, white or black and fairly uniform in colour.

        # Lower-face band between nose tip (idx 30) and chin (idx 8).
        nose_y = landmarks[30][1]
        chin_y = landmarks[8][1]

        lower_face = face_image[
            int(nose_y):int(chin_y),
            :
        ]

        if lower_face.size == 0:
            return False

        # Colour-distribution analysis in HSV space.
        hsv = cv2.cvtColor(lower_face, cv2.COLOR_BGR2HSV)

        # Blue (surgical masks).
        blue_mask = cv2.inRange(hsv, (100, 50, 50), (130, 255, 255))
        blue_ratio = np.sum(blue_mask > 0) / blue_mask.size

        # White.
        white_mask = cv2.inRange(hsv, (0, 0, 200), (180, 30, 255))
        white_ratio = np.sum(white_mask > 0) / white_mask.size

        # Black.
        black_mask = cv2.inRange(hsv, (0, 0, 0), (180, 255, 50))
        black_ratio = np.sum(black_mask > 0) / black_mask.size

        # Masked if any single mask colour dominates the band.
        is_masked = (blue_ratio > 0.3 or white_ratio > 0.3 or black_ratio > 0.3)

        return is_masked

    def track_eyes_only(
        self,
        face_image: np.ndarray,
        landmarks: np.ndarray
    ) -> dict:
        """Track the eyes only (suitable for masked faces).

        Args:
            face_image: BGR face crop.
            landmarks: 68-point facial landmarks.

        Returns:
            Dict with per-eye EAR values, averaged EAR, averaged gaze
            (gaze_x, gaze_y) and the two eye bounding boxes.
        """
        # Left-eye crop.
        left_eye_pts = landmarks[self.left_eye_indices]
        left_eye_bbox = self._get_bbox(left_eye_pts, face_image.shape)
        left_eye_img = self._crop_region(face_image, left_eye_bbox)

        # Right-eye crop.
        right_eye_pts = landmarks[self.right_eye_indices]
        right_eye_bbox = self._get_bbox(right_eye_pts, face_image.shape)
        right_eye_img = self._crop_region(face_image, right_eye_bbox)

        # Eye aspect ratio (EAR) per eye, then averaged.
        left_ear = self._calculate_ear(left_eye_pts)
        right_ear = self._calculate_ear(right_eye_pts)
        avg_ear = (left_ear + right_ear) / 2

        # Gaze direction per eye (from pupil position), then averaged.
        left_gaze = self._estimate_gaze_from_eye(left_eye_img)
        right_gaze = self._estimate_gaze_from_eye(right_eye_img)

        gaze_x = (left_gaze[0] + right_gaze[0]) / 2
        gaze_y = (left_gaze[1] + right_gaze[1]) / 2

        return {
            'left_ear': left_ear,
            'right_ear': right_ear,
            'avg_ear': avg_ear,
            'gaze_x': gaze_x,
            'gaze_y': gaze_y,
            'left_eye_bbox': left_eye_bbox,
            'right_eye_bbox': right_eye_bbox
        }

    def _get_bbox(
        self,
        points: np.ndarray,
        image_shape: Tuple[int, int]
    ) -> Tuple[int, int, int, int]:
        """Bounding box of *points* with a 5 px margin, clamped to the image."""
        x1 = max(int(points[:, 0].min() - 5), 0)
        y1 = max(int(points[:, 1].min() - 5), 0)
        x2 = min(int(points[:, 0].max() + 5), image_shape[1])
        y2 = min(int(points[:, 1].max() + 5), image_shape[0])

        return (x1, y1, x2, y2)

    def _crop_region(
        self,
        image: np.ndarray,
        bbox: Tuple[int, int, int, int]
    ) -> np.ndarray:
        """Crop *bbox* = (x1, y1, x2, y2) out of *image*."""
        x1, y1, x2, y2 = bbox
        return image[y1:y2, x1:x2]

    def _calculate_ear(self, eye_pts: np.ndarray) -> float:
        """Compute the Eye Aspect Ratio (EAR).

        EAR = (|p2-p6| + |p3-p5|) / (2 * |p1-p4|)
        """
        # Vertical eyelid distances.
        v1 = np.linalg.norm(eye_pts[1] - eye_pts[5])
        v2 = np.linalg.norm(eye_pts[2] - eye_pts[4])

        # Horizontal eye-corner distance.
        h = np.linalg.norm(eye_pts[0] - eye_pts[3])

        # Degenerate eye (corners collapsed) -> treat as fully closed.
        if h < 1:
            return 0.0

        ear = (v1 + v2) / (2 * h)
        return ear

    def _estimate_gaze_from_eye(
        self,
        eye_image: np.ndarray
    ) -> Tuple[float, float]:
        """Estimate gaze direction from a single eye crop."""
        if eye_image.size == 0:
            return (0.0, 0.0)

        # Work on a grey image.
        if len(eye_image.shape) == 3:
            gray = cv2.cvtColor(eye_image, cv2.COLOR_BGR2GRAY)
        else:
            gray = eye_image

        # Pupil localisation simplified to the darkest pixel; a
        # production system should use a more robust method.
        min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(gray)

        # Normalise pupil offset to [-1, 1] relative to the crop centre.
        h, w = gray.shape
        gaze_x = (min_loc[0] - w / 2) / (w / 2)
        gaze_y = (min_loc[1] - h / 2) / (h / 2)

        return (gaze_x, gaze_y)

四、低光照场景应对

4.1 技术方案

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
"""
低光照场景增强方案

使用红外补光和图像增强
"""

import cv2
import numpy as np

class LowLightEnhancer:
    """Low-light image enhancer.

    Available approaches:
    1. IR fill light (hardware, outside this class)
    2. Histogram equalisation
    3. Retinex algorithm
    4. Deep-learning enhancement (not implemented here)
    """

    def __init__(self, method: str = 'retinex'):
        # One of 'histogram', 'retinex', 'gamma'; any other value makes
        # enhance() a pass-through.
        self.method = method

    def enhance(
        self,
        image: np.ndarray
    ) -> np.ndarray:
        """Enhance a low-light image with the configured method.

        Args:
            image: input image (grey or BGR).

        Returns:
            Enhanced image; the input unchanged for unknown methods.
        """
        if self.method == 'histogram':
            return self._histogram_equalization(image)
        elif self.method == 'retinex':
            return self._retinex(image)
        elif self.method == 'gamma':
            return self._gamma_correction(image)
        else:
            return image

    def _histogram_equalization(
        self,
        image: np.ndarray
    ) -> np.ndarray:
        """Histogram equalisation (luma channel only for colour input)."""
        if len(image.shape) == 3:
            # Equalise Y in YCrCb so chroma is untouched.
            ycrcb = cv2.cvtColor(image, cv2.COLOR_BGR2YCrCb)
            ycrcb[:, :, 0] = cv2.equalizeHist(ycrcb[:, :, 0])
            return cv2.cvtColor(ycrcb, cv2.COLOR_YCrCb2BGR)
        else:
            return cv2.equalizeHist(image)

    def _retinex(
        self,
        image: np.ndarray,
        sigma: float = 30
    ) -> np.ndarray:
        """Single-Scale Retinex.

        R = log(I) - log(I * G), where G is a Gaussian kernel.
        """
        if len(image.shape) == 3:
            result = np.zeros_like(image, dtype=np.float32)
            for i in range(3):
                result[:, :, i] = self._ssr(image[:, :, i], sigma)
        else:
            result = self._ssr(image, sigma)

        # Rescale the log-domain response back to displayable 8-bit.
        result = cv2.normalize(result, None, 0, 255, cv2.NORM_MINMAX)
        return result.astype(np.uint8)

    def _ssr(
        self,
        channel: np.ndarray,
        sigma: float
    ) -> np.ndarray:
        """Single-Scale Retinex for one channel."""
        # +1 avoids log(0) on black pixels.
        channel = channel.astype(np.float32) + 1.0

        # Gaussian blur estimates the illumination component.
        blur = cv2.GaussianBlur(channel, (0, 0), sigma)

        retinex = np.log(channel) - np.log(blur + 1)

        return retinex

    def _gamma_correction(
        self,
        image: np.ndarray,
        gamma: "float | None" = None
    ) -> np.ndarray:
        """Gamma correction.

        When *gamma* is None it is derived automatically so the mean
        brightness maps towards mid-grey.  (The previous version always
        recomputed gamma and silently ignored the caller's value.)
        """
        if gamma is None:
            mean_brightness = np.mean(image)
            # Clamp before log() so all-black / all-white frames cannot
            # produce log(0) or a division by zero.
            normalized = float(np.clip(mean_brightness / 255.0, 1e-6, 1.0 - 1e-6))
            gamma = np.log(0.5) / np.log(normalized)
            gamma = float(np.clip(gamma, 0.5, 2.5))

        # Pre-computed lookup table for the 256 grey levels.
        table = np.array([
            ((i / 255.0) ** gamma) * 255
            for i in range(256)
        ]).astype(np.uint8)

        return cv2.LUT(image, table)


# 红外补光配置
# IR fill-light configuration.
class IRIlluminationConfig:
    """Infrared fill-light configuration.

    Hardware requirements:
    - 940 nm IR LEDs
    - peak wavelength: 940 nm
    - radiant intensity: >= 500 mW/sr
    - quantity: 4-8 LEDs
    """

    def __init__(
        self,
        wavelength: int = 940,     # nm
        num_leds: int = 6,
        peak_power: float = 1000,  # mW per LED
        beam_angle: float = 30     # degrees
    ):
        self.wavelength = wavelength
        self.num_leds = num_leds
        self.peak_power = peak_power
        self.beam_angle = beam_angle

    def get_recommended_config(
        self,
        cabin_size: "tuple[float, float, float]" = (1.5, 1.5, 1.0)
    ) -> dict:
        """Compute a recommended LED configuration for a cabin.

        Args:
            cabin_size: cabin dimensions (length, width, height) in metres.

        Returns:
            Dict with wavelength, LED count, per-LED power (mW) and
            normalised placement coordinates.
        """
        # Scale total power with the cabin volume.
        volume = cabin_size[0] * cabin_size[1] * cabin_size[2]

        # Rule of thumb: ~200 mW per cubic metre.
        total_power = volume * 200

        # LED count from per-LED peak power, clamped to the 4-8 range.
        num_leds = int(np.ceil(total_power / self.peak_power))
        num_leds = max(4, min(num_leds, 8))

        return {
            'wavelength': self.wavelength,
            'num_leds': num_leds,
            'power_per_led': total_power / num_leds,
            'placement': self._get_placement(num_leds)
        }

    def _get_placement(self, num_leds: int) -> "list[tuple[float, float]]":
        """Normalised LED placement positions (dashboard top, A-pillars)."""
        placements = {
            4: [(0.3, 0.5), (0.7, 0.5), (0.2, 0.3), (0.8, 0.3)],
            6: [(0.3, 0.5), (0.7, 0.5), (0.2, 0.3), (0.8, 0.3), (0.5, 0.2), (0.5, 0.8)],
            8: [(0.2, 0.5), (0.8, 0.5), (0.1, 0.3), (0.9, 0.3), (0.3, 0.2), (0.7, 0.2), (0.3, 0.8), (0.7, 0.8)]
        }

        # Counts without a dedicated layout fall back to the 6-LED one.
        return placements.get(num_leds, placements[6])

五、综合鲁棒性策略

5.1 降级检测策略

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
"""
降级检测策略

根据场景自动选择检测方法
"""

from enum import Enum
from typing import Dict, Optional
import numpy as np

class DetectionMode(Enum):
    """Detection mode, ordered from most to least capable."""
    NORMAL = 1          # normal mode (both eyes tracked)
    SINGLE_EYE = 2      # single-eye mode (one eye occluded)
    HEAD_POSE_ONLY = 3  # head pose only (opaque sunglasses)
    MULTIMODAL = 4      # multimodal fusion (extreme scenes)
    DEGRADED = 5        # degraded mode (tracking unavailable)


class RobustEyeTracker:
    """Robust eye tracker that auto-selects the best detection mode."""

    def __init__(self):
        self.mode = DetectionMode.NORMAL
        # Reserved buffer for temporal confidence smoothing.
        self.confidence_history = []

        # Sub-modules (detectors are injected/initialised elsewhere).
        self.eye_detector = None          # eye detector
        self.head_pose_estimator = None   # head-pose estimator
        self.multimodal_estimator = MultimodalAttentionEstimator()
        self.light_enhancer = LowLightEnhancer()

    def track(
        self,
        image: np.ndarray,
        vehicle_signals: Optional[Dict] = None
    ) -> Dict:
        """Run one tracking step.

        Args:
            image: input camera frame.
            vehicle_signals: optional vehicle signals for multimodal mode.

        Returns:
            Tracking result dict, including 'mode' and 'confidence'.
        """
        # 1. Analyse the scene conditions.
        scene_analysis = self._analyze_scene(image)

        # 2. Pick the detection mode for those conditions.
        self._select_mode(scene_analysis)

        # 3. Run the detection appropriate to the mode.
        result = self._execute_detection(
            image, scene_analysis, vehicle_signals
        )

        # 4. Attach mode and fused confidence.
        result['mode'] = self.mode.name
        result['confidence'] = self._calculate_confidence(result)

        return result

    def _analyze_scene(self, image: np.ndarray) -> Dict:
        """Analyse scene conditions (stub: only light level is computed)."""
        analysis = {
            'light_level': self._assess_light_level(image),
            'has_sunglasses': False,
            'has_mask': False,
            'face_detected': False,
            'eyes_visible': False
        }

        # Simplified: assume a face was detected; real detectors plug in
        # via self.eye_detector / self.head_pose_estimator.
        analysis['face_detected'] = True

        return analysis

    def _assess_light_level(self, image: np.ndarray) -> str:
        """Classify overall illumination as 'low' / 'normal' / 'high'."""
        if len(image.shape) == 3:
            # BGR -> luma with OpenCV's Rec.601 weights.  Computed in
            # NumPy because this module does not import cv2 (the original
            # cv2.cvtColor call raised NameError here).
            b = image[..., 0].astype(np.float32)
            g = image[..., 1].astype(np.float32)
            r = image[..., 2].astype(np.float32)
            gray = 0.114 * b + 0.587 * g + 0.299 * r
        else:
            gray = image

        mean_brightness = np.mean(gray)

        if mean_brightness < 50:
            return 'low'
        elif mean_brightness > 200:
            return 'high'
        else:
            return 'normal'

    def _select_mode(self, analysis: Dict):
        """Select the detection mode from the scene analysis."""
        if not analysis['face_detected']:
            self.mode = DetectionMode.DEGRADED
        elif analysis['has_sunglasses']:
            if analysis['eyes_visible']:
                self.mode = DetectionMode.SINGLE_EYE
            else:
                self.mode = DetectionMode.HEAD_POSE_ONLY
        elif analysis['light_level'] == 'low':
            self.mode = DetectionMode.SINGLE_EYE
        else:
            self.mode = DetectionMode.NORMAL

    def _execute_detection(
        self,
        image: np.ndarray,
        analysis: Dict,
        vehicle_signals: Optional[Dict]
    ) -> Dict:
        """Dispatch to the detection routine for the current mode."""
        result = {}

        if self.mode == DetectionMode.NORMAL:
            result = self._normal_detection(image)

        elif self.mode == DetectionMode.SINGLE_EYE:
            result = self._single_eye_detection(image)

        elif self.mode == DetectionMode.HEAD_POSE_ONLY:
            result = self._head_pose_detection(image)

        elif self.mode == DetectionMode.MULTIMODAL:
            # Without vehicle signals this mode yields an empty result.
            if vehicle_signals:
                result = self._multimodal_detection(vehicle_signals)

        elif self.mode == DetectionMode.DEGRADED:
            result = {'status': 'unavailable'}

        return result

    def _normal_detection(self, image: np.ndarray) -> Dict:
        """Normal (both-eyes) detection — placeholder values."""
        return {
            'gaze_x': 0.0,
            'gaze_y': 0.0,
            'ear': 0.3,
            'status': 'normal'
        }

    def _single_eye_detection(self, image: np.ndarray) -> Dict:
        """Single-eye detection — placeholder values."""
        # Pre-enhance the frame for low light.
        # NOTE(review): the enhanced frame is not yet consumed by the
        # stub detector below.
        enhanced = self.light_enhancer.enhance(image)

        return {
            'gaze_x': 0.0,
            'gaze_y': 0.0,
            'ear': 0.25,
            'status': 'single_eye'
        }

    def _head_pose_detection(self, image: np.ndarray) -> Dict:
        """Head-pose-only detection — placeholder values."""
        return {
            'gaze_x': 0.0,
            'gaze_y': 0.0,
            'head_yaw': 5.0,
            'head_pitch': 3.0,
            'status': 'head_pose_only'
        }

    def _multimodal_detection(self, signals: Dict) -> Dict:
        """Multimodal detection from vehicle signals."""
        features = MultimodalFeatures(
            head_yaw=signals.get('head_yaw', 0),
            head_pitch=signals.get('head_pitch', 0),
            steering_angle=signals.get('steering_angle', 0),
            steering_velocity=signals.get('steering_velocity', 0),
            lane_position=signals.get('lane_position', 0),
            vehicle_speed=signals.get('speed', 0)
        )

        score, state = self.multimodal_estimator.estimate_attention(features)

        return {
            'attention_score': score,
            'attention_state': state,
            'status': 'multimodal'
        }

    def _calculate_confidence(self, result: Dict) -> float:
        """Fixed per-mode confidence of the current detection mode."""
        mode_confidence = {
            DetectionMode.NORMAL: 0.95,
            DetectionMode.SINGLE_EYE: 0.75,
            DetectionMode.HEAD_POSE_ONLY: 0.5,
            DetectionMode.MULTIMODAL: 0.6,
            DetectionMode.DEGRADED: 0.0
        }

        return mode_confidence.get(self.mode, 0.0)

六、DMS 开发建议

6.1 鲁棒性优先级

优先级 场景 解决方案
P0 低光照 红外补光 + 图像增强
P1 墨镜 多模态融合
P2 口罩 眼部追踪
P3 极端场景 降级检测

6.2 硬件配置建议

组件 推荐配置
IR 摄像头 940nm,≥2MP,全局快门
IR LED 940nm,≥6 个,峰值功率 1W
处理器 NPU ≥15 TOPS

总结

眼动追踪鲁棒性的关键要点:

  1. 墨镜场景: 红外穿透 + 多模态融合
  2. 口罩场景: 仅使用眼部特征
  3. 低光照: 红外补光 + 图像增强
  4. 降级策略: 自动选择检测模式

参考来源:

  1. Eye Aspect Ratio (EAR) for Eye Blink Detection, 2016
  2. Single-Scale Retinex Algorithm
  3. Euro NCAP DSM Test Protocol

眼动追踪鲁棒性优化:墨镜、口罩、低光照场景应对方案
https://dapalm.com/2026/04/20/2026-04-20-eye-tracking-robustness/
作者
Mars
发布于
2026年4月20日
许可协议