驾驶员认知分心检测研究进展:DCDD 模型与眼动行为分析

驾驶员认知分心检测研究进展:DCDD 模型与眼动行为分析

认知分心的定义与挑战

什么是认知分心?

认知分心(Cognitive Distraction)是指驾驶员“看而不见”的状态——眼睛注视道路,但注意力已转移。驾驶分心通常分为以下三类:

视觉分心:眼睛离开道路

手动分心:手离开方向盘

认知分心:思维离开驾驶任务 ← 最难检测!

检测难点

挑战 描述
外观正常 驾驶员眼观前方、手握方向盘
缺乏明显信号 无大幅度动作或姿态变化
主观性强 需要检测内部认知状态
数据标注困难 难以确定真实认知状态

眼动行为与认知分心的关联

研究发现

多项研究表明,认知分心会导致特定的眼动模式变化:

眼动指标 正常驾驶 认知分心
扫视频率 高(频繁扫视) 低(凝视固定)
注视分布 分散(道路+后视镜) 集中(道路中心)
瞳孔直径 稳定 波动增大
眨眼频率 正常 先增后减
扫视幅度 大(覆盖广) 小(隧道效应)

眼动特征提取

import numpy as np
from typing import List, Tuple
from dataclasses import dataclass

@dataclass
class GazePoint:
    """A single gaze sample produced by the eye tracker.

    The feature extractor divides ``x`` / ``y`` by the screen width / height
    and derives velocities by differencing ``timestamp``, so the coordinates
    are expected in screen pixels and the timestamp in seconds.
    """

    timestamp: float  # sample time (seconds in the demo: frame_index / 30.0)
    x: float  # horizontal screen coordinate
    y: float  # vertical screen coordinate
    pupil_diameter: float = 0.0  # pupil diameter; unit is not fixed by this file


class EyeMovementFeatureExtractor:
    """
    Eye-movement feature extractor.

    Turns a window of gaze samples into a flat dictionary of scalar features
    (saccades, fixations, gaze distribution, pupil and velocity statistics)
    intended for cognitive-distraction detection.

    The set of keys returned by :meth:`extract_features` is always the same
    (21 entries) once enough samples are available, so the values can be fed
    to a fixed-size model input.
    """

    def __init__(self,
                 screen_width: int = 1920,
                 screen_height: int = 1080,
                 saccade_threshold: float = 100.0,
                 fixation_threshold: float = 30.0):
        """
        Args:
            screen_width: Width used to normalise x coordinates.
            screen_height: Height used to normalise y coordinates.
            saccade_threshold: Point-to-point speed (pixels/sec) above which
                a sample pair is considered part of a saccade.
            fixation_threshold: Point-to-point speed (pixels/sec) below which
                a sample pair is considered part of a fixation.
        """
        self.screen_width = screen_width
        self.screen_height = screen_height

        # Feature window parameters.
        self.window_size_sec = 10.0
        self.min_samples = 100

        # Empirical velocity thresholds (previously hard-coded in the
        # detection methods).
        self.saccade_threshold = saccade_threshold
        self.fixation_threshold = fixation_threshold

    def extract_features(self, gaze_sequence: "List[GazePoint]") -> dict:
        """
        Extract all eye-movement features from a gaze sequence.

        Args:
            gaze_sequence: Sequence of objects exposing ``timestamp``, ``x``,
                ``y`` and ``pupil_diameter``.

        Returns:
            Feature dictionary, or an empty dict when fewer than
            ``self.min_samples`` samples are supplied.
        """
        if len(gaze_sequence) < self.min_samples:
            return {}

        # Convert to numpy arrays once; every feature group reuses them.
        timestamps = np.array([g.timestamp for g in gaze_sequence], dtype=float)
        x_coords = np.array([g.x for g in gaze_sequence], dtype=float)
        y_coords = np.array([g.y for g in gaze_sequence], dtype=float)
        pupils = np.array([g.pupil_diameter for g in gaze_sequence], dtype=float)

        features = {}

        # 1. Saccade features
        saccades = self._detect_saccades(x_coords, y_coords, timestamps)
        features.update(self._compute_saccade_features(saccades))

        # 2. Fixation features
        fixations = self._detect_fixations(x_coords, y_coords, timestamps)
        features.update(self._compute_fixation_features(fixations))

        # 3. Gaze distribution features
        features.update(self._compute_distribution_features(x_coords, y_coords))

        # 4. Pupil features
        features.update(self._compute_pupil_features(pupils))

        # 5. Time-series features
        features.update(self._compute_temporal_features(
            x_coords, y_coords, timestamps
        ))

        return features

    @staticmethod
    def _sample_velocity(x: np.ndarray, y: np.ndarray,
                         t: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Return (point-to-point speed in pixels/sec, time deltas).

        Pairs whose time delta is not positive (duplicate or out-of-order
        timestamps) get a speed of 0 instead of inf/NaN.
        """
        dt = np.diff(np.asarray(t, dtype=float))
        step = np.hypot(np.diff(np.asarray(x, dtype=float)),
                        np.diff(np.asarray(y, dtype=float)))
        velocity = np.zeros_like(step)
        np.divide(step, dt, out=velocity, where=dt > 0)
        return velocity, dt

    @staticmethod
    def _find_runs(mask: np.ndarray) -> List[Tuple[int, int]]:
        """Return ``(start, end)`` index pairs of consecutive True runs.

        ``end`` is exclusive. A run that is still open at the end of ``mask``
        is closed at ``len(mask)`` rather than being dropped.
        """
        if mask.size == 0:
            return []
        padded = np.concatenate(([False], mask, [False]))
        edges = np.flatnonzero(padded[1:] != padded[:-1])
        return list(zip(edges[0::2].tolist(), edges[1::2].tolist()))

    def _detect_saccades(self, x: np.ndarray, y: np.ndarray,
                         t: np.ndarray) -> List[dict]:
        """Detect saccades as runs of samples faster than the saccade threshold.

        Velocity index ``i`` describes the movement from sample ``i`` to
        sample ``i + 1``, so a run ``[start, end)`` of fast velocities spans
        samples ``start`` .. ``end`` (``end`` is always a valid sample index).
        """
        velocity, _ = self._sample_velocity(x, y, t)
        is_saccade = velocity > self.saccade_threshold

        saccades = []
        for start, end in self._find_runs(is_saccade):
            saccades.append({
                'start_idx': start,
                'end_idx': end,
                'duration': float(t[end] - t[start]),
                'amplitude': float(np.hypot(x[end] - x[start],
                                            y[end] - y[start])),
                'start_time': float(t[start]),
                'end_time': float(t[end])
            })
        return saccades

    def _detect_fixations(self, x: np.ndarray, y: np.ndarray,
                          t: np.ndarray) -> List[dict]:
        """Detect fixations as runs of samples slower than the fixation threshold.

        This is a simplified velocity-threshold scheme; a production system
        should use a full I-VT or I-DT implementation.
        """
        velocity, _ = self._sample_velocity(x, y, t)
        is_fixation = velocity < self.fixation_threshold

        fixations = []
        for start, end in self._find_runs(is_fixation):
            # Samples start..end (inclusive) all belong to the fixation, which
            # matches the span used for the duration.
            fixations.append({
                'start_idx': start,
                'end_idx': end,
                'duration': float(t[end] - t[start]),
                'center_x': float(np.mean(x[start:end + 1])),
                'center_y': float(np.mean(y[start:end + 1]))
            })
        return fixations

    def _compute_saccade_features(self, saccades: List[dict]) -> dict:
        """Summarise saccades; the key set is identical with or without events."""
        if not saccades:
            return {
                'saccade_count': 0,
                'saccade_rate': 0,
                'mean_saccade_amplitude': 0,
                'std_saccade_amplitude': 0,
                'mean_saccade_duration': 0
            }

        amplitudes = [s['amplitude'] for s in saccades]
        durations = [s['duration'] for s in saccades]

        return {
            'saccade_count': len(saccades),
            # Rate is relative to the nominal window length.
            'saccade_rate': len(saccades) / self.window_size_sec,
            'mean_saccade_amplitude': float(np.mean(amplitudes)),
            'std_saccade_amplitude': float(np.std(amplitudes)),
            'mean_saccade_duration': float(np.mean(durations))
        }

    def _compute_fixation_features(self, fixations: List[dict]) -> dict:
        """Summarise fixations; the key set is identical with or without events."""
        if not fixations:
            return {
                'fixation_count': 0,
                'mean_fixation_duration': 0,
                'std_fixation_duration': 0,
                'fixation_dispersion': 0
            }

        durations = [f['duration'] for f in fixations]
        centers = np.array([[f['center_x'], f['center_y']] for f in fixations])

        return {
            'fixation_count': len(fixations),
            'mean_fixation_duration': float(np.mean(durations)),
            'std_fixation_duration': float(np.std(durations)),
            # Mean of the per-axis standard deviation of fixation centres.
            'fixation_dispersion': float(np.std(centers, axis=0).mean())
        }

    def _compute_distribution_features(self, x: np.ndarray, y: np.ndarray) -> dict:
        """Compute gaze-distribution features on screen-normalised coordinates."""
        if len(x) == 0:
            return {
                'gaze_entropy': 0.0,
                'mean_distance_to_center': 0.0,
                'std_distance_to_center': 0.0,
                'gaze_range_x': 0.0,
                'gaze_range_y': 0.0
            }

        # Normalise coordinates by the screen size.
        x_norm = np.asarray(x, dtype=float) / self.screen_width
        y_norm = np.asarray(y, dtype=float) / self.screen_height

        # Entropy over a fixed 10x10 grid covering the screen. The grid must
        # not adapt to the data range, otherwise tightly clustered and widely
        # spread gaze produce the same entropy. Off-screen samples are clipped
        # into the border cells so that every sample is counted.
        hist_2d, _, _ = np.histogram2d(
            np.clip(x_norm, 0.0, 1.0),
            np.clip(y_norm, 0.0, 1.0),
            bins=10,
            range=[[0.0, 1.0], [0.0, 1.0]]
        )
        cell_prob = hist_2d[hist_2d > 0] / hist_2d.sum()
        entropy = float(np.sum(cell_prob * np.log2(1.0 / cell_prob)))

        # Concentration: distance of each sample to the screen centre.
        center_x, center_y = 0.5, 0.5
        distances = np.hypot(x_norm - center_x, y_norm - center_y)

        return {
            'gaze_entropy': entropy,
            'mean_distance_to_center': float(np.mean(distances)),
            'std_distance_to_center': float(np.std(distances)),
            'gaze_range_x': float(np.ptp(x_norm)),
            'gaze_range_y': float(np.ptp(y_norm))
        }

    def _compute_pupil_features(self, pupils: np.ndarray) -> dict:
        """Compute mean, standard deviation and range of the pupil diameter."""
        if len(pupils) == 0:
            return {
                'mean_pupil_diameter': 0.0,
                'std_pupil_diameter': 0.0,
                'pupil_diameter_range': 0.0
            }

        return {
            'mean_pupil_diameter': float(np.mean(pupils)),
            'std_pupil_diameter': float(np.std(pupils)),
            'pupil_diameter_range': float(np.ptp(pupils))
        }

    def _compute_temporal_features(self, x: np.ndarray, y: np.ndarray,
                                   t: np.ndarray) -> dict:
        """Compute velocity / acceleration statistics of the gaze trajectory."""
        velocity, dt = self._sample_velocity(x, y, t)
        if velocity.size == 0:
            return {
                'mean_velocity': 0.0,
                'std_velocity': 0.0,
                'mean_acceleration': 0.0,
                'velocity_variability': 0.0
            }

        # Acceleration between consecutive velocity samples; non-positive time
        # steps contribute 0 instead of inf/NaN.
        velocity_change = np.diff(velocity)
        step = dt[:-1]
        acceleration = np.zeros_like(velocity_change)
        np.divide(velocity_change, step, out=acceleration, where=step > 0)

        mean_velocity = float(np.mean(velocity))
        std_velocity = float(np.std(velocity))

        return {
            'mean_velocity': mean_velocity,
            'std_velocity': std_velocity,
            'mean_acceleration': (
                float(np.mean(np.abs(acceleration))) if acceleration.size else 0.0
            ),
            # Coefficient of variation; defined as 0 for a motionless gaze.
            'velocity_variability': (
                std_velocity / mean_velocity if mean_velocity > 0 else 0.0
            )
        }

DCDD 模型:驾驶员认知分心检测

模型架构

DCDD(Driver Cognitive Distraction Detection)模型是 2024 年提出的基于眼动行为的认知分心检测方法:

输入层:眼动数据序列

特征提取层:
├─ 空间特征(注视分布、扫视模式)
├─ 时间特征(序列模式、动态变化)
└─ 通道特征(多维度特征融合)

分类层:认知分心概率

PyTorch 实现

import torch
import torch.nn as nn
import torch.nn.functional as F
from typing import Tuple

class DCDDModel(nn.Module):
    """
    DCDD: Driver Cognitive Distraction Detection Model.

    Eye-movement based cognitive-distraction classifier combining
    multi-kernel 1-D convolutions ("multi-view" spatial features), a
    bidirectional LSTM (temporal features), channel attention and
    self-attention, followed by a two-class head.

    Reference paper:
    "Driver Cognitive Distraction Detection based on eye movement behavior
    and integration of multi-view space-channel feature"
    Expert Systems with Applications, 2024
    """

    def __init__(self,
                 input_dim: int = 20,
                 hidden_dim: int = 128,
                 num_heads: int = 4,
                 num_layers: int = 2,
                 dropout: float = 0.1):
        """
        Args:
            input_dim: Number of features per time step.
            hidden_dim: Width of the projected feature space.
            num_heads: Number of self-attention heads; ``hidden_dim * 3``
                must be divisible by it.
            num_layers: Number of stacked LSTM layers.
            dropout: Dropout probability used in the LSTM, attention and
                classifier.
        """
        super().__init__()

        self.input_dim = input_dim
        self.hidden_dim = hidden_dim

        # Input projection
        self.input_projection = nn.Linear(input_dim, hidden_dim)

        # Spatial feature extraction: one convolution per kernel size ("view").
        self.spatial_conv = nn.ModuleList([
            nn.Conv1d(hidden_dim, hidden_dim, kernel_size=k, padding=k // 2)
            for k in [3, 5, 7]
        ])

        # Temporal feature extraction (bidirectional LSTM).
        # nn.LSTM only applies dropout between stacked layers and warns when
        # a non-zero value is passed with a single layer.
        self.temporal_encoder = nn.LSTM(
            hidden_dim,
            hidden_dim // 2,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True,
            dropout=dropout if num_layers > 1 else 0.0
        )

        # Channel attention
        self.channel_attention = nn.Sequential(
            nn.Linear(hidden_dim * 3, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim * 3),
            nn.Sigmoid()
        )

        # Self-attention
        self.self_attention = nn.MultiheadAttention(
            hidden_dim * 3,
            num_heads,
            dropout=dropout,
            batch_first=True
        )

        # Classification head
        self.classifier = nn.Sequential(
            nn.Linear(hidden_dim * 3, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, 64),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(64, 2)  # binary: normal / cognitively distracted
        )

    def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Forward pass.

        Args:
            x: Input features, shape=(batch, seq_len, input_dim)

        Returns:
            logits: Classification logits, shape=(batch, 2)
            attention_weights: Self-attention weights returned by
                ``nn.MultiheadAttention``

        Raises:
            ValueError: If ``x`` is not a 3-D tensor.
        """
        if x.dim() != 3:
            raise ValueError(
                f"expected input of shape (batch, seq_len, input_dim), "
                f"got {tuple(x.shape)}"
            )

        # 1. Input projection
        x = self.input_projection(x)  # (batch, seq_len, hidden_dim)

        # 2. Spatial feature extraction (Conv1d expects channels first)
        conv_input = x.transpose(1, 2)  # (batch, hidden_dim, seq_len)
        spatial_features = torch.cat(
            [conv(conv_input) for conv in self.spatial_conv], dim=1
        )  # (batch, hidden_dim*3, seq_len)
        spatial_features = spatial_features.transpose(1, 2)  # (batch, seq_len, hidden_dim*3)

        # 3. Channel attention driven by the global average over time
        channel_weights = self.channel_attention(
            spatial_features.mean(dim=1)
        )  # (batch, hidden_dim*3)
        spatial_features = spatial_features * channel_weights.unsqueeze(1)

        # 4. Temporal feature extraction
        temporal_features, _ = self.temporal_encoder(x)  # (batch, seq_len, 2*(hidden_dim//2))

        # An odd hidden_dim leaves the bidirectional output one channel short;
        # zero-pad so it lines up with each spatial view.
        missing = self.hidden_dim - temporal_features.size(-1)
        if missing > 0:
            temporal_features = F.pad(temporal_features, (0, missing))

        # 5. Feature fusion.
        # The temporal features are tiled across the three spatial views and
        # added to them. (Concatenating and then keeping only the first
        # hidden_dim*3 channels would silently discard the LSTM output.)
        # Additive fusion introduces no new parameters, so parameter names
        # and shapes are unchanged.
        fused_features = spatial_features + temporal_features.repeat(
            1, 1, len(self.spatial_conv)
        )  # (batch, seq_len, hidden_dim*3)

        # 6. Self-attention
        attended_features, attention_weights = self.self_attention(
            fused_features, fused_features, fused_features
        )

        # 7. Global pooling over time
        global_features = attended_features.mean(dim=1)  # (batch, hidden_dim*3)

        # 8. Classification
        logits = self.classifier(global_features)

        return logits, attention_weights


class DCDDPipeline:
    """
    End-to-end DCDD detection pipeline.

    Buffers incoming gaze points, extracts eye-movement features from the
    buffered window and runs the DCDD model on them.
    """

    # Fixed order of the features fed to the model. The model input width is
    # derived from this tuple so the two can never disagree, and a feature
    # missing from the extractor output is filled with 0 instead of shifting
    # every later value to a different input slot.
    FEATURE_NAMES = (
        'saccade_count', 'saccade_rate', 'mean_saccade_amplitude',
        'std_saccade_amplitude', 'mean_saccade_duration',
        'fixation_count', 'mean_fixation_duration', 'std_fixation_duration',
        'fixation_dispersion',
        'gaze_entropy', 'mean_distance_to_center', 'std_distance_to_center',
        'gaze_range_x', 'gaze_range_y',
        'mean_pupil_diameter', 'std_pupil_diameter', 'pupil_diameter_range',
        'mean_velocity', 'std_velocity', 'mean_acceleration',
        'velocity_variability',
    )

    def __init__(self,
                 model_path: str = None,
                 device: str = 'cuda' if torch.cuda.is_available() else 'cpu'):
        """
        Args:
            model_path: Optional path to a saved ``state_dict``. It must have
                been produced by a ``DCDDModel`` whose ``input_dim`` equals
                ``len(FEATURE_NAMES)``.
            device: Torch device on which inference runs.
        """
        self.device = device
        self.feature_extractor = EyeMovementFeatureExtractor()
        self.model = DCDDModel(input_dim=len(self.FEATURE_NAMES))

        if model_path:
            # NOTE(review): torch.load unpickles the file; only load
            # checkpoints from a trusted source.
            self.model.load_state_dict(torch.load(model_path, map_location=device))

        self.model.to(device)
        self.model.eval()

        # State cache
        self.gaze_buffer = []
        self.buffer_size = 300  # 10 seconds @ 30Hz

    def update(self, gaze_point: GazePoint) -> dict:
        """
        Add a gaze point and return the current detection state.

        Args:
            gaze_point: Latest gaze sample.

        Returns:
            Detection result. While the buffer is still filling, a
            ``'warming_up'`` result with zero confidence is returned.
        """
        self.gaze_buffer.append(gaze_point)

        # Keep only the most recent ``buffer_size`` samples.
        excess = len(self.gaze_buffer) - self.buffer_size
        if excess > 0:
            del self.gaze_buffer[:excess]

        if len(self.gaze_buffer) >= self.buffer_size:
            return self._detect()

        return {
            'cognitive_distraction': False,
            'confidence': 0.0,
            'status': 'warming_up'
        }

    def _detect(self) -> dict:
        """Run feature extraction and model inference on the buffered window."""
        features = self.feature_extractor.extract_features(self.gaze_buffer)

        if not features:
            return {
                'cognitive_distraction': False,
                'confidence': 0.0,
                'status': 'insufficient_data'
            }

        # Build a fixed-length vector in a fixed order.
        feature_vector = torch.tensor(
            [float(features.get(name, 0.0)) for name in self.FEATURE_NAMES],
            dtype=torch.float32,
            device=self.device
        )
        # A single inf/NaN feature would turn every output probability
        # into NaN.
        feature_vector = torch.nan_to_num(
            feature_vector, nan=0.0, posinf=0.0, neginf=0.0
        )
        # Shape (batch=1, seq_len=1, input_dim).
        feature_vector = feature_vector.view(1, 1, -1)

        with torch.no_grad():
            logits, _ = self.model(feature_vector)
            probs = F.softmax(logits, dim=-1)

        normal_prob = probs[0, 0].item()
        distracted_prob = probs[0, 1].item()

        return {
            'cognitive_distraction': distracted_prob > 0.5,
            'confidence': distracted_prob,
            'probabilities': {
                'normal': normal_prob,
                'distracted': distracted_prob
            },
            'status': 'detected',
            'features': features
        }


# Demo / smoke test
def _run_demo() -> None:
    """Feed 10 seconds of synthetic 30 Hz gaze data through the pipeline."""
    # Seed both RNGs: NumPy drives the synthetic gaze data, torch drives the
    # (untrained) model weights, so both must be fixed for a repeatable run.
    np.random.seed(42)
    torch.manual_seed(42)

    pipeline = DCDDPipeline()
    result = {}

    for i in range(300):
        if i < 150:
            # Simulated normal driving: widely spread gaze.
            x = np.random.normal(960, 200)
            y = np.random.normal(540, 150)
        else:
            # Simulated cognitive distraction: tightly concentrated gaze.
            x = np.random.normal(960, 50)
            y = np.random.normal(540, 30)

        gaze_point = GazePoint(
            timestamp=i / 30.0,
            x=x,
            y=y,
            pupil_diameter=np.random.normal(4.0, 0.5)
        )

        result = pipeline.update(gaze_point)

        if i % 50 == 0:
            print(f"Frame {i}: Cognitive Distraction = {result['cognitive_distraction']}, "
                  f"Confidence = {result.get('confidence', 0):.2f}")

    print("\n最终检测结果:")
    print(f"认知分心: {result['cognitive_distraction']}")
    print(f"置信度: {result['confidence']:.2f}")


if __name__ == "__main__":
    _run_demo()

认知分心 vs 视觉分心的区分

多模态融合策略

class DistractionClassifier:
    """
    Distraction-type classifier.

    Rule-based decision cascade that separates visual, manual and cognitive
    distraction from normal driving.
    """

    def __init__(self,
                 off_road_threshold: float = 0.3,
                 entropy_threshold: float = 0.5,
                 saccade_rate_threshold: float = 0.5):
        """
        Args:
            off_road_threshold: ``gaze_off_road_ratio`` above which the
                driver is considered visually distracted.
            entropy_threshold: ``gaze_entropy`` below which gaze counts as
                concentrated.
            saccade_rate_threshold: ``saccade_rate`` below which scanning
                counts as reduced.
        """
        self.off_road_threshold = off_road_threshold
        self.entropy_threshold = entropy_threshold
        self.saccade_rate_threshold = saccade_rate_threshold

        # Descriptive signature of each distraction type. Kept for reference;
        # classify() does not read it.
        self.distraction_patterns = {
            'visual': {
                'gaze_off_road': True,         # eyes leave the road
                'head_movement': 'large',      # large head movement
                'hand_on_wheel': True,         # hands on the wheel
                'vehicle_control': 'normal'    # vehicle control is normal
            },
            'manual': {
                'gaze_off_road': False,        # eyes may still be on the road
                'head_movement': 'normal',
                'hand_on_wheel': False,        # hand leaves the wheel
                'vehicle_control': 'degraded'  # vehicle control degrades
            },
            'cognitive': {
                'gaze_off_road': False,        # eyes on the road
                'head_movement': 'reduced',    # less head movement
                'hand_on_wheel': True,
                'gaze_entropy': 'low',         # low gaze entropy (tunnel effect)
                'saccade_rate': 'low'          # low saccade rate
            }
        }

    def classify(self, features: dict) -> str:
        """
        Classify the distraction type.

        Args:
            features: Extracted features. Recognised keys are
                ``gaze_off_road_ratio``, ``hand_on_wheel``, ``gaze_entropy``
                and ``saccade_rate``; all are optional.

        Returns:
            One of ``'visual'``, ``'manual'``, ``'cognitive'`` or ``'normal'``.
        """
        # Eyes off the road for a large share of the window -> visual.
        if features.get('gaze_off_road_ratio', 0) > self.off_road_threshold:
            return 'visual'

        # Hand off the wheel -> manual.
        if not features.get('hand_on_wheel', True):
            return 'manual'

        gaze_entropy = features.get('gaze_entropy')
        saccade_rate = features.get('saccade_rate')

        # Without eye-movement evidence nothing can be concluded. Defaulting
        # the missing values to 0 would label every empty feature dict as
        # cognitively distracted.
        if gaze_entropy is None or saccade_rate is None:
            return 'normal'

        # Concentrated gaze together with reduced scanning -> cognitive.
        if (gaze_entropy < self.entropy_threshold
                and saccade_rate < self.saccade_rate_threshold):
            return 'cognitive'

        return 'normal'

Euro NCAP 对认知分心的未来要求

当前状态

Euro NCAP 2026 尚未明确要求认知分心检测,但在 Vision 2030 路线图中已提及:

Euro NCAP 2030 愿景:
├─ 驾驶员损伤检测
├─ 突发疾病监测
├─ 压力检测
└─ 认知分心评估 ← 未来要求

技术储备建议

阶段 能力 技术方案
短期 眼动数据采集 部署眼动追踪传感器
中期 眼动特征分析 实现 DCDD 类似模型
长期 多模态融合 眼动 + 生理 + 驾驶行为

IMS 开发启示

1. 传感器需求

传感器 用途 精度要求
眼动追踪 注视点、扫视、瞳孔 角度误差 <1°
面部关键点 头部姿态、表情 误差 <3mm
车辆 CAN 驾驶行为、车辆状态 标准 OBD

2. 算法模块

# Complete cognitive-distraction detection pipeline
class CognitiveDistractionPipeline:
    """Gaze tracking -> feature extraction -> DCDD scoring -> type classification.

    NOTE(review): ``GazeTracker`` is not defined in this file; it is assumed
    to expose ``track(frame)`` returning a gaze sequence accepted by
    ``EyeMovementFeatureExtractor.extract_features`` — confirm.
    """

    # Fixed order of the features fed to the model; the model input width is
    # derived from it, and missing features are filled with 0.
    FEATURE_NAMES = (
        'saccade_count', 'saccade_rate', 'mean_saccade_amplitude',
        'std_saccade_amplitude', 'mean_saccade_duration',
        'fixation_count', 'mean_fixation_duration', 'std_fixation_duration',
        'fixation_dispersion',
        'gaze_entropy', 'mean_distance_to_center', 'std_distance_to_center',
        'gaze_range_x', 'gaze_range_y',
        'mean_pupil_diameter', 'std_pupil_diameter', 'pupil_diameter_range',
        'mean_velocity', 'std_velocity', 'mean_acceleration',
        'velocity_variability',
    )

    def __init__(self):
        self.gaze_tracker = GazeTracker()
        self.feature_extractor = EyeMovementFeatureExtractor()
        self.dcdd_model = DCDDModel(input_dim=len(self.FEATURE_NAMES))
        self.dcdd_model.eval()  # inference only: disable dropout
        self.distraction_classifier = DistractionClassifier()

    def _cognitive_score(self, features: dict) -> float:
        """Return the model's probability of the 'distracted' class.

        ``DCDDModel`` exposes only ``forward`` (there is no ``predict``), so
        the feature dict is converted to a (1, 1, input_dim) tensor here and
        the logits are turned into a probability with softmax.
        """
        vector = torch.tensor(
            [float(features.get(name, 0.0)) for name in self.FEATURE_NAMES],
            dtype=torch.float32
        )
        # Guard against inf/NaN features, which would make the output NaN.
        vector = torch.nan_to_num(vector, nan=0.0, posinf=0.0, neginf=0.0)

        with torch.no_grad():
            logits, _ = self.dcdd_model(vector.view(1, 1, -1))
            probs = F.softmax(logits, dim=-1)

        return probs[0, 1].item()

    def process(self, frame, vehicle_data):
        """Process one frame and return the detection result.

        Args:
            frame: Input passed to the gaze tracker.
            vehicle_data: Vehicle signals; accepted for interface
                compatibility but not used by the current implementation.

        Returns:
            Dict with ``cognitive_distraction``, ``distraction_type``,
            ``confidence`` and ``status``.
        """
        # 1. Gaze tracking
        gaze_data = self.gaze_tracker.track(frame)

        # 2. Feature extraction
        features = self.feature_extractor.extract_features(gaze_data)

        # The extractor returns an empty dict when it has too few samples;
        # there is nothing meaningful to score or classify in that case.
        if not features:
            return {
                'cognitive_distraction': False,
                'distraction_type': 'normal',
                'confidence': 0.0,
                'status': 'insufficient_data'
            }

        # 3. DCDD model inference
        cognitive_score = self._cognitive_score(features)

        # 4. Distraction-type classification
        distraction_type = self.distraction_classifier.classify(features)

        return {
            'cognitive_distraction': cognitive_score > 0.5,
            'distraction_type': distraction_type,
            'confidence': cognitive_score,
            'status': 'detected'
        }

3. 性能指标

指标 目标值 测试方法
准确率 ≥85% 标注数据测试
召回率 ≥80% 认知分心样本
误检率 ≤15% 正常驾驶样本
检测时延 ≤5秒 从分心开始到检测

参考来源:


驾驶员认知分心检测研究进展:DCDD 模型与眼动行为分析
https://dapalm.com/2026/06/13/2026-06-13-Cognitive-Distraction-Detection-DCDD-Eye-Movement/
作者
Mars
发布于
2026年6月13日
许可协议