Fraunhofer IOSB 乘员监测系统:35种行为识别

Fraunhofer IOSB 乘员监测系统:35种行为识别

技术来源

系统概述

Fraunhofer IOSB的先进乘员监测系统(AOMS)是车内行为识别的SOTA方案,核心能力:

  • 3D姿态检测:实时识别所有乘员的骨骼模型
  • 手势识别:厘米级精度指向检测
  • 行为分类:35种车内活动识别
  • 意图预测:基于行为序列预测下一步动作

核心技术

1. 3D姿态检测

骨骼模型输出:

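| 关节点 | 数量 | 用途 |
| --- | --- | --- |
| 头部/眼睛 | 3 | 视线方向、注意力 |
| 颈部 | 1 | 头部姿态 |
| 肩膀 | 2 | 手臂动作 |
| 肘部 | 2 | 手臂位置 |
| 手腕 | 2 | 手势识别 |
| 躯干 | 2 | 坐姿判断 |
| 骨盆 | 1 | 姿态稳定 |
| 腿部 | 4 | 坐姿、伸展 |
| 总计 | 17 | 全身姿态 |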

传感器配置:

| 方案 | 优点 | 缺点 |
| --- | --- | --- |
| 单目3D相机 | 成本低、部署简单 | 视角受限 |
| 多目2D相机 | 视角覆盖好 | 需要3D重建 |
| 多目3D相机 | 最全面 | 成本高 |

2. 行为识别算法

示例代码(Python):
"""
Fraunhofer IOSB 乘员行为识别系统
基于3D姿态的行为分类
"""

import numpy as np
from typing import List, Dict, Tuple
from dataclasses import dataclass
from enum import Enum

class Activity(Enum):
    """In-cabin activity types.

    The full taxonomy described by the article has 35 activities; only the
    20 members below are defined here. Values are consecutive integers
    starting at 0 and must stay stable, because other code may persist them.
    """

    # Driving-related
    DRIVING_NORMAL = 0
    STEERING = 1
    CHECKING_MIRROR = 2

    # Communication
    PHONE_CALL_HAND = 3
    PHONE_CALL_HANDSFREE = 4
    TEXTING = 5

    # Eating / drinking
    DRINKING = 6
    EATING = 7

    # Resting
    SLEEPING = 8
    RESTING_EYES = 9

    # Entertainment
    READING = 10
    WATCHING_SCREEN = 11
    LISTENING_MUSIC = 12

    # Interaction
    TALKING_PASSENGER = 13
    GESTURING = 14
    REACHING = 15

    # Other
    SEARCHING = 16
    ADJUSTING_CONTROLS = 17
    PUTTING_ON_SEATBELT = 18
    REMOVING_SEATBELT = 19
    # ... remaining activities of the 35-class taxonomy are not defined yet

@dataclass
class Skeleton3D:
    """One frame of 3D skeleton data for a single occupant."""

    # 3D joint coordinates, one row per joint. Documented as (17, 3);
    # NOTE(review): the shape is not validated here — confirm against producers.
    joints: np.ndarray
    # Per-joint confidence; documented as (17,), expected to have one entry
    # per row of `joints` (not enforced).
    confidence: np.ndarray
    # Capture time of the frame. Motion features divide by timestamp
    # differences, so this is presumably in seconds — TODO confirm.
    timestamp: float

@dataclass
class DetectedObject:
    """An object reported by the object detector."""

    # Class label, e.g. 'phone', 'bottle', 'book'.
    label: str
    # 3D position of the object.
    position: np.ndarray
    # Detection confidence score.
    confidence: float

class ActivityRecognizer:
    """Rule-based in-cabin activity recognizer.

    Combines pose features from a single 3D skeleton, motion features from a
    short skeleton history, and the labels of detected objects.

    Joint index layout used by the feature extractors in this class:
    0 head, 1 left eye, 2 right eye, 3/4 left/right shoulder,
    5/6 left/right elbow, 7/8 left/right wrist, 9 upper torso, 10 lower torso.
    """

    def __init__(self,
                 history_length: int = 30,
                 fps: int = 30):
        """
        Args:
            history_length: Maximum number of skeleton frames kept in history.
            fps: Nominal frame rate. Stored only; velocities are derived from
                frame timestamps, not from this value.
        """
        self.history_length = history_length
        self.fps = fps
        self.skeleton_history: "List[Skeleton3D]" = []
        self.activity_history: "List[Activity]" = []

        # Minimum activity durations in seconds. Stored for callers; the
        # rules in classify_activity do not consult this table.
        self.activity_thresholds = {
            Activity.DRINKING: 1.5,
            Activity.EATING: 2.0,
            Activity.PHONE_CALL_HAND: 3.0,
            Activity.TEXTING: 2.0,
            Activity.SLEEPING: 5.0,
        }

    def update_history(self, skeleton: "Skeleton3D"):
        """Append a skeleton frame and drop the oldest one when over capacity."""
        self.skeleton_history.append(skeleton)

        # One frame is added per call, so dropping one keeps the length fixed.
        if len(self.skeleton_history) > self.history_length:
            self.skeleton_history.pop(0)

    def extract_pose_features(self,
                              skeleton: "Skeleton3D") -> Dict:
        """Extract static pose features from a single skeleton frame.

        Args:
            skeleton: Frame whose ``joints`` follow the index layout described
                in the class docstring (at least 11 rows are read).

        Returns:
            Dict with keys 'head_position', 'head_orientation',
            'left_arm_angle', 'right_arm_angle', 'left_hand_to_head',
            'right_hand_to_head', 'torso_angle' and 'gaze_direction'.
        """
        joints = skeleton.joints

        head_position = joints[0]
        head_orientation = self._compute_head_orientation(joints[0:3])

        left_arm_angle = self._compute_arm_angle(joints[3], joints[5], joints[7])
        right_arm_angle = self._compute_arm_angle(joints[4], joints[6], joints[8])

        # Wrist positions expressed relative to the head joint.
        left_hand_to_head = joints[7] - head_position
        right_hand_to_head = joints[8] - head_position

        torso_angle = self._compute_torso_angle(joints[9], joints[10])

        return {
            'head_position': head_position,
            'head_orientation': head_orientation,
            'left_arm_angle': left_arm_angle,
            'right_arm_angle': right_arm_angle,
            'left_hand_to_head': left_hand_to_head,
            'right_hand_to_head': right_hand_to_head,
            'torso_angle': torso_angle,
            # Simplification: gaze is approximated by the head orientation.
            'gaze_direction': head_orientation,
        }

    def extract_motion_features(self) -> Dict:
        """Extract motion features from the skeleton history.

        Frame pairs whose timestamp difference is not positive (duplicate or
        out-of-order frames) are skipped so that they cannot produce infinite
        or NaN velocities.

        Returns:
            Dict with keys 'mean_left_hand_velocity',
            'mean_right_hand_velocity', 'std_right_hand_velocity' and
            'head_stability'. An empty dict is returned when fewer than two
            frames are available or no frame pair has a positive time step.
        """
        history = self.skeleton_history
        if len(history) < 2:
            return {}

        # Wrist speed (distance / elapsed time) between consecutive frames.
        hand_velocities = []
        for previous, current in zip(history, history[1:]):
            dt = current.timestamp - previous.timestamp
            if dt <= 0:
                continue

            left_hand_vel = np.linalg.norm(
                current.joints[7] - previous.joints[7]) / dt
            right_hand_vel = np.linalg.norm(
                current.joints[8] - previous.joints[8]) / dt
            hand_velocities.append((left_hand_vel, right_hand_vel))

        if not hand_velocities:
            return {}

        hand_velocities = np.array(hand_velocities)

        # Head stability is the inverse of the spread of head positions over
        # the history; the 0.01 offset bounds it at 100 for a motionless head.
        head_positions = np.array([s.joints[0] for s in history])
        head_movement = np.std(head_positions, axis=0)
        head_stability = 1.0 / (np.linalg.norm(head_movement) + 0.01)

        return {
            'mean_left_hand_velocity': np.mean(hand_velocities[:, 0]),
            'mean_right_hand_velocity': np.mean(hand_velocities[:, 1]),
            'std_right_hand_velocity': np.std(hand_velocities[:, 1]),
            'head_stability': head_stability,
        }

    def classify_activity(self,
                          pose_features: Dict,
                          motion_features: Dict,
                          detected_objects: "List[DetectedObject]") -> "Activity":
        """Classify the current activity with hand-written rules.

        Rules are evaluated in order and the first match wins. Only the right
        hand/arm is considered. Missing motion features default to 0.

        Args:
            pose_features: Output of extract_pose_features.
            motion_features: Output of extract_motion_features (may be empty).
            detected_objects: Objects detected in the current frame.

        Returns:
            The recognized activity; Activity.DRIVING_NORMAL when no rule
            matches.
        """
        labels = {obj.label for obj in detected_objects}

        right_hand = pose_features['right_hand_to_head']
        right_arm_angle = pose_features['right_arm_angle']
        right_hand_speed = motion_features.get('mean_right_hand_velocity', 0)

        # 1. Hand-held phone call: phone present and the right hand is above
        #    the head joint and within 0.3 of it.
        if ('phone' in labels
                and right_hand[1] > 0
                and np.linalg.norm(right_hand) < 0.3):
            return Activity.PHONE_CALL_HAND

        # 2. Drinking: bottle present, arm bent, hand nearly still.
        if ('bottle' in labels
                and right_arm_angle > 60
                and right_hand_speed < 0.1):
            return Activity.DRINKING

        # 3. Eating: irregular right-hand speed together with food or a bottle.
        if (motion_features.get('std_right_hand_velocity', 0) > 0.15
                and labels & {'food', 'bottle'}):
            return Activity.EATING

        # 4. Sleeping: head dropped, head and hand nearly motionless.
        # NOTE(review): this rule treats pitch < -30 as "head down" while the
        # reading rule below treats pitch > 20 as "looking down"; the sign
        # convention is inconsistent and should be settled. Also,
        # _compute_head_orientation currently never sets pitch, so neither
        # rule can fire yet.
        if (pose_features['head_orientation'][1] < -30
                and motion_features.get('head_stability', 0) > 0.9
                and right_hand_speed < 0.05):
            return Activity.SLEEPING

        # 5. Reading: book present and head pitched towards the hands.
        if 'book' in labels and pose_features['head_orientation'][1] > 20:
            return Activity.READING

        # 6. Default: normal driving. (A dedicated "relaxed arm, slow hand,
        #    gaze ahead" rule would return the same value, so it is folded
        #    into the default.)
        return Activity.DRIVING_NORMAL

    def _compute_head_orientation(self, head_joints: np.ndarray) -> np.ndarray:
        """Estimate head orientation as (yaw, pitch, roll) in degrees.

        Simplified placeholder: only yaw is estimated, and only as 0 (left eye
        has the smaller x coordinate) or 30 (otherwise). Pitch and roll are
        always 0.

        Args:
            head_joints: Rows are head, left eye, right eye.
        """
        # Float array so that fractional angles are not truncated if a more
        # precise estimate is assigned later.
        orientation = np.zeros(3)

        left_eye = head_joints[1]
        right_eye = head_joints[2]
        orientation[0] = 0.0 if left_eye[0] < right_eye[0] else 30.0

        return orientation

    def _compute_arm_angle(self,
                           shoulder: np.ndarray,
                           elbow: np.ndarray,
                           wrist: np.ndarray) -> float:
        """Return the angle in degrees between upper arm and forearm directions.

        The angle is close to 0 for a straight arm and grows as the elbow
        bends. Degenerate (zero-length) segments yield 90.
        """
        upper_arm = elbow - shoulder
        forearm = wrist - elbow

        # The small constant avoids division by zero for coincident joints.
        cos_angle = np.dot(upper_arm, forearm) / (
            np.linalg.norm(upper_arm) * np.linalg.norm(forearm) + 1e-6
        )

        # Clip guards arccos against rounding slightly outside [-1, 1].
        return float(np.degrees(np.arccos(np.clip(cos_angle, -1, 1))))

    def _compute_torso_angle(self,
                             torso_upper: np.ndarray,
                             torso_lower: np.ndarray) -> float:
        """Return the torso lean in degrees relative to the (0, -1, 0) axis.

        Returns 0.0 when both torso joints coincide, since no direction can be
        measured in that case.
        """
        direction = torso_lower - torso_upper
        length = np.linalg.norm(direction)
        if length < 1e-9:
            return 0.0

        vertical = np.array([0, -1, 0])
        # Clip guards arccos against rounding slightly outside [-1, 1].
        cos_angle = np.clip(np.dot(direction, vertical) / length, -1, 1)

        return float(np.degrees(np.arccos(cos_angle)))


class IntentionPredictor:
    """Predict the next activity from the most recent observed activity.

    Keeps a bounded sequence of recent activities and looks the latest one up
    in a small hand-written transition table.
    """

    def __init__(self):
        self.activity_sequence = []
        self.sequence_length = 10

        # Simplified transition table: current activity -> candidate next
        # activities (all candidates are treated as equally likely).
        self.transitions = {
            Activity.SEARCHING: [Activity.PHONE_CALL_HAND, Activity.DRINKING],
            Activity.REACHING: [Activity.DRINKING, Activity.EATING, Activity.TEXTING],
            Activity.REMOVING_SEATBELT: [Activity.SEARCHING, Activity.REACHING],
        }

    def update_sequence(self, activity: "Activity"):
        """Append an observed activity, dropping the oldest when over capacity."""
        self.activity_sequence.append(activity)

        if len(self.activity_sequence) > self.sequence_length:
            self.activity_sequence.pop(0)

    def predict_next_intention(self) -> "List[Tuple[Activity, float]]":
        """Predict likely next activities.

        Returns:
            A list of (activity, probability) pairs with a uniform probability
            over the candidates of the latest activity. The list is empty when
            nothing has been observed yet or the latest activity has no entry
            in the transition table.
        """
        if not self.activity_sequence:
            return []

        candidates = self.transitions.get(self.activity_sequence[-1], [])
        return [(candidate, 1.0 / len(candidates)) for candidate in candidates]


# Demo / smoke test
def main():
    """Run a small demo: extract features for two poses and classify one."""
    recognizer = ActivityRecognizer()
    predictor = IntentionPredictor()

    # Normal driving pose. This demo skeleton has 16 joints (no neck joint),
    # matching the index layout used by ActivityRecognizer.
    normal_joints = np.array([
        [0, 0, 0],            # head
        [-0.03, 0.05, 0.1],   # left eye
        [0.03, 0.05, 0.1],    # right eye
        [-0.15, -0.1, 0],     # left shoulder
        [0.15, -0.1, 0],      # right shoulder
        [-0.25, -0.3, 0.1],   # left elbow
        [0.25, -0.3, 0.1],    # right elbow
        [-0.2, -0.2, 0.3],    # left hand
        [0.2, -0.2, 0.3],     # right hand
        [0, -0.4, 0],         # upper torso
        [0, -0.7, 0],         # lower torso
        [0, -0.8, 0],         # pelvis
        [-0.1, -0.9, -0.2],   # left knee
        [0.1, -0.9, -0.2],    # right knee
        [-0.1, -1.0, -0.4],   # left ankle
        [0.1, -1.0, -0.4],    # right ankle
    ])
    normal_skeleton = Skeleton3D(
        joints=normal_joints,
        # One confidence value per joint row.
        confidence=np.full(len(normal_joints), 0.9),
        timestamp=0.0,
    )
    recognizer.update_history(normal_skeleton)

    pose_features = recognizer.extract_pose_features(normal_skeleton)

    print("姿态特征:")
    print(f" 头部位置: {pose_features['head_position']}")
    print(f" 左臂角度: {pose_features['left_arm_angle']:.1f}°")
    print(f" 右臂角度: {pose_features['right_arm_angle']:.1f}°")

    # Phone-call pose: same as the normal pose except for the joints below.
    phone_joints = normal_joints.copy()
    phone_joints[0] = [0, 0.05, 0]
    phone_joints[6] = [0.35, -0.1, 0.2]   # right elbow raised
    phone_joints[8] = [0.15, 0.1, 0.1]    # right hand near the ear
    phone_skeleton = Skeleton3D(
        joints=phone_joints,
        confidence=np.full(len(phone_joints), 0.9),
        timestamp=1.0,
    )
    recognizer.update_history(phone_skeleton)

    phone_features = recognizer.extract_pose_features(phone_skeleton)
    # Both frames are in the history now, so motion features are populated.
    motion_features = recognizer.extract_motion_features()

    print("\n打电话姿态:")
    print(f" 右手到头部距离: {np.linalg.norm(phone_features['right_hand_to_head']):.2f}m")

    detected_objects = [DetectedObject('phone', np.array([0.15, 0.1, 0.1]), 0.95)]
    activity = recognizer.classify_activity(phone_features, motion_features, detected_objects)
    print(f" 识别活动: {activity.name}")

    predictor.update_sequence(activity)
    print(f" 预测下一步: {predictor.predict_next_intention()}")


if __name__ == "__main__":
    main()

35种活动分类

活动类别

| 类别 | 活动数量 | 具体活动 |
| --- | --- | --- |
| 驾驶相关 | 5 | 正常驾驶、方向盘操作、观察后视镜、换挡、踩踏板 |
| 通讯类 | 4 | 手持电话、免提电话、发短信、视频通话 |
| 饮食类 | 3 | 喝水、吃东西、吸烟 |
| 休息类 | 3 | 睡眠、闭目养神、打哈欠 |
| 娱乐类 | 4 | 阅读、看屏幕、听音乐、玩游戏 |
| 交互类 | 5 | 与乘客交谈、手势、伸手取物、整理物品、调整设备 |
| 安全相关 | 4 | 系安全带、解安全带、调整座椅、检查仪表 |
| 其他 | 7 | 搜索物品、化妆、照镜子、照看儿童、宠物互动… |

Euro NCAP 2026 应用

分心检测要求

| 活动类型 | Euro NCAP检测要求 | 警告等级 |
| --- | --- | --- |
| 视觉分心(看手机/屏幕) | ≤3秒检测 | 一级警告 |
| 手动分心(手持电话) | ≤2秒检测 | 一级警告 |
| 认知分心(电话通话) | ≤10秒检测 | 一级警告 |
| 疲劳(打哈欠/睡眠) | 持续观察 | 二级警告 |

测试场景

示例代码(Python):
# Euro NCAP distraction test scenarios, keyed by scenario id.
# 'detection_time' and 'gaze_away_duration' are in seconds; a
# 'warning_level' of 0 means no warning is triggered.
DISTRACTION_SCENARIOS = {
    'D-01': dict(
        activity=Activity.TEXTING,
        description='低头看手机发短信',
        detection_time=2.0,
        gaze_away_duration=3.0,
        warning_level=1,
    ),
    'D-02': dict(
        activity=Activity.PHONE_CALL_HAND,
        description='手持电话通话',
        detection_time=2.0,
        warning_level=1,
    ),
    'D-03': dict(
        activity=Activity.DRINKING,
        description='喝水',
        detection_time=3.0,
        warning_level=0,
    ),
    'D-04': dict(
        activity=Activity.SLEEPING,
        description='闭眼超过5秒',
        detection_time=5.0,
        warning_level=2,
    ),
}

IMS开发启示

1. 系统架构

```mermaid
graph LR
    A[相机输入] --> B[3D姿态估计]
    B --> C[关节点提取]
    C --> D[特征工程]

    E[物体检测] --> F[手部物体识别]
    F --> D

    D --> G[活动分类]
    G --> H[意图预测]
    H --> I[警告触发]
```

2. 关键技术指标

| 指标 | 要求 | 实现难度 |
| --- | --- | --- |
| 姿态检测精度 | 关节误差<5cm | 中等 |
| 活动识别准确率 | >90% | 较高 |
| 实时性能 | >25fps | 中等 |
| 遮挡鲁棒性 | 50%遮挡仍可用 | —(原文缺失) |

3. 硬件配置

| 方案 | 成本 | 适用场景 |
| --- | --- | --- |
| 单目IR+RGB | —(原文缺失) | 驾驶员监测 |
| 双目3D相机 | —(原文缺失) | 驾驶员+前排乘客 |
| 多目3D相机 | —(原文缺失) | 全舱监测 |

4. 与DMS集成

示例代码(Python):
class IntegratedDMS:
    """Integrated driver monitoring pipeline: pose -> activity -> risk -> warning.

    NOTE(review): PoseEstimator3D and WarningManager are not defined in this
    file; they are assumed to provide ``estimate(frame)`` and
    ``decide(activity, risk)`` respectively — confirm against their
    implementations.
    """

    def __init__(self):
        self.pose_estimator = PoseEstimator3D()
        self.activity_recognizer = ActivityRecognizer()
        self.warning_manager = WarningManager()

        # Risk level per activity, taken from the warning levels of the
        # distraction scenarios; activities without a scenario map to 0.
        self.risk_levels = {
            scenario['activity']: scenario['warning_level']
            for scenario in DISTRACTION_SCENARIOS.values()
        }

    def process_frame(self,
                      frame: np.ndarray,
                      detected_objects: "List[DetectedObject] | None" = None) -> Dict:
        """Process a single camera frame.

        Args:
            frame: Input image passed to the pose estimator.
            detected_objects: Objects detected in this frame, if an object
                detector is available. Defaults to no objects.

        Returns:
            Dict with keys 'skeleton', 'activity', 'risk_level' and 'warning'.
        """
        # 1. 3D pose estimation
        skeleton = self.pose_estimator.estimate(frame)

        # 2. Activity recognition. The recognizer needs the frame in its
        #    history before motion features can be derived.
        recognizer = self.activity_recognizer
        recognizer.update_history(skeleton)
        pose_features = recognizer.extract_pose_features(skeleton)
        motion_features = recognizer.extract_motion_features()
        activity = recognizer.classify_activity(
            pose_features, motion_features, detected_objects or [])

        # 3. Risk assessment
        risk = self._assess_risk(activity)

        # 4. Warning decision
        warning = self.warning_manager.decide(activity, risk)

        return {
            'skeleton': skeleton,
            'activity': activity,
            'risk_level': risk,
            'warning': warning
        }

    def _assess_risk(self, activity) -> int:
        """Return the risk level for an activity (0 when none is configured)."""
        return self.risk_levels.get(activity, 0)

参考文献

  1. Fraunhofer IOSB. “Advanced Occupant Monitoring System for activity recognition in cars.” 2025.
  2. Euro NCAP. “Safe Driving - Driver Engagement Protocol v1.1.” October 2025.
  3. Da Cruz, L., et al. “SVIRO: Synthetic Vehicle Interior Rear Seat Occupancy Dataset.” WACV 2020.

相关文章:


Fraunhofer IOSB 乘员监测系统:35种行为识别
https://dapalm.com/2026/06/11/2026-06-11-Fraunhofer-Occupant-Monitoring-35-Activities/
作者
Mars
发布于
2026年6月11日
许可协议