姿态+面部融合检测：驾驶员疲劳检测多模态方法

来源： Nature Scientific Reports 2026
发布时间： 2026年4月
链接： https://www.nature.com/articles/s41598-026-44994-4

核心洞察

多模态融合检测优势：

面部特征（眼动/嘴动）+ 身体姿态融合
弥补单一模态局限性
提高检测鲁棒性和准确率

一、方法论

1.1 多模态特征提取

"""
驾驶员疲劳多模态检测系统
"""

import numpy as np
from typing import Dict, List, Tuple

class MultiModalFatigueDetector:
    """
    Multi-modal driver fatigue detector.

    Each frame is passed to a facial-feature analyzer and a body-pose
    analyzer; the two feature dictionaries are then combined by a fusion
    stage into a single fatigue score.

    Detection dimensions:
    1. Facial features: EAR, MAR, head pose
    2. Body pose: shoulder position, spine angle, hand position
    3. Behaviour patterns: yawn rate, blink rate, head movement
    """

    def __init__(self):
        # Pipeline stages, created in the order they are used by detect().
        self.face_analyzer = FacialFeatureAnalyzer()
        self.pose_analyzer = BodyPoseAnalyzer()
        self.fusion = MultiModalFusion()

        # Per-feature thresholds. They are stored on the instance only;
        # no method of this class reads them.
        self.ear_threshold = 0.2        # eye-openness (EAR) threshold
        self.mar_threshold = 0.6        # mouth-openness (MAR) threshold
        self.head_pose_threshold = 30   # head deflection threshold (presumably degrees)

    def detect(self, frame: np.ndarray) -> Dict:
        """
        Run the full fatigue-detection pipeline on one frame.

        Args:
            frame: input frame, documented as (H, W, 3)

        Returns:
            A dict with the keys:
            - 'is_fatigued': True when the fused score is strictly above 0.7
            - 'fatigue_score': the value returned by the fusion stage
            - 'fatigue_level': label from get_fatigue_level()
            - 'face_features': dict returned by the face analyzer
            - 'pose_features': dict returned by the pose analyzer
        """
        facial = self.face_analyzer.extract(frame)
        body = self.pose_analyzer.extract(frame)
        combined = self.fusion.fuse(facial, body)

        return {
            'is_fatigued': combined > 0.7,
            'fatigue_score': combined,
            'fatigue_level': self.get_fatigue_level(combined),
            'face_features': facial,
            'pose_features': body
        }

    def get_fatigue_level(self, score: float) -> str:
        """
        Map a fatigue score to a severity label.

        Bands (upper bound exclusive): below 0.4 'normal', below 0.6 'mild',
        below 0.8 'moderate', anything else 'severe'.
        """
        bands = (
            (0.4, 'normal'),
            (0.6, 'mild'),
            (0.8, 'moderate'),
        )
        for upper_bound, label in bands:
            if score < upper_bound:
                return label
        # No band matched: 0.8 and above (or a score that compares false
        # against every bound) is reported as the highest level.
        return 'severe'


class FacialFeatureAnalyzer:
    """Facial feature analyzer: eye/mouth aspect ratios and head pose."""

    def extract(self, frame: np.ndarray) -> Dict:
        """
        Extract facial features from a frame.

        Key indicators:
        - EAR: eye aspect ratio
        - MAR: mouth aspect ratio
        - head pose (pitch, yaw, roll)
        - blink rate
        - yawn rate

        NOTE: this is an algorithm skeleton. A real implementation needs
        facial landmark detection; the method currently ignores ``frame``
        and returns fixed placeholder values.

        Returns:
            Dict with the keys 'ear', 'mar', 'head_pose', 'blink_rate',
            'yawn_rate' and 'eye_closure_duration'.
        """
        return {
            'ear': 0.0,  # eye openness
            'mar': 0.0,  # mouth openness
            'head_pose': (0.0, 0.0, 0.0),  # (pitch, yaw, roll)
            'blink_rate': 0.0,  # blinks per minute
            'yawn_rate': 0.0,   # yawn rate
            'eye_closure_duration': 0.0  # eye-closure duration (seconds)
        }

    def calculate_ear(self, eye_landmarks: np.ndarray) -> float:
        """
        Compute the Eye Aspect Ratio.

        EAR = (|p2-p6| + |p3-p5|) / (2 * |p1-p4|)

        The formula uses 1-based point names; p1..p6 are rows 0..5 of
        ``eye_landmarks``.

        Args:
            eye_landmarks: eye key points, shape (6, 2)

        Returns:
            The aspect ratio; smaller values mean a more closed eye.
        """
        # Two vertical eyelid distances.
        v1 = np.linalg.norm(eye_landmarks[1] - eye_landmarks[5])
        v2 = np.linalg.norm(eye_landmarks[2] - eye_landmarks[4])

        # Horizontal eye width.
        h = np.linalg.norm(eye_landmarks[0] - eye_landmarks[3])

        # The small epsilon keeps the division defined for a zero-width eye.
        return (v1 + v2) / (2 * h + 1e-7)

    def calculate_mar(self, mouth_landmarks: np.ndarray) -> float:
        """
        Compute the Mouth Aspect Ratio.

        MAR = (|p2-p8| + |p3-p7| + |p4-p6|) / (2 * |p1-p9|)

        The formula uses 1-based point names; only rows 0..8 of
        ``mouth_landmarks`` are read, so any further rows are ignored.

        Args:
            mouth_landmarks: mouth key points, at least 9 rows of (x, y)

        Returns:
            The aspect ratio; larger values mean a wider-open mouth.
        """
        # Three vertical lip distances.
        v1 = np.linalg.norm(mouth_landmarks[1] - mouth_landmarks[7])
        v2 = np.linalg.norm(mouth_landmarks[2] - mouth_landmarks[6])
        v3 = np.linalg.norm(mouth_landmarks[3] - mouth_landmarks[5])

        # Horizontal mouth width.
        h = np.linalg.norm(mouth_landmarks[0] - mouth_landmarks[8])

        # The small epsilon keeps the division defined for a zero-width mouth.
        return (v1 + v2 + v3) / (2 * h + 1e-7)

    @staticmethod
    def _rotation_matrix_to_euler(rotation_mat: np.ndarray) -> Tuple[float, float, float]:
        """Convert a 3x3 rotation matrix to (pitch, yaw, roll) in degrees.

        Uses the decomposition R = Rz(roll) @ Ry(yaw) @ Rx(pitch), i.e.
        pitch is the rotation about X, yaw about Y and roll about Z.
        """
        # Rotation about X: ratio of the last-row Y and Z entries.
        pitch = np.degrees(np.arctan2(rotation_mat[2, 1], rotation_mat[2, 2]))
        # Rotation about Y: R[2, 0] = -sin(yaw). Clamp so rounding noise
        # just outside [-1, 1] cannot turn into NaN.
        yaw = np.degrees(np.arcsin(np.clip(-rotation_mat[2, 0], -1.0, 1.0)))
        # Rotation about Z: ratio of the first-column Y and X entries.
        roll = np.degrees(np.arctan2(rotation_mat[1, 0], rotation_mat[0, 0]))
        return (float(pitch), float(yaw), float(roll))

    def estimate_head_pose(self, landmarks: np.ndarray, 
                          camera_matrix: np.ndarray) -> Tuple[float, float, float]:
        """
        Estimate head pose by fitting a generic 3D face model with PnP.

        Args:
            landmarks: facial key points, shape (68, 2); rows 30, 8, 36,
                45, 48 and 54 are used
            camera_matrix: camera intrinsic matrix

        Returns:
            (pitch, yaw, roll): Euler angles in degrees, with pitch about
            the X axis, yaw about the Y axis and roll about the Z axis.
            (0.0, 0.0, 0.0) is returned when the PnP solver reports failure,
            so a failed fit contributes no head-pose evidence.

        NOTE(review): the 3D model below is Y-up while image coordinates are
        Y-down, so the recovered rotation may contain a ~180 degree flip
        about X for a frontal face - verify on real data and re-reference
        pitch if needed.
        """
        import cv2

        # Generic 3D face model points.
        model_points = np.array([
            (0.0, 0.0, 0.0),             # nose tip
            (0.0, -330.0, -65.0),        # chin
            (-225.0, 170.0, -135.0),     # left eye outer corner
            (225.0, 170.0, -135.0),      # right eye outer corner
            (-150.0, -150.0, -125.0),    # left mouth corner
            (150.0, -150.0, -125.0)      # right mouth corner
        ], dtype=np.float64)

        # Matching 2D image points, in the same order as model_points.
        image_points = np.array([
            landmarks[30],   # nose tip
            landmarks[8],    # chin
            landmarks[36],   # left eye outer corner
            landmarks[45],   # right eye outer corner
            landmarks[48],   # left mouth corner
            landmarks[54]    # right mouth corner
        ], dtype=np.float64)

        # Zero distortion coefficients (lens distortion is not modelled).
        dist_coeffs = np.zeros((4, 1))

        success, rotation_vector, _translation_vector = cv2.solvePnP(
            model_points, image_points, camera_matrix, dist_coeffs
        )
        if not success:
            # Do not decompose an undefined rotation.
            return (0.0, 0.0, 0.0)

        # Rotation vector -> 3x3 rotation matrix -> Euler angles.
        rotation_mat, _ = cv2.Rodrigues(rotation_vector)
        return self._rotation_matrix_to_euler(rotation_mat)


class BodyPoseAnalyzer:
    """Body pose analyzer: shoulder drop and spine bend."""

    def extract(self, frame: np.ndarray) -> Dict:
        """
        Extract body-pose features from a frame.

        Key indicators:
        - shoulder position change (drooping indicates fatigue)
        - spine angle (leaning forward indicates fatigue)
        - hand position (off the steering wheel)
        - body sway rate

        NOTE: skeleton only - ``frame`` is ignored and fixed placeholder
        values are returned. The placeholder 'spine_angle' is not produced
        by calculate_spine_angle().

        Returns:
            Dict with the keys 'shoulder_drop', 'spine_angle',
            'hand_position', 'body_sway_rate' and 'slouch_score'.
        """
        return {
            'shoulder_drop': 0.0,  # amount of shoulder drop
            'spine_angle': 0.0,    # spine angle
            'hand_position': 'on_wheel',  # hand position
            'body_sway_rate': 0.0,  # body sway rate
            'slouch_score': 0.0    # sitting-posture score
        }

    def calculate_shoulder_drop(self, 
                                shoulder_landmarks: np.ndarray,
                                baseline: np.ndarray) -> float:
        """
        Compute how far the shoulders have dropped below a baseline.

        Args:
            shoulder_landmarks: shoulder key points (left, right), one
                (x, y) row each
            baseline: baseline shoulder positions in the same layout

        Returns:
            Drop in pixels along the image Y axis (downwards positive);
            0.0 when the shoulders are at or above the baseline.
        """
        # Midpoint between the shoulders, now and at baseline.
        current_center = np.mean(shoulder_landmarks, axis=0)
        baseline_center = np.mean(baseline, axis=0)

        # Image Y grows downwards, so a positive difference is a drop.
        drop = current_center[1] - baseline_center[1]

        # Only drooping counts; a raised shoulder is reported as 0.
        return max(float(drop), 0.0)

    def calculate_spine_angle(self, 
                              spine_landmarks: np.ndarray) -> float:
        """
        Compute the interior spine angle at the middle key point.

        The angle is measured between the segment middle->top and the
        segment middle->bottom, so a perfectly straight spine yields 180
        degrees and a bent spine yields a smaller value. This is the
        convention used by the score normalisation in this file, where
        170-180 degrees is normal and below 160 indicates fatigue.

        Args:
            spine_landmarks: spine key points ordered top to bottom, at
                least three rows

        Returns:
            Angle in degrees in [0, 180]. The neutral value 180.0 is
            returned when fewer than three points are given or when the
            middle point coincides with an end point, because no bend can
            be measured.
        """
        if len(spine_landmarks) < 3:
            return 180.0

        points = np.asarray(spine_landmarks, dtype=np.float64)

        # Top, middle and bottom points of the spine.
        top = points[0]
        middle = points[len(points) // 2]
        bottom = points[-1]

        # Both vectors start at the middle point so that their angle is the
        # interior angle of the spine at that vertex.
        to_top = top - middle
        to_bottom = bottom - middle

        denom = np.linalg.norm(to_top) * np.linalg.norm(to_bottom)
        if denom == 0.0:
            # Degenerate segment: no direction, hence no measurable bend.
            return 180.0

        # Clamp against rounding noise before taking arccos.
        cos_angle = np.clip(np.dot(to_top, to_bottom) / denom, -1.0, 1.0)
        return float(np.degrees(np.arccos(cos_angle)))


class MultiModalFusion:
    """Weighted-sum fusion of facial and body-pose fatigue indicators."""

    def __init__(self):
        # Per-feature weights (marked as learnable in the original notes).
        self.weights = dict(
            ear=0.30,
            mar=0.15,
            head_pose=0.20,
            blink_rate=0.15,
            shoulder_drop=0.10,
            spine_angle=0.10,
        )

    def fuse(self, face_features: Dict, pose_features: Dict) -> float:
        """
        Fuse the two feature dictionaries into one fatigue score.

        Every feature is looked up with a fallback for missing keys, mapped
        to a sub-score by its normaliser, multiplied by its weight and
        accumulated; the sum is finally clipped to [0, 1].

        Args:
            face_features: facial features
            pose_features: body-pose features

        Returns:
            fatigue_score: fatigue score in [0, 1]
        """
        # (feature key, source dict, fallback when missing, normaliser),
        # listed in accumulation order.
        plan = (
            ('ear', face_features, 0.5, self.normalize_ear),
            ('mar', face_features, 0.0, self.normalize_mar),
            ('head_pose', face_features, (0, 0, 0), self.normalize_head_pose),
            ('blink_rate', face_features, 15, self.normalize_blink_rate),
            ('shoulder_drop', pose_features, 0, self.normalize_shoulder_drop),
            ('spine_angle', pose_features, 180, self.normalize_spine_angle),
        )

        total = 0.0
        for key, source, fallback, normalizer in plan:
            sub_score = normalizer(source.get(key, fallback))
            total += self.weights[key] * sub_score

        return np.clip(total, 0, 1)

    def normalize_ear(self, ear: float) -> float:
        """Map EAR to a sub-score: below 0.2 (eye closed) 1.0, below 0.3 0.5, else 0.0."""
        if ear < 0.2:
            return 1.0  # fatigued
        if ear < 0.3:
            return 0.5
        return 0.0

    def normalize_mar(self, mar: float) -> float:
        """Map MAR to a sub-score: above 0.6 (yawning) 1.0, above 0.4 0.5, else 0.0."""
        if mar > 0.6:
            return 1.0
        if mar > 0.4:
            return 0.5
        return 0.0

    def normalize_head_pose(self, pose: Tuple[float, float, float]) -> float:
        """
        Map a (pitch, yaw, roll) triple to a sub-score.

        Each absolute angle is divided by its own limit (30, 45 and 20
        respectively), capped at 1.0, and the three results are averaged.
        Larger deflection is treated as more fatigued.
        """
        pitch, yaw, roll = pose

        capped_pitch = min(abs(pitch) / 30, 1.0)
        capped_yaw = min(abs(yaw) / 45, 1.0)
        capped_roll = min(abs(roll) / 20, 1.0)

        return (capped_pitch + capped_yaw + capped_roll) / 3

    def normalize_blink_rate(self, rate: float) -> float:
        """
        Map blink rate (per minute) to a sub-score.

        Above 25 or below 5 gives 1.0; otherwise above 20 or below 8 gives
        0.5; everything else gives 0.0.
        """
        if rate > 25 or rate < 5:
            return 1.0
        if rate > 20 or rate < 8:
            return 0.5
        return 0.0

    def normalize_shoulder_drop(self, drop: float) -> float:
        """Scale shoulder drop so that 20 pixels maps to 1.0, capped at 1.0."""
        scaled = drop / 20
        return min(scaled, 1.0)

    def normalize_spine_angle(self, angle: float) -> float:
        """Map spine angle to a sub-score: below 160 gives 1.0, below 170 gives 0.5, else 0.0."""
        if angle < 160:
            return 1.0
        if angle < 170:
            return 0.5
        return 0.0


# 测试代码
if __name__ == "__main__":
    # Smoke test: push one synthetic frame through the detector and print
    # the resulting score, level and verdict.
    fatigue_detector = MultiModalFatigueDetector()

    # Simulated input: a 720x1280 three-channel frame of random bytes.
    dummy_frame = np.random.randint(0, 255, (720, 1280, 3), dtype=np.uint8)

    report = fatigue_detector.detect(dummy_frame)
    verdict = '是' if report['is_fatigued'] else '否'

    print(f"疲劳评分: {report['fatigue_score']:.2f}")
    print(f"疲劳等级: {report['fatigue_level']}")
    print(f"是否疲劳: {verdict}")

二、实验结果

2.1 数据集

数据集	样本数	场景
DMD	1040	多模态驾驶员监控
DriveAHead	24000	驾驶员头部姿态
DD-pose	15000	驾驶员头部姿态

2.2 检测性能

方法	准确率	召回率	F1-score
仅面部特征	89.2%	87.5%	88.3%
仅姿态特征	82.1%	79.8%	80.9%
多模态融合	94.7%	93.2%	93.9%

三、IMS开发启示

3.1 实现要点

# 多模态疲劳检测配置
fatigue_detection:
  modalities:
    - facial:
        enabled: true
        features: [ear, mar, head_pose, blink_rate, yawn_rate]
        model: "mediapipe_face_mesh"
    - pose:
        enabled: true
        features: [shoulder_drop, spine_angle, hand_position, body_sway]
        model: "mediapipe_pose"
  
  fusion:
    method: "weighted_average"
    weights:
      ear: 0.30
      mar: 0.15
      head_pose: 0.20
      blink_rate: 0.15
      shoulder_drop: 0.10
      spine_angle: 0.10
  
  thresholds:
    normal: 0.4
    mild: 0.6
    moderate: 0.8
    severe: 1.0

3.2 性能指标

指标	目标值
帧率	≥30fps
延迟	<100ms
检测准确率	>90%
误报率	<5%

3.3 硬件要求

组件	规格
摄像头	RGB-IR，全局快门
处理器	≥4 TOPS NPU
内存	≥2GB

四、总结

维度	评估	备注
创新性	⭐⭐⭐⭐	面部+姿态多模态融合
实用性	⭐⭐⭐⭐⭐	直接可用
可复现性	⭐⭐⭐⭐	算法清晰
部署难度	⭐⭐⭐	需要两个模型
IMS价值	⭐⭐⭐⭐⭐	提高检测鲁棒性

优先级： 🔥🔥🔥🔥🔥
建议落地： 作为疲劳检测增强方案

参考文献

Nature Scientific Reports. “A fatigue driving detection method based on driver posture and facial state analysis.” 2026.
Fang et al. “AlphaPose: Whole-Body Regional Multi-Person Pose Estimation and Tracking in Real-Time.” IEEE TPAMI, 2023.
Mediapipe. “Face Mesh and Pose Estimation.” Google, 2024.

发布时间： 2026-04-23
标签： #多模态融合 #疲劳检测 #姿态估计 #面部特征 #IMS开发

IMS > DMS > Euro NCAP

#分心检测 #Euro NCAP 2026 #边缘部署 #疲劳检测

姿态+面部融合检测：驾驶员疲劳检测多模态方法

https://dapalm.com/2026/04/23/2026-04-23-pose-face-fusion-fatigue-detection/

作者

Mars

发布于

2026年4月23日

许可协议

上一篇：OOP异常姿态检测：自适应安全气囊约束系统的视觉方案

下一篇：STURDeCAM57：5MP全局快门RGB-IR摄像头，DMS/OMS全天候成像方案