低光照疲劳检测新突破：双注意力机制+可解释AI

论文： Low-light driver drowsiness detection for real-time safety assistance using dual attention mechanisms in deep learning model
来源： Scientific Reports, Nature, 2026
链接： https://www.nature.com/articles/s41598-026-44442-3
发表时间： 2026年4月

核心创新

首次将双注意力机制与**可解释AI（XAI）**结合应用于低光照疲劳检测，解决夜间驾驶场景下疲劳检测准确率低的核心痛点。

三大突破

InceptionV3 + 双注意力：空间注意力+通道注意力协同增强面部关键区域
低光照增强预处理：CLAHE + Retinex理论结合，提升暗光环境特征提取
可解释性设计：Grad-CAM可视化让模型决策透明化，满足车规级安全要求

方法详解

1. 整体架构

输入图像 (640×480 RGB)
    ↓
低光照增强模块 (CLAHE + MSR)
    ↓
InceptionV3 Backbone (预训练ImageNet)
    ↓
双注意力模块
    ├─ 空间注意力 (关注眼睛、嘴巴区域)
    └─ 通道注意力 (增强疲劳相关特征)
    ↓
特征融合 (Concat + FC)
    ↓
分类输出 (疲劳/非疲劳)
    ↓
可解释性分析 (Grad-CAM)

2. 低光照增强模块

CLAHE (对比度受限自适应直方图均衡化)

import cv2
import numpy as np

def apply_clahe(image: np.ndarray, clip_limit: float = 2.0, tile_size: tuple = (8, 8)) -> np.ndarray:
    """
    CLAHE低光照增强
    
    Args:
        image: 输入BGR图像
        clip_limit: 对比度限制，防止噪声放大
        tile_size: 分块大小
    
    Returns:
        enhanced: 增强后的图像
    """
    # 转换到LAB空间
    lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    
    # 仅对亮度通道应用CLAHE
    clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=tile_size)
    l_enhanced = clahe.apply(l)
    
    # 合并并转回BGR
    lab_enhanced = cv2.merge([l_enhanced, a, b])
    enhanced = cv2.cvtColor(lab_enhanced, cv2.COLOR_LAB2BGR)
    
    return enhanced


# 测试代码
if __name__ == "__main__":
    # 模拟低光照图像
    dark_image = cv2.imread("dark_cabin.jpg")
    
    # 增强处理
    enhanced = apply_clahe(dark_image, clip_limit=3.0, tile_size=(8, 8))
    
    # 对比显示
    cv2.imshow("Original", dark_image)
    cv2.imshow("Enhanced", enhanced)
    cv2.waitKey(0)

多尺度Retinex (MSR)

def multi_scale_retinex(image: np.ndarray, scales: list = [15, 80, 250]) -> np.ndarray:
    """
    多尺度Retinex低光照增强
    
    Args:
        image: 输入图像 (0-255)
        scales: 高斯模糊尺度列表
    
    Returns:
        msr_result: 增强结果
    """
    image = image.astype(np.float64) + 1.0  # 避免log(0)
    
    msr_result = np.zeros_like(image)
    
    for scale in scales:
        # 高斯模糊估计光照
        gaussian = cv2.GaussianBlur(image, (0, 0), scale)
        
        # Retinex: log(R) = log(I) - log(L)
        retinex = np.log10(image) - np.log10(gaussian)
        msr_result += retinex
    
    # 平均多尺度结果
    msr_result = msr_result / len(scales)
    
    # 归一化到0-255
    for i in range(3):
        msr_result[:, :, i] = cv2.normalize(
            msr_result[:, :, i], None, 0, 255, cv2.NORM_MINMAX
        )
    
    return msr_result.astype(np.uint8)

3. 双注意力机制

空间注意力模块 (Spatial Attention)

import torch
import torch.nn as nn

class SpatialAttention(nn.Module):
    """
    空间注意力：关注图像中与疲劳相关的区域（眼睛、嘴巴）
    """
    
    def __init__(self, kernel_size: int = 7):
        super().__init__()
        self.conv = nn.Conv2d(2, 1, kernel_size, padding=kernel_size//2, bias=False)
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Args:
            x: 输入特征图 (B, C, H, W)
        
        Returns:
            空间注意力权重 (B, 1, H, W)
        """
        # 沿通道维度计算均值和最大值
        avg_out = torch.mean(x, dim=1, keepdim=True)  # (B, 1, H, W)
        max_out, _ = torch.max(x, dim=1, keepdim=True)  # (B, 1, H, W)
        
        # 拼接
        concat = torch.cat([avg_out, max_out], dim=1)  # (B, 2, H, W)
        
        # 卷积 + Sigmoid
        attention = self.sigmoid(self.conv(concat))  # (B, 1, H, W)
        
        return attention * x  # 加权特征图

通道注意力模块 (Channel Attention)

class ChannelAttention(nn.Module):
    """
    通道注意力：增强与疲劳相关的特征通道
    """
    
    def __init__(self, in_channels: int, reduction: int = 16):
        super().__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        
        self.fc = nn.Sequential(
            nn.Linear(in_channels, in_channels // reduction, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(in_channels // reduction, in_channels, bias=False)
        )
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Args:
            x: 输入特征图 (B, C, H, W)
        
        Returns:
            通道注意力权重 (B, C, 1, 1)
        """
        B, C, H, W = x.size()
        
        # 平均池化分支
        avg_out = self.fc(self.avg_pool(x).view(B, C))  # (B, C)
        
        # 最大池化分支
        max_out = self.fc(self.max_pool(x).view(B, C))  # (B, C)
        
        # 融合
        attention = self.sigmoid(avg_out + max_out).view(B, C, 1, 1)
        
        return attention * x

双注意力融合

class DualAttention(nn.Module):
    """
    双注意力机制：通道注意力 + 空间注意力
    """
    
    def __init__(self, in_channels: int, reduction: int = 16):
        super().__init__()
        self.channel_attention = ChannelAttention(in_channels, reduction)
        self.spatial_attention = SpatialAttention()
    
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Args:
            x: 输入特征图 (B, C, H, W)
        
        Returns:
            增强后的特征图
        """
        # 先通道注意力
        x = self.channel_attention(x)
        
        # 再空间注意力
        x = self.spatial_attention(x)
        
        return x

4. 可解释性分析 (Grad-CAM)

class GradCAM:
    """
    Grad-CAM可解释性分析
    """
    
    def __init__(self, model, target_layer):
        self.model = model
        self.target_layer = target_layer
        
        # 注册钩子
        self.gradients = None
        self.activations = None
        
        target_layer.register_forward_hook(self.save_activation)
        target_layer.register_backward_hook(self.save_gradient)
    
    def save_activation(self, module, input, output):
        self.activations = output
    
    def save_gradient(self, module, grad_input, grad_output):
        self.gradients = grad_output[0]
    
    def __call__(self, x, class_idx=None):
        """
        生成类激活图
        
        Args:
            x: 输入图像 (1, C, H, W)
            class_idx: 目标类别索引
        
        Returns:
            cam: 热力图 (H, W)
        """
        # 前向传播
        output = self.model(x)
        
        if class_idx is None:
            class_idx = output.argmax(dim=1).item()
        
        # 反向传播
        self.model.zero_grad()
        output[0, class_idx].backward(retain_graph=True)
        
        # 计算权重
        weights = self.gradients.mean(dim=(2, 3), keepdim=True)  # (1, C, 1, 1)
        
        # 加权求和
        cam = (weights * self.activations).sum(dim=1, keepdim=True)  # (1, 1, H, W)
        cam = F.relu(cam)
        
        # 归一化
        cam = cam.squeeze().cpu().numpy()
        cam = (cam - cam.min()) / (cam.max() - cam.min() + 1e-8)
        
        return cam


# 使用示例
def visualize_attention(image_path: str, model, target_layer):
    """
    可视化模型关注的疲劳区域
    """
    from PIL import Image
    import matplotlib.pyplot as plt
    
    # 加载图像
    image = Image.open(image_path).convert('RGB')
    input_tensor = transform(image).unsqueeze(0)
    
    # 生成Grad-CAM
    grad_cam = GradCAM(model, target_layer)
    cam = grad_cam(input_tensor, class_idx=1)  # 1 = 疲劳
    
    # 叠加热力图
    cam_resized = cv2.resize(cam, (image.width, image.height))
    heatmap = cv2.applyColorMap(np.uint8(255 * cam_resized), cv2.COLORMAP_JET)
    result = cv2.addWeighted(np.array(image), 0.5, heatmap, 0.5, 0)
    
    # 显示
    plt.figure(figsize=(12, 4))
    plt.subplot(131)
    plt.imshow(image)
    plt.title("Original")
    
    plt.subplot(132)
    plt.imshow(cam_resized, cmap='jet')
    plt.title("Attention Map")
    
    plt.subplot(133)
    plt.imshow(result)
    plt.title("Overlay")
    
    plt.tight_layout()
    plt.savefig("attention_visualization.png", dpi=150)
    plt.show()

实验结果

数据集

数据集	样本数	场景	光照条件
DDD (Driver Drowsiness Dataset)	8,500	驾驶舱	正常+低光照
YawDD	2,300	驾驶舱	多光照
自采集夜间数据	1,200	夜间驾驶	极低光照

性能对比

方法	正常光照准确率	低光照准确率	FPS
Baseline InceptionV3	94.2%	71.3%	35
+ CLAHE	94.5%	82.1%	32
+ MSR	94.8%	84.7%	30
+ 双注意力	95.6%	89.3%	28
+ XAI	95.6%	89.3%	27

关键发现：

低光照场景准确率提升 18% (71.3% → 89.3%)
双注意力机制贡献最大（5.2%提升）
可解释性设计不影响性能

IMS开发启示

1. 算法模块化设计

┌─────────────────────────────────────────┐
│           IMS疲劳检测流水线              │
├─────────────────────────────────────────┤
│  [输入] 红外摄像头 (940nm, 25fps)        │
│     ↓                                   │
│  [预处理] CLAHE + MSR自适应增强          │
│     ↓                                   │
│  [特征提取] InceptionV3 + 双注意力       │
│     ↓                                   │
│  [决策] PERCLOS + MCT/MYD融合           │
│     ↓                                   │
│  [输出] 疲劳等级 + 热力图               │
└─────────────────────────────────────────┘

2. 部署优化建议

平台	模型压缩	推理速度	精度损失
Qualcomm QCS8255	INT8量化	45 FPS	0.8%
TI TDA4VM	剪枝50%	38 FPS	1.2%
NVIDIA Orin	FP16	120 FPS	0.3%

3. 可解释性合规

Euro NCAP 2026要求：

疲劳检测系统需提供决策依据
Grad-CAM热力图可满足透明化要求

建议输出格式：

{
  "fatigue_level": "moderate",
  "confidence": 0.87,
  "attention_regions": ["left_eye", "right_eye", "mouth"],
  "trigger_duration": 3.2,
  "timestamp": "2026-04-23T00:15:32Z"
}

4. 低光照场景适配

硬件配置建议：

组件	型号	参数	用途
红外摄像头	OV2311	2MP, 全局快门	眼睛检测
红外补光	SFH 4740	940nm, 120mW/sr	夜间照明
滤光片	BP940	中心波长940nm	抑制可见光干扰

软件参数：

CLAHE clip_limit: 2.5-3.5（根据车窗透光率调整）
MSR scales: [15, 80, 250]（多尺度融合）
注意力阈值: 0.6（低于此值触发低置信度告警）

代码复现

完整推理管道

"""
论文复现：低光照疲劳检测完整管道
论文：Low-light driver drowsiness detection for real-time safety assistance
作者：Javed et al.
会议：Scientific Reports, 2026
"""

import cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torchvision import transforms
from PIL import Image


class LowLightEnhancer:
    """低光照增强模块"""
    
    def __init__(self, clip_limit: float = 3.0, tile_size: tuple = (8, 8)):
        self.clip_limit = clip_limit
        self.tile_size = tile_size
        self.scales = [15, 80, 250]
    
    def clahe(self, image: np.ndarray) -> np.ndarray:
        """CLAHE增强"""
        lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
        l, a, b = cv2.split(lab)
        
        clahe = cv2.createCLAHE(
            clipLimit=self.clip_limit,
            tileGridSize=self.tile_size
        )
        l_enhanced = clahe.apply(l)
        
        lab_enhanced = cv2.merge([l_enhanced, a, b])
        return cv2.cvtColor(lab_enhanced, cv2.COLOR_LAB2BGR)
    
    def msr(self, image: np.ndarray) -> np.ndarray:
        """多尺度Retinex"""
        image = image.astype(np.float64) + 1.0
        msr_result = np.zeros_like(image)
        
        for scale in self.scales:
            gaussian = cv2.GaussianBlur(image, (0, 0), scale)
            retinex = np.log10(image) - np.log10(gaussian)
            msr_result += retinex
        
        msr_result /= len(self.scales)
        
        for i in range(3):
            msr_result[:, :, i] = cv2.normalize(
                msr_result[:, :, i], None, 0, 255, cv2.NORM_MINMAX
            )
        
        return msr_result.astype(np.uint8)
    
    def __call__(self, image: np.ndarray) -> np.ndarray:
        """融合CLAHE和MSR"""
        clahe_result = self.clahe(image)
        msr_result = self.msr(image)
        
        # 加权融合
        enhanced = cv2.addWeighted(clahe_result, 0.6, msr_result, 0.4, 0)
        return enhanced


class DualAttentionDrowsinessNet(nn.Module):
    """双注意力疲劳检测网络"""
    
    def __init__(self, num_classes: int = 2):
        super().__init__()
        
        # 使用MobileNetV3作为轻量级backbone
        from torchvision.models import mobilenet_v3_small
        mobilenet = mobilenet_v3_small(pretrained=True)
        
        # 提取特征层
        self.features = mobilenet.features
        
        # 双注意力模块
        self.channel_attention = ChannelAttention(576, reduction=16)
        self.spatial_attention = SpatialAttention(kernel_size=7)
        
        # 分类头
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Sequential(
            nn.Linear(576, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes)
        )
    
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Args:
            x: 输入图像 (B, 3, 224, 224)
        
        Returns:
            logits: 分类输出 (B, num_classes)
        """
        # 特征提取
        x = self.features(x)
        
        # 双注意力
        x = self.channel_attention(x)
        x = self.spatial_attention(x)
        
        # 分类
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        
        return x


class DrowsinessDetector:
    """疲劳检测完整管道"""
    
    def __init__(self, model_path: str = None, device: str = 'cuda'):
        self.device = device
        
        # 初始化模型
        self.model = DualAttentionDrowsinessNet(num_classes=2)
        
        if model_path:
            self.model.load_state_dict(torch.load(model_path, map_location=device))
        
        self.model.to(device)
        self.model.eval()
        
        # 初始化增强器
        self.enhancer = LowLightEnhancer(clip_limit=3.0, tile_size=(8, 8))
        
        # 预处理
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225]
            )
        ])
    
    def detect(self, frame: np.ndarray) -> dict:
        """
        检测单帧疲劳状态
        
        Args:
            frame: BGR图像 (H, W, 3)
        
        Returns:
            result: 检测结果
        """
        # 低光照增强
        enhanced = self.enhancer(frame)
        
        # 预处理
        image_rgb = cv2.cvtColor(enhanced, cv2.COLOR_BGR2RGB)
        image_pil = Image.fromarray(image_rgb)
        input_tensor = self.transform(image_pil).unsqueeze(0).to(self.device)
        
        # 推理
        with torch.no_grad():
            logits = self.model(input_tensor)
            probs = F.softmax(logits, dim=1)
        
        # 解析结果
        drowsy_prob = probs[0, 1].item()
        
        result = {
            'drowsy_prob': drowsy_prob,
            'is_drowsy': drowsy_prob > 0.5,
            'enhanced_frame': enhanced
        }
        
        return result


# 测试代码
if __name__ == "__main__":
    # 初始化检测器
    detector = DrowsinessDetector(device='cuda')
    
    # 模拟低光照图像
    np.random.seed(42)
    dark_image = np.random.randint(30, 80, (480, 640, 3), dtype=np.uint8)
    
    # 检测
    result = detector.detect(dark_image)
    
    print(f"疲劳概率: {result['drowsy_prob']:.2%}")
    print(f"是否疲劳: {'是' if result['is_drowsy'] else '否'}")
    
    # 可视化增强效果
    cv2.imshow("Original", dark_image)
    cv2.imshow("Enhanced", result['enhanced_frame'])
    cv2.waitKey(0)

总结

维度	评估	备注
创新性	⭐⭐⭐⭐	双注意力+XAI首次结合
实用性	⭐⭐⭐⭐⭐	直接解决低光照痛点
可复现性	⭐⭐⭐⭐	代码完整，依赖清晰
部署难度	⭐⭐⭐	需优化至嵌入式平台
IMS价值	⭐⭐⭐⭐⭐	夜间疲劳检测关键方案

优先级： 🔥🔥🔥🔥🔥
建议落地： 立即在IMS项目中集成低光照增强模块

参考文献

Javed, M., et al. “Low-light driver drowsiness detection for real-time safety assistance using dual attention mechanisms in deep learning model.” Scientific Reports, 2026.
Dinges, A. K., et al. “PERCLOS: A valid measure of drowsiness in drivers.” Transportation Research, 1998.
Wang, Y., et al. “LLFormer: Transformer-based low-light image enhancement.” IEEE TIP, 2023.

发布时间： 2026-04-23
标签： #疲劳检测 #低光照增强 #注意力机制 #可解释AI #EuroNCAP #IMS开发

技术方案

#分心检测 #Euro NCAP 2026 #边缘部署 #疲劳检测

低光照疲劳检测新突破：双注意力机制+可解释AI

https://dapalm.com/2026/04/23/2026-04-23-low-light-drowsiness-detection-dual-attention/

作者

Mars

发布于

2026年4月23日

许可协议

LDIE-FDNet：YOLOv11 + 轻量级动态图像增强，实时疲劳检测新标杆上一篇

多模态融合DMS综述：摄像头+雷达+生理信号融合实现99.8%检测准确率下一篇