1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139
| """ 论文:Deep Learning-Based Real-Time Driver Cognitive Distraction Detection 作者:IEEE Access 2025 复现:IMS知识库
核心方法:ResNet-18骨干 + 多尺度LSTM + 自注意力 """
import torch import torch.nn as nn import torchvision.models as models
class CognitiveDistractionDetector(nn.Module):
    """Real-time cognitive-distraction detector for driver monitoring.

    Pipeline: per-frame ResNet-18 features -> multi-scale temporal
    extractor + self-attention -> sum-fused -> MLP classifier.

    Performance reported by the paper (not verified here):
        - Accuracy: 94.2%
        - FPS: 45 (RTX 3090)
        - Latency: 22 ms
    """

    def __init__(self, num_classes=2, pretrained=True):
        """
        Args:
            num_classes: number of output classes (default 2: attentive / distracted).
            pretrained: load ImageNet weights for the ResNet-18 backbone.
        """
        super().__init__()
        # NOTE(review): `pretrained=` is deprecated in torchvision >= 0.13
        # (use `weights=models.ResNet18_Weights.DEFAULT`); kept as-is to stay
        # compatible with the torchvision version this repro targets — confirm.
        resnet = models.resnet18(pretrained=pretrained)
        # Drop the final FC layer; keep conv stages + global avg-pool,
        # yielding a 512-dim feature per frame.
        self.backbone = nn.Sequential(*list(resnet.children())[:-1])
        self.feature_dim = 512  # ResNet-18 penultimate feature width

        # Defined elsewhere in this file; presumably maps (B, T, 512) to a
        # (B, 512) clip-level feature — TODO confirm against its definition.
        self.temporal_extractor = MultiScaleTemporalFeatureExtractor(
            feature_dim=self.feature_dim,
            num_scales=3,
        )
        # Defined elsewhere in this file; presumably returns an attended
        # feature matching the temporal branch's shape plus attention weights.
        self.attention = CognitiveAttentionModule(
            feature_dim=self.feature_dim,  # was a hard-coded 512; same value
            num_heads=8,
        )
        self.classifier = nn.Sequential(
            nn.Linear(self.feature_dim, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes),
        )

    def forward(self, x):
        """Classify a video clip as attentive vs. distracted.

        Args:
            x: (B, T, C, H, W) video clip —
               B: batch size; T: frames (default 30 frames = 1 s);
               C: channels (3); H, W: height and width (64x64).

        Returns:
            logits: (B, num_classes) classification logits.
            attn_weights: attention weights for visualization.
        """
        B, T, C, H, W = x.shape
        # Fold time into the batch axis so the 2D backbone sees single frames.
        x = x.view(B * T, C, H, W)
        features = self.backbone(x)
        # Single reshape to (B, T, feature_dim); the original did a redundant
        # intermediate view(B*T, -1) first.
        features = features.view(B, T, -1)

        # Two parallel branches over the per-frame feature sequence.
        temporal_features = self.temporal_extractor(features)
        attended, attn_weights = self.attention(features)

        # Additive fusion of the two branches, then classification.
        fused = temporal_features + attended
        logits = self.classifier(fused)
        return logits, attn_weights
if __name__ == "__main__": model = CognitiveDistractionDetector(num_classes=2, pretrained=False) model.eval() batch_size = 4 num_frames = 30 video_clip = torch.randn(batch_size, num_frames, 3, 64, 64) with torch.no_grad(): logits, attn_weights = model(video_clip) probs = torch.softmax(logits, dim=-1) predictions = torch.argmax(probs, dim=-1) print("=" * 60) print("认知分心检测结果") print("=" * 60) print(f"输入形状: {video_clip.shape}") print(f"输出形状: {logits.shape}") print(f"预测类别: {predictions.tolist()}") print(f"分心概率: {probs[:, 1].tolist()}") print(f"注意力权重形状: {attn_weights.shape}") import time model = model.cuda() video_clip = video_clip.cuda() for _ in range(10): _ = model(video_clip) torch.cuda.synchronize() start = time.time() for _ in range(100): _ = model(video_clip) torch.cuda.synchronize() end = time.time() fps = 100 * batch_size / (end - start) latency = (end - start) / 100 * 1000 print(f"\n性能指标:") print(f" FPS: {fps:.1f}") print(f" 延迟: {latency:.2f}ms")
|