| """ 多模态疲劳检测融合网络 """
from typing import Dict, Optional, Tuple

import torch
import torch.nn as nn
import torch.nn.functional as F
class MultiModalFatigueDetector(nn.Module):
    """Multi-modal fatigue detector.

    Input modalities:
        - visual: facial video frame features
        - ECG: electrocardiogram signal features
        - EDA: electrodermal activity features
        - behavior: steering-wheel angle sequence features

    Fusion strategy: attention-based mid fusion. Each modality is projected
    into a shared space, mixed with cross-modal multi-head self-attention,
    then combined by a learned per-modality reliability weighting.
    """

    def __init__(self,
                 visual_dim: int = 512,
                 ecg_dim: int = 128,
                 eda_dim: int = 64,
                 behavior_dim: int = 32,
                 fusion_dim: int = 256,
                 num_classes: int = 4):
        super().__init__()
        # Per-modality encoders: project each raw feature vector into the
        # shared fusion space of size fusion_dim.
        self.visual_encoder = nn.Sequential(
            nn.Linear(visual_dim, 256), nn.ReLU(), nn.Dropout(0.3),
            nn.Linear(256, fusion_dim),
        )
        self.ecg_encoder = nn.Sequential(
            nn.Linear(ecg_dim, 64), nn.ReLU(), nn.Dropout(0.3),
            nn.Linear(64, fusion_dim),
        )
        self.eda_encoder = nn.Sequential(
            nn.Linear(eda_dim, 32), nn.ReLU(), nn.Dropout(0.3),
            nn.Linear(32, fusion_dim),
        )
        self.behavior_encoder = nn.Sequential(
            nn.Linear(behavior_dim, 16), nn.ReLU(), nn.Dropout(0.3),
            nn.Linear(16, fusion_dim),
        )
        # Cross-modal attention treats the 4 encoded modalities as a
        # length-4 token sequence (batch_first: (B, 4, fusion_dim)).
        self.cross_modal_attention = nn.MultiheadAttention(
            embed_dim=fusion_dim, num_heads=4, batch_first=True,
        )
        # Reliability network: softmax weights over the 4 modalities,
        # computed from the concatenated attended features.
        self.reliability_net = nn.Sequential(
            nn.Linear(fusion_dim * 4, 128), nn.ReLU(),
            nn.Linear(128, 4), nn.Softmax(dim=-1),
        )
        # Fatigue-level classifier on the fused representation.
        self.classifier = nn.Sequential(
            nn.Linear(fusion_dim, 128), nn.ReLU(), nn.Dropout(0.3),
            nn.Linear(128, num_classes),
        )
        # Scalar prediction confidence squashed to (0, 1).
        self.confidence_head = nn.Sequential(
            nn.Linear(fusion_dim, 32), nn.ReLU(),
            nn.Linear(32, 1), nn.Sigmoid(),
        )

    def forward(self,
                visual_feat: torch.Tensor,
                ecg_feat: torch.Tensor,
                eda_feat: torch.Tensor,
                behavior_feat: torch.Tensor
                ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """Run attention-weighted multi-modal fusion and classification.

        Args:
            visual_feat: visual features, shape (B, visual_dim)
            ecg_feat: ECG features, shape (B, ecg_dim)
            eda_feat: EDA features, shape (B, eda_dim)
            behavior_feat: behavior features, shape (B, behavior_dim)

        Returns:
            logits: classification output, shape (B, num_classes)
            confidence: prediction confidence in (0, 1), shape (B, 1)
            modality_weights: softmax modality weights, shape (B, 4)
        """
        batch_size = visual_feat.size(0)

        # Encode each modality into the shared space: (B, fusion_dim) each.
        v_encoded = self.visual_encoder(visual_feat)
        e_encoded = self.ecg_encoder(ecg_feat)
        d_encoded = self.eda_encoder(eda_feat)
        b_encoded = self.behavior_encoder(behavior_feat)

        # Stack modalities as a token sequence: (B, 4, fusion_dim).
        multi_modal_seq = torch.stack(
            [v_encoded, e_encoded, d_encoded, b_encoded], dim=1)

        # Self-attention across modalities (query = key = value).
        attended, _ = self.cross_modal_attention(
            multi_modal_seq, multi_modal_seq, multi_modal_seq)

        # FIX: use reshape instead of view — .view() raises on
        # non-contiguous tensors and the attention output is not
        # guaranteed to be contiguous; .reshape() copies only if needed.
        concat_feat = attended.reshape(batch_size, -1)
        modality_weights = self.reliability_net(concat_feat)  # (B, 4)

        # Reliability-weighted sum of attended tokens: (B, fusion_dim).
        weighted_feat = (attended * modality_weights.unsqueeze(-1)).sum(dim=1)

        logits = self.classifier(weighted_feat)
        confidence = self.confidence_head(weighted_feat)
        return logits, confidence, modality_weights
if __name__ == "__main__": model = MultiModalFatigueDetector() visual = torch.randn(4, 512) ecg = torch.randn(4, 128) eda = torch.randn(4, 64) behavior = torch.randn(4, 32) logits, conf, weights = model(visual, ecg, eda, behavior) print(f"视觉特征: {visual.shape}") print(f"ECG特征: {ecg.shape}") print(f"EDA特征: {eda.shape}") print(f"行为特征: {behavior.shape}") print(f"\n分类输出: {logits.shape}") print(f"置信度: {conf.shape}") print(f"模态权重: {weights.shape}") print(f"\n模态权重样本: {weights[0].detach().numpy()}")