1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
| import torch import torch.nn as nn
class DMSFusionNetwork(nn.Module):
    """DMS multimodal fusion network.

    Encodes an RGB image, an IR image and a radar feature vector into one
    token each, mixes the three tokens with multi-head self-attention,
    averages them, and maps the result to 4 output scores.

    NOTE(review): ``ResNet18`` is not defined in this block. It is assumed to
    return a 512-channel feature map ``(B, 512, h, w)`` so that the pooled
    image features match the attention embedding size -- confirm.
    """

    def __init__(self):
        super().__init__()

        # One backbone plus a global average pool per image modality.
        self.rgb_backbone = ResNet18(pretrained=True)
        self.rgb_pool = nn.AdaptiveAvgPool2d(1)
        self.ir_backbone = ResNet18(pretrained=True)
        self.ir_pool = nn.AdaptiveAvgPool2d(1)

        # Radar branch: 128 -> 256 -> 128 MLP.
        self.radar_encoder = nn.Sequential(
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
        )

        # Self-attention over the three modality tokens, batch-first layout.
        self.fusion = nn.MultiheadAttention(
            embed_dim=512,
            num_heads=8,
            batch_first=True,
        )

        # Classification head: 512 -> 256 -> 4.
        self.classifier = nn.Sequential(
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 4),
        )

    def forward(self, rgb, ir, radar):
        """Run the forward pass.

        Args:
            rgb: RGB image tensor, ``(B, 3, H, W)``.
            ir: IR image tensor, ``(B, 1, H, W)``. A single-channel input is
                broadcast to the channel count of ``rgb`` before it reaches
                the IR backbone.
            radar: Radar feature tensor, ``(B, 128)``.

        Returns:
            Tensor of shape ``(B, 4)`` with the raw classifier outputs
            (no softmax is applied here).
        """
        # ``ir_backbone`` is constructed exactly like ``rgb_backbone``, so it
        # expects the same number of input channels. Broadcast a 1-channel IR
        # image across channels instead of failing in the first convolution.
        if ir.dim() == 4 and ir.size(1) == 1 and rgb.size(1) != 1:
            ir = ir.expand(-1, rgb.size(1), -1, -1)

        rgb_feat = self.rgb_backbone(rgb)
        rgb_feat = self.rgb_pool(rgb_feat).flatten(1)

        ir_feat = self.ir_backbone(ir)
        ir_feat = self.ir_pool(ir_feat).flatten(1)

        radar_feat = self.radar_encoder(radar)
        # Zero-pad the radar token on the right up to the attention embedding
        # size (128 -> 512 with the current layers) so it can be stacked with
        # the image tokens.
        pad_width = self.fusion.embed_dim - radar_feat.size(-1)
        radar_feat = nn.functional.pad(radar_feat, (0, pad_width))

        # (B, 3, embed_dim): one token per modality.
        multi_modal = torch.stack([rgb_feat, ir_feat, radar_feat], dim=1)

        fused, _ = self.fusion(multi_modal, multi_modal, multi_modal)
        # Average over the modality axis to get one vector per sample.
        fused = fused.mean(dim=1)

        return self.classifier(fused)
|