DMS/OMS 合成数据生成:隐私保护下的训练数据扩充方案

问题背景

DMS/OMS 数据集挑战:

| 挑战 | 描述 | 影响 |
| --- | --- | --- |
| 隐私问题 | 人脸/眼动数据敏感 | 数据采集困难 |
| 数据稀缺 | 极端场景数据少 | 模型泛化差 |
| 标注成本 | 关键点/状态标注昂贵 | 数据集规模受限 |
| 多样性不足 | 人种/年龄/场景覆盖不全 | 偏见风险 |

合成数据优势:

  • ✅ 无隐私问题
  • ✅ 可生成任意场景
  • ✅ 自动标注
  • ✅ 可控多样性

合成数据生成技术

1. 基于渲染的合成

Anyverse、Synthesis AI 等平台提供驾驶舱场景渲染。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
"""
驾驶舱场景渲染配置

使用 Blender/Unreal Engine 渲染
"""

import numpy as np
from typing import Dict, List

class CockpitRenderer:
    """Generate placeholder DMS/OMS synthetic samples for a cockpit camera.

    The class randomly samples a driver, a driver state and an environment
    from the option tables built in ``__init__`` and pairs them with an
    image, 68 facial landmarks and a gaze tuple. Rendering and landmark
    generation are stubs that return random arrays; a real implementation
    would call into a renderer such as Blender or Unreal Engine.

    All randomness comes from the global ``np.random`` state, so call
    ``np.random.seed`` beforehand for reproducible output.
    """

    # Gaze tuple returned for each named gaze direction.
    # NOTE(review): the first two components look like yaw/pitch in degrees
    # while the third is 1 -- units are not defined in this file; confirm.
    _GAZE_VECTORS = {
        'front': (0, 0, 1),
        'left': (-30, 0, 1),
        'right': (30, 0, 1),
        'down': (0, -30, 1),
        'up': (0, 30, 1)
    }
    _DEFAULT_GAZE = (0, 0, 1)

    def __init__(self):
        # Camera / scene settings. 'resolution' is (width, height) in pixels
        # and drives the shape of the images returned by _render_image.
        self.scene_config = {
            'vehicle_model': 'sedan_2026',
            'camera_position': 'steering_column',
            'camera_fov': 60,
            'resolution': (1920, 1080),
            'frame_rate': 30
        }

        # Driver appearance options. Both *_range entries are inclusive
        # (low, high) bounds.
        self.driver_config = {
            'gender': ['male', 'female'],
            'age_range': (18, 70),
            'ethnicity': ['asian', 'caucasian', 'african', 'hispanic'],
            'body_type': ['slim', 'average', 'heavy'],
            'height_range': (150, 200)  # cm
        }

        # Driver state options (each key is sampled independently).
        self.state_config = {
            'fatigue_levels': ['alert', 'mild_fatigue', 'severe_fatigue', 'microsleep'],
            'distraction_types': ['phone', 'navigation', 'eating', 'drinking', 'smoking', 'looking_away'],
            'gaze_directions': ['front', 'left', 'right', 'down', 'up'],
            'head_poses': ['front', 'yaw_left', 'yaw_right', 'pitch_down', 'pitch_up']
        }

        # Environment options (each key is sampled independently).
        self.environment_config = {
            'lighting': ['daylight', 'dusk', 'night', 'tunnel', 'overcast'],
            'weather': ['clear', 'rain', 'fog'],
            'time_of_day': ['morning', 'afternoon', 'evening', 'night']
        }

    @staticmethod
    def _choice(options) -> str:
        """Pick one option uniformly and return it as a plain ``str``."""
        return str(np.random.choice(options))

    @staticmethod
    def _randint_inclusive(bounds) -> int:
        """Draw an integer from the inclusive ``(low, high)`` range."""
        low, high = bounds
        # np.random.randint excludes its upper bound, hence the +1.
        return int(np.random.randint(low, high + 1))

    @staticmethod
    def _flag(probability: float) -> bool:
        """Return True with the given probability as a plain ``bool``."""
        return bool(np.random.choice([True, False], p=[probability, 1.0 - probability]))

    def generate_dataset(self, num_samples: int) -> List[Dict]:
        """Build a list of synthetic samples.

        Args:
            num_samples: Number of samples to generate. Zero or a negative
                value yields an empty list.

        Returns:
            A list of dicts with keys ``'image'`` (uint8 array of shape
            (height, width, 3)), ``'landmarks'`` (array of shape (68, 2)),
            ``'gaze'`` (3-tuple), ``'state'`` (dict) and ``'metadata'``
            (dict holding ``'driver_id'``, ``'driver_config'`` and
            ``'environment'``).
        """
        dataset = []

        for i in range(num_samples):
            # Draw one random configuration per sample.
            driver = self._sample_driver()
            state = self._sample_state()
            environment = self._sample_environment()

            image = self._render_image(driver, state, environment)

            # Annotations come from the sampled configuration, not from
            # manual labelling.
            landmarks = self._generate_landmarks(driver, state)
            gaze = self._generate_gaze(state)

            dataset.append({
                'image': image,
                'landmarks': landmarks,
                'gaze': gaze,
                'state': state,
                'metadata': {
                    # Index of the sample within this call.
                    'driver_id': i,
                    'driver_config': driver,
                    'environment': environment
                }
            })

        return dataset

    def _sample_driver(self) -> Dict:
        """Randomly sample a driver description from ``driver_config``."""
        return {
            'gender': self._choice(self.driver_config['gender']),
            'age': self._randint_inclusive(self.driver_config['age_range']),
            'ethnicity': self._choice(self.driver_config['ethnicity']),
            'body_type': self._choice(self.driver_config['body_type']),
            'height': self._randint_inclusive(self.driver_config['height_range']),
            'accessories': self._sample_accessories()
        }

    def _sample_accessories(self) -> Dict:
        """Sample independent accessory flags (sunglasses, mask, hat, glasses)."""
        return {
            'sunglasses': self._flag(0.3),
            'mask': self._flag(0.2),
            'hat': self._flag(0.1),
            'glasses': self._flag(0.4)
        }

    def _sample_state(self) -> Dict:
        """Randomly sample a driver state from ``state_config``."""
        return {
            'fatigue_level': self._choice(self.state_config['fatigue_levels']),
            'distraction_type': self._choice(self.state_config['distraction_types']),
            'gaze_direction': self._choice(self.state_config['gaze_directions']),
            'head_pose': self._choice(self.state_config['head_poses'])
        }

    def _sample_environment(self) -> Dict:
        """Randomly sample environment conditions from ``environment_config``."""
        return {
            'lighting': self._choice(self.environment_config['lighting']),
            'weather': self._choice(self.environment_config['weather']),
            'time_of_day': self._choice(self.environment_config['time_of_day'])
        }

    def _render_image(self, driver, state, environment) -> np.ndarray:
        """Return a placeholder image sized by ``scene_config['resolution']``.

        The arguments are accepted for interface compatibility but are not
        used by this stub; a real implementation would call a Blender or
        Unreal Engine API.

        Returns:
            Random uint8 array of shape (height, width, 3).
        """
        width, height = self.scene_config['resolution']
        # Upper bound is exclusive, so 256 is needed to cover 0..255.
        return np.random.randint(0, 256, (height, width, 3), dtype=np.uint8)

    def _generate_landmarks(self, driver, state) -> np.ndarray:
        """Return placeholder facial landmarks.

        The arguments are unused by this stub.

        Returns:
            Float array of shape (68, 2) with values in [0, 1000).
        """
        return np.random.rand(68, 2) * 1000

    def _generate_gaze(self, state) -> tuple:
        """Map ``state['gaze_direction']`` to its gaze tuple.

        Unknown directions fall back to the 'front' tuple ``(0, 0, 1)``.
        """
        return self._GAZE_VECTORS.get(state['gaze_direction'], self._DEFAULT_GAZE)


# 测试
if __name__ == "__main__":
    # Smoke test: generate 100 samples and show the first sample's metadata.
    sample_count = 100
    samples = CockpitRenderer().generate_dataset(sample_count)

    print(f"生成样本数: {len(samples)}")
    first_metadata = samples[0]['metadata']
    print(f"样本示例: {first_metadata}")

2. 基于GAN的合成

StyleGAN3 + 条件控制

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
"""
条件GAN生成驾驶员图像

控制年龄、性别、人种、状态
"""

import torch
import torch.nn as nn

class ConditionalDriverGAN(nn.Module):
    """Conditional image generator for driver images.

    A small MLP embeds the condition vector into the latent space; the
    embedding is added to the noise vector and decoded by a stack of
    transposed convolutions. This is a simplified generator (the article
    describes it as StyleGAN3-based, but only the layers below exist here).
    """

    def __init__(self, latent_dim=512, num_classes=10):
        super().__init__()

        # Maps a (B, num_classes) condition to a (B, latent_dim) embedding.
        self.condition_encoder = nn.Sequential(
            nn.Linear(num_classes, 256),
            nn.ReLU(),
            nn.Linear(256, latent_dim)
        )

        # First stage expands the 1x1 latent to 4x4.
        stages = [
            nn.ConvTranspose2d(latent_dim, 512, 4, 1, 0),
            nn.BatchNorm2d(512),
            nn.ReLU(),
        ]
        # Each following stage doubles the spatial size: 8, 16, 32.
        for in_channels, out_channels in ((512, 256), (256, 128), (128, 64)):
            stages.extend([
                nn.ConvTranspose2d(in_channels, out_channels, 4, 2, 1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
            ])
        # Final stage doubles to 64x64 and squashes to [-1, 1] RGB.
        stages.extend([
            nn.ConvTranspose2d(64, 3, 4, 2, 1),
            nn.Tanh(),
        ])
        self.generator = nn.Sequential(*stages)

    def forward(self, noise, condition):
        """Generate images from noise and a condition vector.

        Args:
            noise: Random noise of shape (B, latent_dim).
            condition: Condition vector of shape (B, num_classes).

        Returns:
            Generated images of shape (B, 3, 64, 64) with values in [-1, 1].
        """
        # Fuse the condition into the latent by simple addition.
        latent = noise + self.condition_encoder(condition)

        # Append two singleton spatial axes so the decoder sees (B, C, 1, 1).
        return self.generator(latent[..., None, None])


class ConditionVector:
    """Encode categorical driver attributes as a concatenated one-hot vector.

    Each attribute owns a contiguous segment of the vector, in the order the
    attributes are declared in ``self.attributes``.
    """

    def __init__(self):
        # Attribute name -> ordered list of allowed values.
        self.attributes = {
            'gender': ['male', 'female'],
            'age_group': ['young', 'middle', 'senior'],
            'ethnicity': ['asian', 'caucasian', 'african', 'hispanic'],
            'fatigue_level': ['alert', 'mild', 'severe'],
            'distraction': ['none', 'phone', 'other'],
            'accessories': ['none', 'glasses', 'sunglasses', 'mask']
        }

        # Total vector length is the sum of all segment lengths.
        self.num_classes = sum(map(len, self.attributes.values()))

    def encode(self, driver_attributes: Dict) -> torch.Tensor:
        """Encode driver attributes into a condition vector.

        Args:
            driver_attributes: Mapping from attribute name to value, e.g.
                ``{'gender': 'male', 'age_group': 'middle', ...}``. A missing
                attribute falls back to the first allowed value; a value
                that is not in the allowed list leaves its segment all-zero.

        Returns:
            Float tensor of shape (num_classes,) holding 0/1 entries.
        """
        hot_positions = []
        offset = 0
        for name, choices in self.attributes.items():
            chosen = driver_attributes.get(name, choices[0])
            if chosen in choices:
                hot_positions.append(offset + choices.index(chosen))
            offset += len(choices)

        condition = torch.zeros(self.num_classes)
        for position in hot_positions:
            condition[position] = 1

        return condition


# 测试
if __name__ == "__main__":
    condition_encoder = ConditionVector()

    # The generator's condition width must match the encoder's output width;
    # the generator default (10) does not match the encoded attribute vector.
    latent_dim = 512
    gan = ConditionalDriverGAN(
        latent_dim=latent_dim,
        num_classes=condition_encoder.num_classes,
    )
    # Inference only: keep BatchNorm from updating its running statistics.
    gan.eval()

    # Target attributes for the generated driver.
    target = {
        'gender': 'male',
        'age_group': 'middle',
        'ethnicity': 'asian',
        'fatigue_level': 'mild',
        'distraction': 'phone',
        'accessories': 'glasses'
    }

    # Encode the attributes and add a batch dimension.
    condition = condition_encoder.encode(target).unsqueeze(0)

    # Sample one noise vector of the generator's latent size.
    noise = torch.randn(1, latent_dim)

    # Generate the image without tracking gradients.
    with torch.no_grad():
        image = gan(noise, condition)

    print(f"生成图像形状: {image.shape}")

3. 域适应与迁移学习

从合成数据到真实数据

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
"""
域适应训练

从合成数据训练,适应真实场景
"""

import torch
import torch.nn as nn

class DomainAdaptiveDMS(nn.Module):
    """DMS classifier with an adversarial domain head.

    A shared backbone feeds two heads: a task classifier and a domain
    classifier. The domain head receives the features through
    ``GradientReversalFunction`` so that its gradient is reversed before it
    reaches the backbone.
    """

    def __init__(self, backbone, num_classes=3, feature_dim=2048):
        """
        Args:
            backbone: Feature extractor module. Its output is fed directly
                into ``nn.Linear`` layers, so it must produce
                ``feature_dim`` features in its last dimension.
            num_classes: Number of task classes.
            feature_dim: Width of the backbone output. Defaults to 2048.
        """
        super().__init__()

        # Shared feature extractor.
        self.feature_extractor = backbone

        # Task head: outputs raw logits over num_classes.
        self.task_classifier = nn.Sequential(
            nn.Linear(feature_dim, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )

        # Domain head: outputs one probability per sample (after Sigmoid).
        self.domain_classifier = nn.Sequential(
            nn.Linear(feature_dim, 256),
            nn.ReLU(),
            nn.Linear(256, 1),
            nn.Sigmoid()
        )

    def forward(self, x, alpha=1.0):
        """Run both heads on the input.

        Args:
            x: Input batch accepted by the backbone.
            alpha: Scale applied to the reversed gradient of the domain head.

        Returns:
            task_output: Task logits with ``num_classes`` entries per sample.
            domain_output: Domain probability with one entry per sample
                (trailing dimension of size 1).
        """
        features = self.feature_extractor(x)

        task_output = self.task_classifier(features)

        # Identity in the forward pass; the gradient is negated and scaled
        # by alpha on the way back into the backbone.
        reverse_features = GradientReversalFunction.apply(features, alpha)
        domain_output = self.domain_classifier(reverse_features)

        return task_output, domain_output


class GradientReversalFunction(torch.autograd.Function):
    """Gradient reversal layer for adversarial domain adaptation.

    Forward returns a copy of the input; backward negates the incoming
    gradient and scales it by ``alpha``.
    """

    @staticmethod
    def forward(ctx, x, alpha):
        # Remember the scale for the backward pass.
        ctx.alpha = alpha
        passthrough = x.clone()
        return passthrough

    @staticmethod
    def backward(ctx, grad_output):
        reversed_grad = grad_output.neg() * ctx.alpha
        # alpha is not differentiable, so its gradient slot is None.
        return reversed_grad, None


def train_domain_adaptive(
    model,
    synthetic_loader,
    real_loader,
    num_epochs=100
):
    """Train a model with a task loss plus an adversarial domain loss.

    Synthetic batches supply the task labels and carry domain label 0;
    real batches are used only for the domain loss and carry domain label 1.
    The two loaders are iterated in lockstep, so each epoch stops at the
    shorter one.

    Args:
        model: Module called as ``model(images, alpha=...)`` that returns
            ``(task_output, domain_output)``, where ``domain_output`` holds
            probabilities in [0, 1].
        synthetic_loader: Iterable of ``(images, labels)`` batches.
        real_loader: Iterable of ``(images, _)`` batches; the second item is
            ignored.
        num_epochs: Number of passes over the paired loaders.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    task_criterion = nn.CrossEntropyLoss()
    domain_criterion = nn.BCELoss()

    model.train()

    for epoch in range(num_epochs):
        # Stay None when the loaders yield no batches this epoch.
        task_loss = None
        domain_loss = None

        for syn_batch, real_batch in zip(synthetic_loader, real_loader):
            syn_images, syn_labels = syn_batch
            real_images, _ = real_batch

            # Synthetic data (domain label = 0).
            task_pred_syn, domain_pred_syn = model(syn_images, alpha=1.0)
            task_loss = task_criterion(task_pred_syn, syn_labels)
            # BCELoss requires the target to have the same shape as the
            # prediction; zeros_like also matches its dtype and device.
            domain_loss_syn = domain_criterion(
                domain_pred_syn, torch.zeros_like(domain_pred_syn)
            )

            # Real data (domain label = 1).
            _, domain_pred_real = model(real_images, alpha=1.0)
            domain_loss_real = domain_criterion(
                domain_pred_real, torch.ones_like(domain_pred_real)
            )

            # Total loss: the domain term is down-weighted by 0.1.
            domain_loss = domain_loss_syn + domain_loss_real
            total_loss = task_loss + 0.1 * domain_loss

            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

        if task_loss is None:
            # Nothing was trained; avoid referencing undefined losses.
            print(f"Epoch {epoch}: no batches")
            continue

        # Reports the losses of the last batch of the epoch.
        print(f"Epoch {epoch}: task_loss={task_loss.item():.4f}, domain_loss={domain_loss.item():.4f}")


# 测试
if __name__ == "__main__":
    # Use an ImageNet-pretrained backbone. The ``pretrained=True`` flag is
    # deprecated in torchvision; the explicit weights enum below selects the
    # same checkpoint that flag used to load.
    from torchvision.models import ResNet50_Weights, resnet50
    backbone = resnet50(weights=ResNet50_Weights.IMAGENET1K_V1)
    # Drop the classification layer so the backbone outputs pooled features.
    backbone.fc = nn.Identity()

    model = DomainAdaptiveDMS(backbone)
    print("模型初始化完成")

合成数据质量评估

评估指标

| 指标 | 描述 | 目标 |
| --- | --- | --- |
| FID | Fréchet Inception Distance | < 50 |
| LPIPS | Learned Perceptual Image Patch Similarity | > 0.5 |
| 任务性能 | 在真实数据上的准确率 | 达到真实数据训练模型的 90% 以上 |
| 多样性 | 生成样本的多样性 | 覆盖所有场景 |

评估代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
def evaluate_synthetic_data(
    synthetic_dataset,
    real_dataset,
    model
):
    """Evaluate synthetic data quality.

    Runs three steps in order: FID between the two datasets, accuracy on
    the real dataset after training ``model`` on the synthetic dataset, and
    a diversity score for the synthetic dataset.

    NOTE(review): ``calculate_fid``, ``train_on_synthetic``, ``test_on_real``
    and ``calculate_diversity`` are not defined in this snippet -- they must
    be provided elsewhere; their exact contracts are not visible here.

    Returns:
        Dict with keys ``'fid'``, ``'accuracy'`` and ``'diversity'``.
    """
    report = {}

    # 1. Distribution distance between synthetic and real data.
    report['fid'] = calculate_fid(synthetic_dataset, real_dataset)

    # 2. Train on synthetic data only, then test on real data.
    train_on_synthetic(model, synthetic_dataset)
    report['accuracy'] = test_on_real(model, real_dataset)

    # 3. Diversity of the synthetic samples.
    report['diversity'] = calculate_diversity(synthetic_dataset)

    return report

隐私保护策略

| 策略 | 描述 | 适用场景 |
| --- | --- | --- |
| 完全合成 | 100% 合成数据 | 无真实数据场景 |
| 差分隐私 | 添加噪声保护真实数据 | 有真实数据场景 |
| 联邦学习 | 分布式训练,数据不出本地 | 多方协作场景 |
| 匿名化 | 去除身份信息 | 数据共享场景 |

总结: 合成数据是解决 DMS/OMS 数据集隐私和稀缺问题的关键技术。建议采用渲染+GAN混合方案,结合域适应训练,确保合成数据在真实场景的有效性。


DMS/OMS 合成数据生成:隐私保护下的训练数据扩充方案
https://dapalm.com/2026/06/05/2026-06-05-Synthetic-Data-Generation-DMS-OMS/
作者
Mars
发布于
2026年6月5日
许可协议