DMS 数据标注规范:从原始数据到训练集

DMS 数据标注规范:从原始数据到训练集

引言

高质量的标注数据是DMS算法训练的基础。然而,驾驶员监控数据的标注面临诸多挑战:主观性强、边界模糊、隐私敏感等。本文将系统介绍DMS数据标注规范、质量控制方法和开源数据集资源。

标注类型与规范

1. 疲劳标注

| 标注类型 | 定义 | 标签值 |
| --- | --- | --- |
| PERCLOS | 时间窗口内闭眼比例 | 0-100% |
| 疲劳等级 | 综合疲劳程度 | 0-3(正常/轻/中/重) |
| 眨眼事件 | 单次眨眼起止时间 | (start_frame, end_frame) |
| 微睡眠 | 短暂失去意识 | 起止帧 + 持续时间 |
| 打哈欠 | 嘴巴张开事件 | (start_frame, end_frame, amplitude) |

疲劳等级判定标准:

| 等级 | PERCLOS | 眨眼持续时间 | 眨眼频率 | 主观状态 |
| --- | --- | --- | --- | --- |
| 0-正常 | <15% | <250ms | 15-20次/分 | 清醒 |
| 1-轻度 | 15-20% | 250-350ms | 10-15次/分 | 略感疲倦 |
| 2-中度 | 20-25% | 350-500ms | 5-10次/分 | 明显疲劳 |
| 3-重度 | >25% | >500ms | <5次/分 | 极度疲劳 |

2. 分心标注

| 标注类型 | 定义 | 标签值 |
| --- | --- | --- |
| 视线方向 | 注视区域分类 | 前方/左后视镜/右后视镜/仪表盘/中控/手机/其他 |
| 头部姿态 | 3D姿态角 | (yaw, pitch, roll) |
| 分心类型 | 分心行为分类 | 手机/中控/乘客/食物/其他 |
| 分心持续时间 | 单次分心时长 | 秒数 |
| 手部状态 | 手的位置 | 方向盘/离开方向盘 |

视线区域定义:

        ┌─────────────────────────────┐
        │         前挡风玻璃          │
        │           (前方)            │
        │                             │
┌───────┼─────────────────────────────┼───────┐
│ 左后  │                             │ 右后  │
│ 视镜  │                             │ 视镜  │
├───────┼─────────────────────────────┼───────┤
│       │           方向盘            │       │
│       │              │              │       │
│       │    仪表盘    │     中控     │       │
│       │      ▼              ▼       │       │
└───────┴─────────────────────────────┴───────┘

3. 行为标注

| 标注类型 | 定义 | 标签值 |
| --- | --- | --- |
| 眼睛状态 | 睁眼/闭眼 | 0/1 |
| 嘴巴状态 | 正常/说话/打哈欠/微笑 | 0/1/2/3 |
| 眼镜/墨镜 | 是否佩戴 | 0/1/2(无/眼镜/墨镜) |
| 口罩 | 是否佩戴 | 0/1 |
| 手持物体 | 手持物品类型 | 无/手机/食物/饮料/其他 |

标注流程

标准流程

数据采集 → 数据预处理 → 标注任务分配 → 标注执行 → 质量检查 → 验收归档

详细步骤

import json
import time
from dataclasses import dataclass, field
from enum import Enum
from typing import Dict, List, Optional

class AnnotationType(Enum):
    """Kinds of annotation a task can request; each value is the kind's string tag."""
    FATIGUE = "fatigue"
    DISTRACTION = "distraction"
    GAZE = "gaze"
    HEAD_POSE = "head_pose"
    BEHAVIOR = "behavior"

@dataclass
class AnnotationTask:
    """A unit of annotation work covering a frame range of one video.

    Attributes:
        task_id: Identifier of the task.
        video_path: Path of the video to annotate.
        annotation_type: Kind of annotation requested.
        start_frame: Start frame of the range to annotate.
        end_frame: End frame of the range to annotate.
        assigned_to: ID of the annotator the task is assigned to.
        status: Workflow state; "pending" by default.
        created_at: Unix timestamp taken when the instance is created.
    """
    task_id: str
    video_path: str
    annotation_type: AnnotationType
    start_frame: int
    end_frame: int
    assigned_to: str
    status: str = "pending"
    # default_factory takes the timestamp per instance; a plain
    # ``= time.time()`` default is evaluated only once, when the class
    # body runs, so every task would share the import-time value.
    created_at: float = field(default_factory=time.time)

@dataclass
class AnnotationResult:
    """Annotations submitted by one annotator for one task.

    Attributes:
        task_id: Identifier of the task the result belongs to.
        annotator_id: ID of the annotator who produced the result.
        frame_annotations: One annotation dict per annotated frame.
        completed_at: Unix timestamp taken when the instance is created.
        review_status: Review state; "pending" by default.
    """
    task_id: str
    annotator_id: str
    frame_annotations: List[Dict]
    # default_factory takes the timestamp per instance; a plain
    # ``= time.time()`` default is evaluated only once, when the class
    # body runs, so every result would share the import-time value.
    completed_at: float = field(default_factory=time.time)
    review_status: str = "pending"

class AnnotationPipeline:
    """In-memory registry of annotation tasks and their submitted results."""

    def __init__(self):
        """Create a pipeline with no tasks, results, or quality metrics."""
        self.tasks: Dict[str, AnnotationTask] = {}
        self.results: Dict[str, AnnotationResult] = {}
        self.quality_metrics = {}

    def create_task(self,
                    video_path: str,
                    annotation_type: AnnotationType,
                    start_frame: int,
                    end_frame: int) -> str:
        """Register a new, unassigned annotation task.

        Args:
            video_path: Path of the video to annotate.
            annotation_type: Kind of annotation requested.
            start_frame: Start frame of the range.
            end_frame: End frame of the range.

        Returns:
            The generated task ID, formatted as
            ``task_<number of existing tasks>_<unix seconds>``.
        """
        task_id = f"task_{len(self.tasks)}_{int(time.time())}"
        self.tasks[task_id] = AnnotationTask(
            task_id=task_id,
            video_path=video_path,
            annotation_type=annotation_type,
            start_frame=start_frame,
            end_frame=end_frame,
            assigned_to="",
        )
        return task_id

    def assign_task(self, task_id: str, annotator_id: str):
        """Assign a task to an annotator and mark it "assigned".

        Unknown task IDs are ignored.

        Args:
            task_id: ID of the task to assign.
            annotator_id: ID of the annotator receiving the task.
        """
        task = self.tasks.get(task_id)
        if task is None:
            return
        task.assigned_to = annotator_id
        task.status = "assigned"

    def submit_result(self,
                      task_id: str,
                      annotator_id: str,
                      annotations: List[Dict]):
        """Store an annotator's result and mark the task "completed".

        Args:
            task_id: ID of the task the result belongs to.
            annotator_id: ID of the submitting annotator.
            annotations: Per-frame annotation dicts.

        Raises:
            KeyError: If ``task_id`` is not a registered task. Nothing is
                stored in that case.
        """
        # Resolve the task before storing anything, so an unknown ID cannot
        # leave an orphan entry in ``self.results``.
        task = self.tasks[task_id]

        self.results[task_id] = AnnotationResult(
            task_id=task_id,
            annotator_id=annotator_id,
            frame_annotations=annotations,
        )
        task.status = "completed"

class AnnotationValidator:
    """Stateless structural checks for single annotation records."""

    @staticmethod
    def validate_fatigue_annotation(annotation: Dict) -> bool:
        """Check that a fatigue annotation is complete and within range.

        A record is valid when it has ``frame_id``, ``perclos`` and
        ``fatigue_level``, ``perclos`` lies in [0, 100], and
        ``fatigue_level`` is a whole number in [0, 3].

        Args:
            annotation: The annotation record to check.

        Returns:
            True if the record is valid, False otherwise. Values that cannot
            be compared numerically (None, strings, ...) make the record
            invalid rather than raising.
        """
        required_fields = ('frame_id', 'perclos', 'fatigue_level')
        if any(name not in annotation for name in required_fields):
            return False

        try:
            if not 0 <= annotation['perclos'] <= 100:
                return False

            level = annotation['fatigue_level']
            # Fatigue levels are discrete (0-3), so fractional values such
            # as 1.5 are rejected even though they fall inside the range.
            return bool(0 <= level <= 3 and level == int(level))
        except (TypeError, ValueError):
            # Non-numeric values: a validator reports them, it must not crash.
            return False

    @staticmethod
    def validate_gaze_annotation(annotation: Dict) -> bool:
        """Check that a gaze annotation names a known gaze region.

        Args:
            annotation: The annotation record to check.

        Returns:
            True if ``gaze_region`` is present and is one of the accepted
            region names, False otherwise.
        """
        valid_regions = ('front', 'left_mirror', 'right_mirror', 'dashboard',
                         'center_console', 'phone', 'other')

        if 'gaze_region' not in annotation:
            return False

        return annotation['gaze_region'] in valid_regions

class QualityController:
    """Quality checks for annotations: IoU, annotator agreement, result reports."""

    def __init__(self,
                 iou_threshold: float = 0.8,
                 agreement_threshold: float = 0.85):
        """Store the thresholds used by the quality checks.

        Args:
            iou_threshold: IoU that two boxes must exceed to count as matching.
            agreement_threshold: Agreement threshold; stored on the instance
                but not consulted by any method of this class.
        """
        self.iou_threshold = iou_threshold
        self.agreement_threshold = agreement_threshold

    def calculate_iou(self, box1: tuple, box2: tuple) -> float:
        """Compute the intersection-over-union of two boxes.

        Args:
            box1: First box as (x1, y1, x2, y2).
            box2: Second box in the same layout.

        Returns:
            Intersection area divided by union area; 0.0 when the boxes do
            not overlap or the union area is not positive.
        """
        left = max(box1[0], box2[0])
        top = max(box1[1], box2[1])
        right = min(box1[2], box2[2])
        bottom = min(box1[3], box2[3])

        if right < left or bottom < top:
            return 0.0

        intersection = (right - left) * (bottom - top)
        area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
        area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
        union = area1 + area2 - intersection

        # Degenerate (zero-area) boxes would otherwise divide by zero.
        return intersection / union if union > 0 else 0.0

    def check_inter_annotator_agreement(self,
                                         annotations1: List[Dict],
                                         annotations2: List[Dict]) -> float:
        """Compute the fraction of positions where two annotators agree.

        Records are compared pairwise by position. A pair agrees when the
        ``label`` values are equal and, if both records carry a ``bbox``,
        the boxes' IoU exceeds ``self.iou_threshold``.

        Args:
            annotations1: Records from the first annotator.
            annotations2: Records from the second annotator.

        Returns:
            Agreeing pairs divided by total pairs; 0.0 when the lists differ
            in length or are empty.
        """
        if len(annotations1) != len(annotations2):
            return 0.0

        total = len(annotations1)
        if total == 0:
            return 0.0

        agreements = 0
        for a1, a2 in zip(annotations1, annotations2):
            if a1.get('label') != a2.get('label'):
                continue
            # Detection records must also overlap sufficiently.
            # NOTE(review): a pair where only one record has a bbox still
            # counts as agreement - confirm this is intended.
            if 'bbox' in a1 and 'bbox' in a2:
                iou = self.calculate_iou(a1['bbox'], a2['bbox'])
                if not iou > self.iou_threshold:
                    continue
            agreements += 1

        return agreements / total

    def assess_annotation_quality(self,
                                   result: "AnnotationResult") -> Dict:
        """Build a quality report for one annotation result.

        Counts breaks in the ``frame_id`` sequence and flags every frame
        whose ``fatigue_level`` lies outside 0-3. A record without
        ``frame_id`` is treated as frame 0 in both checks.

        Args:
            result: Object exposing ``frame_annotations``, a list of
                per-frame annotation dicts.

        Returns:
            Dict with ``total_frames``, ``issues`` (list of messages) and
            ``quality_score`` (100 minus 5 per issue, floored at 0).
        """
        frames = result.frame_annotations

        gaps = 0
        prev_frame = -1
        range_issues = []
        for ann in frames:
            frame_id = ann.get('frame_id', 0)

            # Continuity: each frame should directly follow the previous one.
            if prev_frame >= 0 and frame_id != prev_frame + 1:
                gaps += 1
            prev_frame = frame_id

            # Range: negative levels are as invalid as levels above 3.
            if 'fatigue_level' in ann and not 0 <= ann['fatigue_level'] <= 3:
                range_issues.append(f"帧{frame_id}: 疲劳等级超出范围")

        issues = []
        if gaps > 0:
            issues.append(f"发现{gaps}处帧间隔")
        issues.extend(range_issues)

        return {
            'total_frames': len(frames),
            'issues': issues,
            'quality_score': max(0, 100 - len(issues) * 5)
        }

# Example of the annotation file format: one video, its per-frame
# annotations, and annotator/reviewer metadata.
ANNOTATION_FORMAT_EXAMPLE = {
    "video_info": {
        "video_id": "dmd_001",
        "duration_seconds": 60,
        "fps": 30,
        "resolution": [1920, 1080]
    },
    "annotations": [
        {
            "frame_id": 0,
            "timestamp": 0.0,
            "face": {
                "bbox": [100, 200, 300, 450],
                # 68 placeholder [x, y] points. The original comprehension
                # used undefined names ``x``/``y`` and raised NameError
                # as soon as the module was imported.
                "landmarks": [[0.0, 0.0] for _ in range(68)]
            },
            "eyes": {
                "left_eye": {
                    "bbox": [120, 280, 160, 310],
                    "ear": 0.35,
                    "state": "open"
                },
                "right_eye": {
                    "bbox": [200, 280, 240, 310],
                    "ear": 0.33,
                    "state": "open"
                }
            },
            "gaze": {
                "region": "front",
                "yaw": 5.2,
                "pitch": -2.1
            },
            "fatigue": {
                "perclos": 8.5,
                "blink_rate": 18,
                "fatigue_level": 0
            }
        }
    ],
    "metadata": {
        "annotator": "annotator_001",
        "annotated_at": "2026-06-01T10:00:00Z",
        "reviewed_by": "reviewer_001",
        "review_status": "approved"
    }
}

# Usage example: create a task, assign it, submit annotations, then run the
# quality check on the stored result.
if __name__ == "__main__":
    demo_pipeline = AnnotationPipeline()

    # Register a fatigue task covering frames 0-900 (30 seconds at 30 fps).
    new_task_id = demo_pipeline.create_task(
        "/data/dmd/video_001.mp4",
        AnnotationType.FATIGUE,
        0,
        900,
    )
    print(f"创建任务: {new_task_id}")

    # Hand the task to an annotator.
    demo_pipeline.assign_task(new_task_id, "annotator_001")

    # Simulate a submission: one identical record per frame.
    frame_records = []
    for frame_index in range(900):
        frame_records.append(
            {"frame_id": frame_index, "perclos": 10, "fatigue_level": 0}
        )
    demo_pipeline.submit_result(new_task_id, "annotator_001", frame_records)

    # Score the stored result and print the summary.
    checker = QualityController()
    report = checker.assess_annotation_quality(demo_pipeline.results[new_task_id])

    print(f"质量分数: {report['quality_score']}")
    print(f"问题数: {len(report['issues'])}")

DMS 数据标注规范:从原始数据到训练集
https://dapalm.com/2026/06/01/2026-06-01-DMS数据标注规范从原始数据到训练集/
作者
Mars
发布于
2026年6月1日
许可协议