MediaPipe 系列 13:推理 Calculator——集成 TFLite 模型

一、TFLite 推理原理

1.1 TFLite 模型架构

TensorFlow Lite (TFLite) 是 Google 推出的轻量级深度学习推理框架,专为移动端和嵌入式设备优化。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
┌─────────────────────────────────────────────────────────────────────────┐
│ TFLite 模型加载与执行流程 │
├─────────────────────────────────────────────────────────────────────────┤
│ │
│ 模型文件 (.tflite) │
│ ┌─────────────────────────────────────────────────────────┐ │
│ │ FlatBuffer: 模型结构、算子定义、张量信息 │ │
│ │ - Model: 全局元数据 │ │
│ │ - OperatorCodes: 算子列表 │ │
│ │ - Subgraphs: 计算图(输入→输出) │ │
│ │ - Buffers: 张量数据存储 │ │
│ └─────────────────────────────────────────────────────────┘ │
│ │ │
│ ▼ │
│ 解释器 (Interpreter) │
│ ┌─────────────────────────────────────────────────────────┐ │
│ │ 1. LoadModel() - 从文件加载模型 │ │
│ │ 2. AllocateTensors() - 分配张量内存 │ │
│ │ 3. Invoke() - 执行推理 │ │
│ │ 4. GetTensor() - 获取输入/输出张量 │ │
│ └─────────────────────────────────────────────────────────┘ │
│ │ │
│ ▼ │
│ 执行引擎 (Execution Engine) │
│ ┌─────────────────────────────────────────────────────────┐ │
│ │ - 串行执行: 顺序执行算子 │ │
│ │ - 并行执行: 多线程加速 │ │
│ │ - Delegate: GPU/NNAPI 硬件加速 │ │
│ └─────────────────────────────────────────────────────────┘ │
│ │
└─────────────────────────────────────────────────────────────────────────┘

1.2 FlatBuffer 格式

TFLite 模型使用 FlatBuffer 二进制格式,相比 Protocol Buffers 更紧凑、加载更快:文件可以直接内存映射使用,无需反序列化步骤。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
// Illustrative sketch of a TFLite model holder — NOT the real
// tflite::FlatBufferModel API; shown only to visualize the pieces involved.
struct FlatBufferModel {
// Pointer to the FlatBuffer-backed model metadata (root Model table).
tflite::Model* model;

// Registry mapping builtin op codes to kernel implementations.
tflite::ops::builtin::BuiltinOpResolver resolver;

// Indices of the graph's input/output tensors.
std::vector<int> input_indices;
std::vector<int> output_indices;

// Tensor descriptors. NOTE(review): raw pointers are non-owning here —
// presumably the interpreter owns the tensors; confirm before copying this pattern.
std::vector<TfLiteTensor*> tensors;
};

// Illustrative layout of a subgraph (mirrors the FlatBuffer SubGraph table).
struct Subgraph {
// Indices of this subgraph's input tensors.
std::vector<int> inputs;

// Indices of this subgraph's output tensors.
std::vector<int> outputs;

// Operators in execution order.
std::vector<Operator*> operators;

// Tensor descriptors. NOTE(review): non-owning pointers — confirm ownership.
std::vector<TfLiteTensor*> tensors;
};

1.3 推理执行流程

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
// Standard inference flow: fetch frame → preprocess → Invoke() → postprocess → emit.
absl::Status TFLiteInferenceCalculator::Process(CalculatorContext* cc) {
// 1. Fetch the input image packet for the current timestamp.
const ImageFrame& image = cc->Inputs().Tag("IMAGE").Get<ImageFrame>();

// 2. Preprocess into the model's input tensor.
// The pointer is only valid after AllocateTensors() has run.
float* input_tensor = interpreter_->typed_input_tensor<float>(0);
MP_RETURN_IF_ERROR(Preprocess(image, input_tensor));

// 3. Run inference synchronously on the calculator's thread.
TF_LITE_ENSURE_OK(interpreter_.get(), interpreter_->Invoke());

// 4. Read the output tensor. Re-fetched every call because tensor memory can
// move across AllocateTensors()/delegate changes.
float* output_tensor = interpreter_->typed_output_tensor<float>(0);
std::vector<Detection> detections = Postprocess(output_tensor);

// 5. Emit the detections at the input packet's timestamp.
cc->Outputs().Tag("DETECTIONS").AddPacket(
MakePacket<std::vector<Detection>>(detections).At(cc->InputTimestamp()));

return absl::OkStatus();
}

二、TFLite 集成详解

2.1 模型加载

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
// Loads a .tflite model from disk, builds the interpreter, and allocates tensors.
//
// Args:
//   model_path:  filesystem path to the FlatBuffer model file.
//   num_threads: CPU thread count used by the interpreter kernels.
absl::Status TFLiteInferenceCalculator::LoadModel(
    const std::string& model_path, int num_threads) {

  // 1. Memory-map the FlatBuffer model from disk.
  model_ = tflite::FlatBufferModel::BuildFromFile(model_path.c_str());
  RET_CHECK(model_ != nullptr) << "Failed to load model: " << model_path;

  // 2. Build the interpreter with the builtin op resolver.
  // Custom ops, if any, would be registered here via resolver.AddCustom(...).
  tflite::ops::builtin::BuiltinOpResolver resolver;
  tflite::InterpreterBuilder builder(*model_, resolver);
  builder(&interpreter_);
  RET_CHECK(interpreter_ != nullptr) << "Failed to create interpreter";

  // 3. Configure CPU parallelism.
  interpreter_->SetNumThreads(num_threads);

  // 4. Allocate memory for all tensors (required before touching tensor data).
  TF_LITE_ENSURE_OK(interpreter_.get(), interpreter_->AllocateTensors());

  // 5. Log input/output tensor metadata. FIX: dimensions are printed
  // rank-agnostically — the original read dims->data[0..3] unconditionally,
  // which is out of bounds for tensors of rank < 4.
  auto dims_to_string = [](const TfLiteIntArray* dims) {
    std::string s = "[";
    for (int i = 0; i < dims->size; ++i) {
      if (i > 0) s += ", ";
      s += std::to_string(dims->data[i]);
    }
    return s + "]";
  };
  auto* input_tensor = interpreter_->input_tensor(0);
  auto* output_tensor = interpreter_->output_tensor(0);

  LOG(INFO) << "Model loaded successfully";
  LOG(INFO) << " Input shape: " << dims_to_string(input_tensor->dims);
  LOG(INFO) << " Output shape: " << dims_to_string(output_tensor->dims);
  // FIX: label corrected — kTfLiteUInt8 is the UNSIGNED 8-bit type; the
  // original log labeled it "Int8".
  LOG(INFO) << " Input type: " << input_tensor->type
            << " (Float=" << kTfLiteFloat32 << ", UInt8=" << kTfLiteUInt8 << ")";

  return absl::OkStatus();
}

2.2 输入预处理

图像预处理是模型推理的关键步骤,直接影响检测精度和性能。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
// Preprocesses an ImageFrame into a float32 input tensor:
// resize → convert to 3-channel RGB → scale [0, 255] to [0, 1].
absl::Status TFLiteInferenceCalculator::Preprocess(
const ImageFrame& input, float* input_tensor) {

// Zero-copy view of the frame as a cv::Mat.
cv::Mat input_mat = formats::MatView(&input);

// Target size comes from the loaded model's input tensor.
int target_width = input_width_;
int target_height = input_height_;

// 1. Resize with bilinear interpolation.
cv::Mat resized;
cv::resize(input_mat, resized, cv::Size(target_width, target_height),
0, 0, cv::INTER_LINEAR);

// 2. Convert to 3-channel RGB regardless of source format.
if (input.Format() == ImageFormat::SRGBA) {
// RGBA → RGB
cv::cvtColor(resized, resized, cv::COLOR_RGBA2RGB);
} else if (input.Format() == ImageFormat::BGRA) {
// BGRA → RGB
cv::cvtColor(resized, resized, cv::COLOR_BGRA2RGB);
} else if (input.Format() == ImageFormat::GRAY8) {
// Grayscale → RGB (channel replication).
cv::Mat rgb;
cv::cvtColor(resized, rgb, cv::COLOR_GRAY2RGB);
resized = rgb;
}

// 3. Normalize. NOTE(review): linear indexing of resized.data assumes the Mat
// is continuous (true for freshly allocated resize/cvtColor outputs) and that
// its channel count equals input_channels_ — confirm for formats not handled above.
int size = target_width * target_height * input_channels_;
for (int i = 0; i < size; ++i) {
// [0, 255] → [0, 1]
input_tensor[i] = resized.data[i] / 255.0f;
}

return absl::OkStatus();
}

量化模型预处理:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
// Preprocess variant for quantized (uint8) input tensors: resize → RGB → raw copy.
absl::Status TFLiteInferenceCalculator::Preprocess(
const ImageFrame& input, uint8_t* input_tensor) {

cv::Mat input_mat = formats::MatView(&input);
cv::Mat resized;
cv::resize(input_mat, resized, cv::Size(input_width_, input_height_));

// Color conversion. NOTE(review): unlike the float path, BGRA and GRAY8 are
// not handled here — confirm the expected input formats for quantized models.
if (input.Format() == ImageFormat::SRGBA) {
cv::cvtColor(resized, resized, cv::COLOR_RGBA2RGB);
}

// Raw byte copy — no normalization needed for uint8 models.
// NOTE(review): assumes `resized` is continuous with input_channels_ channels.
std::memcpy(input_tensor, resized.data,
input_width_ * input_height_ * input_channels_);

return absl::OkStatus();
}

2.3 输出后处理

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
// Decodes the raw output tensor into Detection results above the score threshold.
//
// Expected output layout: num_detections rows of 6 floats each:
//   [ymin, xmin, ymax, xmax, score, class_id]
std::vector<Detection> TFLiteInferenceCalculator::Postprocess(
const float* output_tensor, int num_detections) {

std::vector<Detection> detections;

// NOTE(review): the set_ymin()/set_class_id() accessors suggest a
// project-specific Detection message — the upstream mediapipe::Detection proto
// stores boxes in location_data instead; confirm which type is in use.

for (int i = 0; i < num_detections; ++i) {
// Each detection occupies 6 consecutive floats.
const float* detection = output_tensor + i * 6;

float score = detection[4];

// Skip detections below the configured confidence threshold.
if (score < score_threshold_) {
continue;
}

Detection det;
det.set_ymin(detection[0]);
det.set_xmin(detection[1]);
det.set_ymax(detection[2]);
det.set_xmax(detection[3]);
det.set_score(score);
det.set_class_id(static_cast<int>(detection[5]));

detections.push_back(det);
}

return detections;
}

复杂后处理(如 NMS):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
// Greedy non-maximum suppression.
//
// Args:
//   boxes:         axis-aligned boxes as [ymin, xmin, ymax, xmax].
//   scores:        one confidence score per box.
//   nms_threshold: IoU above which a lower-scored box is suppressed.
// Returns: indices of the kept boxes, in descending-score order.
//
// FIX: the original called CalculateIoU(), which has no free-function
// definition in this file (only a class member exists) — the IoU is now
// computed by a local lambda so the function is self-contained. The inner loop
// also only visits strictly lower-scored boxes, avoiding the pointless
// self-comparison (IoU == 1) and redundant checks against already-kept boxes.
std::vector<int> NonMaxSuppression(
    const std::vector<std::vector<float>>& boxes,
    const std::vector<float>& scores,
    float nms_threshold) {

  // Intersection-over-union of two [ymin, xmin, ymax, xmax] boxes.
  auto iou = [](const std::vector<float>& a, const std::vector<float>& b) {
    float x1 = std::max(a[1], b[1]);
    float y1 = std::max(a[0], b[0]);
    float x2 = std::min(a[3], b[3]);
    float y2 = std::min(a[2], b[2]);
    float inter = std::max(0.0f, x2 - x1) * std::max(0.0f, y2 - y1);
    float area_a = (a[3] - a[1]) * (a[2] - a[0]);
    float area_b = (b[3] - b[1]) * (b[2] - b[0]);
    float uni = area_a + area_b - inter;
    return uni > 0 ? inter / uni : 0.0f;
  };

  // Indices sorted by descending score.
  std::vector<int> order(scores.size());
  std::iota(order.begin(), order.end(), 0);
  std::sort(order.begin(), order.end(),
            [&scores](int a, int b) { return scores[a] > scores[b]; });

  std::vector<int> indices;
  std::vector<bool> suppressed(boxes.size(), false);

  for (size_t oi = 0; oi < order.size(); ++oi) {
    int i = order[oi];
    if (suppressed[i]) continue;
    indices.push_back(i);

    // Only strictly lower-scored boxes can still be suppressed.
    for (size_t oj = oi + 1; oj < order.size(); ++oj) {
      int j = order[oj];
      if (suppressed[j]) continue;
      if (iou(boxes[i], boxes[j]) > nms_threshold) {
        suppressed[j] = true;
      }
    }
  }

  return indices;
}

三、GPU/CPU/NNAPI Delegate 选择

3.1 性能对比

硬件加速方式 适用场景 性能提升 优点 缺点
CPU 通用设备、调试 基准 兼容性好、无依赖 性能受限
GPU Android/iOS、桌面 2-10x 高并行度、通用 需要驱动支持
NNAPI Android 8.1+(各厂商 NPU/DSP/GPU 后端) 2-5x 原生支持、低功耗 仅 Android
XNNPACK 多平台、ARM 2-8x 高效、轻量 需要编译
CoreML iOS/macOS 2-6x 原生支持、优化 仅 Apple

3.2 GPU Delegate

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
#include "tensorflow/lite/delegates/gpu/gpu_delegate.h"

// Applies the TFLite GPU delegate during calculator setup.
// FIX: `override` removed — it is only valid inside a class definition, not on
// an out-of-class member function definition.
absl::Status TFLiteInferenceCalculator::Open(CalculatorContext* cc) {
  // ... load the model ...

  // 1. Configure the GPU delegate.
  TfLiteGpuDelegateOptionsV2 options = TfLiteGpuDelegateOptionsV2Default();
  options.is_precision_loss_allowed = true;  // allow FP16 (faster, minor accuracy loss)
  options.inference_preference =
      TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED;  // favor steady throughput

  // 2. Create the delegate.
  auto* delegate = TfLiteGpuDelegateV2Create(&options);
  RET_CHECK(delegate != nullptr) << "Failed to create GPU delegate";

  // 3. Attach the delegate to the interpreter graph.
  TF_LITE_ENSURE_OK(interpreter_.get(),
                    interpreter_->ModifyGraphWithDelegate(delegate));

  // 4. Lifetime. FIX: the original comment claimed TFLite manages the delegate
  // — it does NOT take ownership. The delegate must stay alive for as long as
  // the interpreter uses it, and TfLiteGpuDelegateV2Delete() must be called
  // only after the interpreter has been destroyed.

  LOG(INFO) << "GPU delegate applied successfully";
  LOG(INFO) << " Precision: " << (options.is_precision_loss_allowed ? "FP16" : "FP32");

  return absl::OkStatus();
}

GPU Delegate 配置选项:

1
2
3
4
5
6
7
8
9
10
11
// Advanced GPU delegate configuration (illustrative).
// NOTE(review): several of these field names (cache_dir, model_buffer,
// enable_delegate_optimization, allow_fp16_precision_for_2d_tensors) do not
// appear in the upstream TfLiteGpuDelegateOptionsV2 struct — verify each field
// against the TFLite version actually in use before copying this snippet.
TfLiteGpuDelegateOptionsV2 advanced_options = {
.cache_dir = "/data/local/tmp/tflite_gpu_cache", // kernel/program cache directory
.model_buffer = nullptr, // in-memory model (unused when loading from file)
.model_buffer_size = 0,
.is_precision_loss_allowed = true, // allow FP16
.inference_preferred_precision = kTfLiteFloat32, // preferred compute precision
.inference_preference = TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED,
.enable_delegate_optimization = true, // enable delegate-side optimization
.allow_fp16_precision_for_2d_tensors = true, // allow FP16 for 2-D tensors
};

3.3 NNAPI Delegate

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"

// Applies the NNAPI delegate (Android 8.1+).
// FIX: `override` removed — it is illegal on an out-of-class member definition.
// NOTE(review): upstream TFLite exposes NNAPI through
// tflite::StatefulNnApiDelegate; the C-style TfLiteNnApiDelegateCreate API
// shown here should be verified against the TFLite version in use.
absl::Status TFLiteInferenceCalculator::Open(CalculatorContext* cc) {
  // ... load the model ...

  // 1. Configure the NNAPI delegate.
  TfLiteNnApiDelegateOptions options = {
      .enabled = true,
      .num_threads = 4,        // CPU-fallback thread count
      .model_cache = nullptr,  // optional compiled-model cache
  };

  auto* delegate = TfLiteNnApiDelegateCreate(&options);
  RET_CHECK(delegate != nullptr) << "Failed to create NNAPI delegate";

  // 2. Attach to the interpreter. The delegate must outlive the interpreter.
  TF_LITE_ENSURE_OK(interpreter_.get(),
                    interpreter_->ModifyGraphWithDelegate(delegate));

  LOG(INFO) << "NNAPI delegate applied successfully";
  LOG(INFO) << " Backend: " << TfLiteNnApiGetBackendName();

  return absl::OkStatus();
}

NNAPI 硬件后端:

1
2
3
4
5
6
7
8
// 检测可用的 NNAPI 后端
const char* TfLiteNnApiGetBackendName() {
// 返回: "nnapi" (通用), "gpu" (GPU), "dsp" (DSP), "cpu" (CPU)
// 具体取决于设备
}

// 查询 NNAPI 版本
int TfLiteNnApiGetVersion();

3.4 XNNPACK Delegate

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"

// Applies the XNNPACK delegate (optimized CPU kernels, all platforms).
// FIX: `override` removed — it is illegal on an out-of-class member definition.
absl::Status TFLiteInferenceCalculator::Open(CalculatorContext* cc) {
  // ... load the model ...

  // 1. Start from the library defaults rather than zero-initialization, so any
  // option fields beyond num_threads keep their intended default values.
  TfLiteXNNPackDelegateOptions options = TfLiteXNNPackDelegateOptionsDefault();
  options.num_threads = 4;

  auto* delegate = TfLiteXNNPackDelegateCreate(&options);
  RET_CHECK(delegate != nullptr) << "Failed to create XNNPACK delegate";

  // 2. Attach to the interpreter. Release with TfLiteXNNPackDelegateDelete()
  // only after the interpreter has been destroyed.
  TF_LITE_ENSURE_OK(interpreter_.get(),
                    interpreter_->ModifyGraphWithDelegate(delegate));

  LOG(INFO) << "XNNPACK delegate applied successfully";
  LOG(INFO) << " Threads: " << options.num_threads;

  return absl::OkStatus();
}

3.5 Delegate 组合使用

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
// Applies available delegates in preference order: GPU → NNAPI → XNNPACK.
// FIX: `override` removed — it is illegal on an out-of-class member definition.
absl::Status TFLiteInferenceCalculator::Open(CalculatorContext* cc) {
  // ... load the model ...

  std::vector<TfLiteDelegate*> delegates;

  // 1. Prefer the GPU delegate (Android/iOS).
#ifdef __ANDROID__
  TfLiteGpuDelegateOptionsV2 gpu_options = TfLiteGpuDelegateOptionsV2Default();
  gpu_options.is_precision_loss_allowed = true;
  gpu_options.inference_preference = TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED;
  auto* gpu_delegate = TfLiteGpuDelegateV2Create(&gpu_options);
  if (gpu_delegate) delegates.push_back(gpu_delegate);
#endif

  // 2. Then NNAPI (Android).
#ifdef __ANDROID__
  TfLiteNnApiDelegateOptions nnapi_options = { .enabled = true, .num_threads = 4 };
  auto* nnapi_delegate = TfLiteNnApiDelegateCreate(&nnapi_options);
  if (nnapi_delegate) delegates.push_back(nnapi_delegate);
#endif

  // 3. Fall back to XNNPACK (portable CPU path).
  TfLiteXNNPackDelegateOptions xnnpack_options = { .num_threads = 4 };
  auto* xnnpack_delegate = TfLiteXNNPackDelegateCreate(&xnnpack_options);
  if (xnnpack_delegate) delegates.push_back(xnnpack_delegate);

  // 4. Apply all delegates.
  if (!delegates.empty()) {
    interpreter_->ModifyGraphWithDelegates(delegates);
    LOG(INFO) << "Applied " << delegates.size() << " delegate(s)";
  }

  // BUG FIX: the original deleted the delegates here, but the interpreter keeps
  // raw pointers to them — destroying them while the interpreter is alive is a
  // use-after-free. Delegates must be kept (e.g. in members with their
  // type-specific deleters) and released only after `interpreter_` is destroyed.

  return absl::OkStatus();
}

四、完整 Calculator 实现

4.1 头文件

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
// tflite_inference_calculator.h
#ifndef TFLITE_INFERENCE_CALCULATOR_H_
#define TFLITE_INFERENCE_CALCULATOR_H_

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/detection.pb.h"
#include "mediapipe/framework/formats/tensor.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/delegates/gpu/gpu_delegate.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"

namespace mediapipe {

// Inference options (plain struct; a .proto variant exists for graph configs).
struct TFLiteInferenceCalculatorOptions {
  std::string model_path = "";
  int num_threads = 4;
  float score_threshold = 0.5f;
  float iou_threshold = 0.45f;
  bool use_gpu = false;
  bool use_nnapi = false;
  bool use_xnnpack = false;
  bool enable_delegate_optimization = true;
};

// Runs a TFLite detection model on incoming ImageFrames and emits Detections.
class TFLiteInferenceCalculator : public CalculatorBase {
 public:
  static absl::Status GetContract(CalculatorContract* cc);
  static absl::Status RegisterTypes(CalculatorContext* cc);

  absl::Status Open(CalculatorContext* cc) override;
  absl::Status Process(CalculatorContext* cc) override;

 private:
  // Model ownership: the FlatBuffer must outlive the interpreter.
  std::unique_ptr<tflite::FlatBufferModel> model_;
  std::unique_ptr<tflite::Interpreter> interpreter_;

  // Input/output geometry, read from the model after AllocateTensors().
  int input_width_ = 320;
  int input_height_ = 320;
  int input_channels_ = 3;
  int output_tensor_size_ = 0;

  // Quantization parameters (meaningful when use_quantized_ is true).
  bool use_quantized_ = false;
  float input_scale_ = 1.0f;
  int input_zero_point_ = 0;

  // Post-processing configuration.
  float score_threshold_ = 0.5f;
  float iou_threshold_ = 0.45f;

  // Delegate-selection flags.
  // FIX: these members are referenced by LoadModel() in the .cc file but were
  // missing from the class declaration.
  bool use_gpu_ = false;
  bool use_nnapi_ = false;
  bool use_xnnpack_ = false;

  // Loads the model and builds the interpreter.
  absl::Status LoadModel(const std::string& model_path, int num_threads);

  // Preprocessing (float and quantized variants).
  absl::Status Preprocess(const ImageFrame& input, float* input_tensor);
  absl::Status Preprocess(const ImageFrame& input, uint8_t* input_tensor);

  // Post-processing.
  // FIX: signatures updated to match the definitions and call sites in the
  // .cc file (Postprocess takes the detection count; NonMaxSuppression takes
  // the IoU threshold).
  std::vector<Detection> Postprocess(const float* output_tensor,
                                     int num_detections);
  std::vector<int> NonMaxSuppression(
      const std::vector<std::vector<float>>& boxes,
      const std::vector<float>& scores,
      float nms_threshold);
  float CalculateIoU(const std::vector<float>& box1,
                     const std::vector<float>& box2);
};

}  // namespace mediapipe

#endif  // TFLITE_INFERENCE_CALCULATOR_H_

4.2 实现文件

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
// tflite_inference_calculator.cc
#include "tflite_inference_calculator.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/port/opencv_imgproc.h"
#include "mediapipe/framework/port/opencv_highgui.h"

namespace mediapipe {

// Declares the calculator's I/O contract: one IMAGE input stream, one
// DETECTIONS output stream, a MODEL_PATH side packet, and the options type.
absl::Status TFLiteInferenceCalculator::GetContract(CalculatorContract* cc) {
// Input stream: frames to run inference on.
cc->Inputs().Tag("IMAGE").Set<ImageFrame>();

// Output stream: detections per input frame.
cc->Outputs().Tag("DETECTIONS").Set<std::vector<Detection>>();

// Side packet: path to the .tflite model file.
cc->InputSidePackets().Tag("MODEL_PATH").Set<std::string>();

// Declares the options type so the framework can parse it from the graph config.
cc->Options<TFLiteInferenceCalculatorOptions>();

return absl::OkStatus();
}

// Hook for registering custom packet types; nothing to register currently.
absl::Status TFLiteInferenceCalculator::RegisterTypes(CalculatorContext* cc) {
// No custom types at the moment.
return absl::OkStatus();
}

// Initializes the calculator: loads the model, caches tensor geometry and
// quantization parameters, and copies the post-processing thresholds.
absl::Status TFLiteInferenceCalculator::Open(CalculatorContext* cc) {
  const auto& options = cc->Options<TFLiteInferenceCalculatorOptions>();

  // The model path comes from the side packet (not from options.model_path).
  std::string model_path = cc->InputSidePackets().Tag("MODEL_PATH").Get<std::string>();

  MP_RETURN_IF_ERROR(LoadModel(model_path, options.num_threads));

  // Cache input geometry from the model. NOTE(review): assumes an NHWC,
  // rank-4 input tensor — confirm for the model in use.
  auto* input_tensor = interpreter_->input_tensor(0);
  input_height_ = input_tensor->dims->data[1];
  input_width_ = input_tensor->dims->data[2];
  input_channels_ = input_tensor->dims->data[3];

  // Detect a uint8-quantized input and record its quantization parameters.
  use_quantized_ = (input_tensor->type == kTfLiteUInt8);
  if (use_quantized_) {
    input_scale_ = input_tensor->params.scale;
    input_zero_point_ = input_tensor->params.zero_point;
    LOG(INFO) << "Quantized model detected: scale=" << input_scale_
              << ", zero_point=" << input_zero_point_;
  }

  // FIX: TFLiteInferenceCalculatorOptions is a plain struct (see the header),
  // so these are data members, not accessor functions — the original
  // `options.score_threshold()` / `options.iou_threshold()` calls would not compile.
  score_threshold_ = options.score_threshold;
  iou_threshold_ = options.iou_threshold;

  LOG(INFO) << "Model loaded: " << input_width_ << "x" << input_height_
            << " channels=" << input_channels_
            << " quantized=" << use_quantized_
            << " score_threshold=" << score_threshold_;

  return absl::OkStatus();
}

// Per-frame processing: preprocess → Invoke() → postprocess → emit detections.
absl::Status TFLiteInferenceCalculator::Process(CalculatorContext* cc) {
// Skip timestamps with no image packet.
if (cc->Inputs().Tag("IMAGE").IsEmpty()) {
return absl::OkStatus();
}

const ImageFrame& image = cc->Inputs().Tag("IMAGE").Get<ImageFrame>();

// 1. Preprocess into the model's input tensor (uint8 or float path).
if (use_quantized_) {
uint8_t* input_tensor = interpreter_->typed_input_tensor<uint8_t>(0);
MP_RETURN_IF_ERROR(Preprocess(image, input_tensor));
} else {
float* input_tensor = interpreter_->typed_input_tensor<float>(0);
MP_RETURN_IF_ERROR(Preprocess(image, input_tensor));
}

// 2. Synchronous inference.
TF_LITE_ENSURE_OK(interpreter_.get(), interpreter_->Invoke());

// 3. Postprocess. NOTE(review): this reads a float output even for quantized
// models, and assumes dims->data[1] is the detection count — confirm both
// hold for the model in use.
float* output_tensor = interpreter_->typed_output_tensor<float>(0);
int num_detections = interpreter_->output_tensor(0)->dims->data[1];
std::vector<Detection> detections = Postprocess(output_tensor, num_detections);

// 4. Emit at the input packet's timestamp.
cc->Outputs().Tag("DETECTIONS").AddPacket(
MakePacket<std::vector<Detection>>(detections).At(cc->InputTimestamp()));

return absl::OkStatus();
}

// Loads the model, builds and configures the interpreter, and attaches any
// requested hardware delegates.
absl::Status TFLiteInferenceCalculator::LoadModel(
    const std::string& model_path, int num_threads) {

  // 1. Memory-map the FlatBuffer model from disk.
  model_ = tflite::FlatBufferModel::BuildFromFile(model_path.c_str());
  RET_CHECK(model_ != nullptr) << "Failed to load model: " << model_path;

  // 2. Build the interpreter.
  tflite::ops::builtin::BuiltinOpResolver resolver;
  tflite::InterpreterBuilder builder(*model_, resolver);
  builder(&interpreter_);
  RET_CHECK(interpreter_ != nullptr) << "Failed to create interpreter";

  // 3. Configure CPU parallelism.
  interpreter_->SetNumThreads(num_threads);

  // 4. Collect the requested delegates BEFORE allocating tensors, so tensors
  // are planned once for the final (delegated) graph instead of twice.
  std::vector<TfLiteDelegate*> delegates;
  if (use_gpu_) {
#ifdef __ANDROID__
    TfLiteGpuDelegateOptionsV2 gpu_options = TfLiteGpuDelegateOptionsV2Default();
    gpu_options.is_precision_loss_allowed = true;
    gpu_options.inference_preference = TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED;
    auto* gpu_delegate = TfLiteGpuDelegateV2Create(&gpu_options);
    if (gpu_delegate) delegates.push_back(gpu_delegate);
#endif
  }

  if (use_nnapi_) {
#ifdef __ANDROID__
    TfLiteNnApiDelegateOptions nnapi_options = { .enabled = true, .num_threads = 4 };
    auto* nnapi_delegate = TfLiteNnApiDelegateCreate(&nnapi_options);
    if (nnapi_delegate) delegates.push_back(nnapi_delegate);
#endif
  }

  if (use_xnnpack_) {
    TfLiteXNNPackDelegateOptions xnnpack_options = { .num_threads = 4 };
    auto* xnnpack_delegate = TfLiteXNNPackDelegateCreate(&xnnpack_options);
    if (xnnpack_delegate) delegates.push_back(xnnpack_delegate);
  }

  if (!delegates.empty()) {
    interpreter_->ModifyGraphWithDelegates(delegates);
    LOG(INFO) << "Applied " << delegates.size() << " delegate(s)";
  }

  // BUG FIX: the original called TfLiteDelegateDelete() on each delegate here,
  // but the interpreter keeps raw pointers to them — destroying them while
  // `interpreter_` is alive is a use-after-free. Delegates must outlive the
  // interpreter and be released (with their type-specific deleters) only after
  // it has been destroyed.

  // 5. Allocate tensors for the (possibly delegated) graph.
  TF_LITE_ENSURE_OK(interpreter_.get(), interpreter_->AllocateTensors());

  return absl::OkStatus();
}

// Preprocesses a frame into the float32 input tensor:
// resize → convert to 3-channel RGB → scale [0, 255] to [0, 1].
absl::Status TFLiteInferenceCalculator::Preprocess(
const ImageFrame& input, float* input_tensor) {

cv::Mat input_mat = formats::MatView(&input);

// Resize to the model's input resolution (bilinear).
cv::Mat resized;
cv::resize(input_mat, resized, cv::Size(input_width_, input_height_),
0, 0, cv::INTER_LINEAR);

// Convert to 3-channel RGB regardless of the source format.
if (input.Format() == ImageFormat::SRGBA) {
cv::cvtColor(resized, resized, cv::COLOR_RGBA2RGB);
} else if (input.Format() == ImageFormat::BGRA) {
cv::cvtColor(resized, resized, cv::COLOR_BGRA2RGB);
} else if (input.Format() == ImageFormat::GRAY8) {
cv::Mat rgb;
cv::cvtColor(resized, rgb, cv::COLOR_GRAY2RGB);
resized = rgb;
}

// Normalize. NOTE(review): linear indexing of resized.data assumes a
// continuous Mat with input_channels_ channels — holds for the conversions
// above; confirm for any other input format.
int size = input_width_ * input_height_ * input_channels_;
for (int i = 0; i < size; ++i) {
input_tensor[i] = resized.data[i] / 255.0f;
}

return absl::OkStatus();
}

// Preprocess variant for quantized (uint8) inputs: resize → RGB → raw copy.
absl::Status TFLiteInferenceCalculator::Preprocess(
const ImageFrame& input, uint8_t* input_tensor) {

cv::Mat input_mat = formats::MatView(&input);
cv::Mat resized;
cv::resize(input_mat, resized, cv::Size(input_width_, input_height_));

// Convert to RGB. NOTE(review): GRAY8 is not handled here, unlike the float
// path — confirm the expected input formats for quantized models.
if (input.Format() == ImageFormat::SRGBA) {
cv::cvtColor(resized, resized, cv::COLOR_RGBA2RGB);
} else if (input.Format() == ImageFormat::BGRA) {
cv::cvtColor(resized, resized, cv::COLOR_BGRA2RGB);
}

// Raw byte copy — no normalization needed for uint8 models.
// NOTE(review): assumes `resized` is continuous with input_channels_ channels.
std::memcpy(input_tensor, resized.data,
input_width_ * input_height_ * input_channels_);

return absl::OkStatus();
}

// Decodes raw model output into Detections: score-threshold filter, then NMS.
// Expected layout: num_detections rows of [ymin, xmin, ymax, xmax, score, class_id].
std::vector<Detection> TFLiteInferenceCalculator::Postprocess(
const float* output_tensor, int num_detections) {

std::vector<Detection> detections;

// Stage 1: decode rows and drop detections below the score threshold.
for (int i = 0; i < num_detections; ++i) {
const float* detection = output_tensor + i * 6;

float score = detection[4];

// Skip low-confidence detections.
if (score < score_threshold_) {
continue;
}

Detection det;
det.set_ymin(detection[0]);
det.set_xmin(detection[1]);
det.set_ymax(detection[2]);
det.set_xmax(detection[3]);
det.set_score(score);
det.set_class_id(static_cast<int>(detection[5]));

detections.push_back(det);
}

// Stage 2: non-maximum suppression on the surviving boxes.
std::vector<std::vector<float>> boxes;
std::vector<float> scores;

for (const auto& det : detections) {
boxes.push_back({det.ymin(), det.xmin(), det.ymax(), det.xmax()});
scores.push_back(det.score());
}

auto keep_indices = NonMaxSuppression(boxes, scores, iou_threshold_);

// Stage 3: rebuild the output from the kept indices.
std::vector<Detection> final_detections;
for (int idx : keep_indices) {
final_detections.push_back(detections[idx]);
}

return final_detections;
}

// Greedy non-maximum suppression over score-ordered boxes.
//
// Args:
//   boxes:         axis-aligned boxes as [ymin, xmin, ymax, xmax].
//   scores:        one confidence score per box.
//   nms_threshold: IoU above which a lower-scored box is suppressed.
// Returns: indices of the kept boxes, in descending-score order.
std::vector<int> TFLiteInferenceCalculator::NonMaxSuppression(
    const std::vector<std::vector<float>>& boxes,
    const std::vector<float>& scores,
    float nms_threshold) {

  // Indices sorted by descending score.
  std::vector<int> order(scores.size());
  std::iota(order.begin(), order.end(), 0);
  std::sort(order.begin(), order.end(),
            [&scores](int a, int b) { return scores[a] > scores[b]; });

  std::vector<int> indices;
  std::vector<bool> suppressed(boxes.size(), false);

  for (size_t oi = 0; oi < order.size(); ++oi) {
    int i = order[oi];
    if (suppressed[i]) continue;
    indices.push_back(i);

    // FIX: only examine strictly lower-scored boxes. The original iterated the
    // entire order list, redundantly comparing against already-kept boxes and
    // against box i itself (self IoU == 1, so it even marked the kept box as
    // suppressed). Results are unchanged; the wasted work and confusing state
    // mutation are gone.
    for (size_t oj = oi + 1; oj < order.size(); ++oj) {
      int j = order[oj];
      if (suppressed[j]) continue;
      if (CalculateIoU(boxes[i], boxes[j]) > nms_threshold) {
        suppressed[j] = true;
      }
    }
  }

  return indices;
}

// Intersection-over-union of two axis-aligned boxes given in
// [ymin, xmin, ymax, xmax] order. Returns 0 when the union is degenerate.
float TFLiteInferenceCalculator::CalculateIoU(
    const std::vector<float>& box1,
    const std::vector<float>& box2) {

  // Corners of the (possibly empty) intersection rectangle.
  const float top    = std::max(box1[0], box2[0]);
  const float left   = std::max(box1[1], box2[1]);
  const float bottom = std::min(box1[2], box2[2]);
  const float right  = std::min(box1[3], box2[3]);

  // Clamp negative extents to zero so disjoint boxes yield zero overlap.
  const float overlap =
      std::max(0.0f, right - left) * std::max(0.0f, bottom - top);

  // Union area = sum of both areas minus the shared overlap.
  const float area1 = (box1[3] - box1[1]) * (box1[2] - box1[0]);
  const float area2 = (box2[3] - box2[1]) * (box2[2] - box2[0]);
  const float total = area1 + area2 - overlap;

  if (total <= 0) {
    return 0.0f;
  }
  return overlap / total;
}

REGISTER_CALCULATOR(TFLiteInferenceCalculator);

} // namespace mediapipe

4.3 Options 定义

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
// tflite_inference_options.proto
syntax = "proto3";

package mediapipe;

message TFLiteInferenceCalculatorOptions {
// 模型路径
optional string model_path = 1;

// 线程数
optional int32 num_threads = 2 [default = 4];

// 分数阈值
optional float score_threshold = 3 [default = 0.5];

// IoU 阈值
optional float iou_threshold = 4 [default = 0.45];

// GPU 加速
optional bool use_gpu = 5 [default = false];

// NNAPI 加速
optional bool use_nnapi = 6 [default = false];

// XNNPACK 加速
optional bool use_xnnpack = 7 [default = false];

// 启用 Delegate 优化
optional bool enable_delegate_optimization = 8 [default = true];
}

五、Graph 配置

5.1 基础配置

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# face_detection_graph.pbtxt

input_stream: "IMAGE:image"
output_stream: "DETECTIONS:detections"
input_side_packet: "MODEL_PATH:model_path"

# 流量限制
node {
calculator: "FlowLimiterCalculator"
input_stream: "image"
input_stream: "detections"
input_stream_info: { tag_index: "detections" back_edge: true }
output_stream: "throttled_image"
}

# 推理 Calculator
node {
calculator: "TFLiteInferenceCalculator"
input_stream: "IMAGE:throttled_image"
input_side_packet: "MODEL_PATH:model_path"
output_stream: "DETECTIONS:detections"
options {
[mediapipe.TFLiteInferenceCalculatorOptions.ext] {
num_threads: 4
score_threshold: 0.5
iou_threshold: 0.45
use_gpu: true
use_nnapi: true
}
}
}

5.2 完整人脸检测 Graph

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# mediapipe/graphs/face_detection/face_detection_short_range.pbtxt

input_stream: "IMAGE:image"
output_stream: "DETECTIONS:detections"

# 1. 图像格式转换
node {
calculator: "ImageTransformationCalculator"
input_stream: "IMAGE:image"
output_stream: "IMAGE:converted_image"
options {
[mediapipe.ImageTransformationCalculatorOptions.ext] {
output_format: SRGB
}
}
}

# 2. 缩放到模型输入尺寸
node {
calculator: "ImageTransformationCalculator"
input_stream: "IMAGE:converted_image"
output_stream: "IMAGE:resized_image"
options {
[mediapipe.ImageTransformationCalculatorOptions.ext] {
output_width: 320
output_height: 320
scale_mode: FIT
}
}
}

# 3. 转换为 Tensor
node {
calculator: "ImageToTensorCalculator"
input_stream: "IMAGE:resized_image"
output_stream: "TENSORS:tensors"
options {
[mediapipe.ImageToTensorCalculatorOptions.ext] {
tensor_width: 320
tensor_height: 320
tensor_channels: 3
tensor_float_range {
min: -1.0
max: 1.0
}
}
}
}

# 4. 模型推理
node {
calculator: "TFLiteInferenceCalculator"
input_stream: "TENSORS:tensors"
input_side_packet: "MODEL_PATH:model_path"
output_stream: "DETECTIONS:detections"
options {
[mediapipe.TFLiteInferenceCalculatorOptions.ext] {
model_path: "/models/blazeface.tflite"
num_threads: 4
score_threshold: 0.5
iou_threshold: 0.45
use_gpu: true
use_nnapi: true
use_xnnpack: true
}
}
}

# 5. 后处理(如果需要)
node {
calculator: "BlazeFacePostprocessorCalculator"
input_stream: "DETECTIONS:detections"
input_stream: "ORIGINAL_IMAGE_SIZE:image_size"
output_stream: "DETECTIONS:final_detections"
options {
[mediapipe.BlazeFaceOptions.ext] {
score_threshold: 0.5
min_suppression_threshold: 0.3
num_keypoints: 6
}
}
}

5.3 Bazel 构建

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# mediapipe/calculators/tflite/BUILD

cc_library(
name = "tflite_inference_calculator",
srcs = [
"tflite_inference_calculator.cc",
],
hdrs = [
"tflite_inference_calculator.h",
],
deps = [
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:tensor",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:detection",
"@org_tensorflow//tensorflow/lite:framework",
"@org_tensorflow//tensorflow/lite/kernels:builtin_ops",
"@org_tensorflow//tensorflow/lite/delegates/gpu:gpu_delegate",
"@org_tensorflow//tensorflow/lite/delegates/nnapi:nnapi_delegate",
"@org_tensorflow//tensorflow/lite/delegates/xnnpack:xnnpack_delegate",
"@org_opencv//:opencv_core",
"@org_opencv//:opencv_imgproc",
"@org_opencv//:opencv_highgui",
],
alwayslink = 1,
)

cc_library(
name = "blazeface_postprocessor_calculator",
srcs = [
"blazeface_postprocessor.cc",
],
hdrs = [
"blazeface_postprocessor.h",
],
deps = [
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:detection",
"@org_tensorflow//tensorflow/lite:framework",
],
alwayslink = 1,
)

六、性能优化技巧

6.1 模型优化

量化模型:

1
2
3
4
5
6
7
8
9
10
# 使用 TFLite Converter 量化
tflite_convert \
--saved_model_dir=/path/to/saved_model \
--output_file=blazeface_quant.tflite \
--post_training_quantize \
--inference_type=QUANTIZED_UINT8 \
--input_arrays=Input \
--output_arrays=Identity \
--default_ranges_min=0.0 \
--default_ranges_max=255.0

模型压缩:

1
2
3
4
# 使用 TensorFlow Model Optimization Toolkit 压缩
python3 model_optimization_toolkit/quantization/quantize_model.py \
--saved_model_dir=/path/to/saved_model \
--output_dir=/path/to/quantized_model

6.2 运行时优化

多线程加速:

1
2
3
4
5
// 设置合理的线程数
interpreter_->SetNumThreads(num_threads);

// 推荐线程数:CPU 核心数 + 1
int num_threads = std::thread::hardware_concurrency() + 1;

Delegate 选择策略:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
// Delegate-selection strategy: try accelerators in preference order and stop
// at the first one that initializes (GPU → NNAPI → XNNPACK → plain CPU).
// NOTE(review): illustrative pseudocode — the `...` parameter list and the
// Create*Delegate() helpers are not defined anywhere in this article.
absl::Status TFLiteInferenceCalculator::LoadModel(...) {
// 1. Prefer GPU (Android/iOS).
if (use_gpu_) {
auto* gpu_delegate = CreateGpuDelegate();
if (gpu_delegate) {
interpreter_->ModifyGraphWithDelegate(gpu_delegate);
return absl::OkStatus();
}
}

// 2. Then NNAPI (Android).
if (use_nnapi_) {
auto* nnapi_delegate = CreateNnApiDelegate();
if (nnapi_delegate) {
interpreter_->ModifyGraphWithDelegate(nnapi_delegate);
return absl::OkStatus();
}
}

// 3. Fall back to XNNPACK (portable CPU kernels).
if (use_xnnpack_) {
auto* xnnpack_delegate = CreateXnnpackDelegate();
if (xnnpack_delegate) {
interpreter_->ModifyGraphWithDelegate(xnnpack_delegate);
return absl::OkStatus();
}
}

// 4. Otherwise run on the plain CPU path.
return absl::OkStatus();
}

6.3 内存优化

张量复用:

1
2
3
4
5
// 避免重复分配内存
float* input_tensor = interpreter_->typed_input_tensor<float>(0);

// 预分配输出缓冲区
std::vector<float> output_buffer(output_tensor_size_);

内存池:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
// Buffer pool that recycles float arrays to cut per-frame allocation overhead.
//
// WARNING(review): the pool does not track buffer sizes — GetBuffer(size) may
// hand back a recycled buffer SMALLER than `size`, leading to out-of-bounds
// writes. This is safe only when every buffer passing through the pool has the
// same size; callers must guarantee that (or the pool should be extended to
// key buffers by size).
class TensorPool {
 public:
  // Returns a recycled buffer if one is available; otherwise allocates a new
  // buffer of `size` elements.
  std::unique_ptr<float[]> GetBuffer(size_t size) {
    if (!free_buffers_.empty()) {  // idiomatic empty() instead of size() > 0
      auto buffer = std::move(free_buffers_.back());
      free_buffers_.pop_back();
      return buffer;
    }
    return std::make_unique<float[]>(size);
  }

  // Hands a buffer back to the pool for later reuse.
  void ReturnBuffer(std::unique_ptr<float[]> buffer) {
    free_buffers_.push_back(std::move(buffer));
  }

 private:
  std::vector<std::unique_ptr<float[]>> free_buffers_;  // recycled buffers
};

七、IMS 实战:疲劳检测模型

7.1 疲劳检测应用场景

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
┌─────────────────────────────────────────────────────────────────────────┐
│ DMS 疲劳检测应用流程 │
├─────────────────────────────────────────────────────────────────────────┤
│ │
│ 输入: IR Camera (640×480) │
│ │ │
│ ▼ │
│ ┌─────────────┐ │
│ │ Face │ 检测人脸位置 │
│ │ Detection │ - 驾驶员是否存在 │
│ └─────────────┘ - 多人场景识别 │
│ │ │
│ ▼ │
│ ┌─────────────┐ │
│ │ Face Mesh │ 提取 468 个面部关键点 │
│ │ (468点) │ - 眼睛闭合度、眨眼频率 │
│ └─────────────┘ - 眼睛睁开角度、眼睛间距 │
│ │ │
│ ▼ │
│ ┌─────────────┐ │
│ │ Eye State │ 疲劳检测 │
│ │ Analysis │ - 眼睛闭合时间 │
│ └─────────────┘ - 眨眼频率 │
│ │ │
│ ▼ │
│ ┌─────────────┐ │
│ │ Fatigue │ 疲劳状态判定 │
│ │ Detector │ - 疲劳分数 │
│ └─────────────┘ - 警告级别 │
│ │
│ 应用场景:                                                              │
│ ├── 驾驶员疲劳检测 │
│ ├── 分心检测(闭眼) │
│ ├── 驾驶行为监控 │
│ └── 安全提醒 │
│ │
└─────────────────────────────────────────────────────────────────────────┘

7.2 疲劳检测 Graph

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# mediapipe/graphs/ims/dms_fatigue_detection_graph.pbtxt
#
# DMS fatigue-detection pipeline: IR camera frames in, fatigue status and
# warning level out.

input_stream: "IR_IMAGE:ir_image"
# BUG FIX: FatigueDetectorCalculator consumes FRAME_TIME below, but no node
# produced it and it was not a graph input, so graph validation failed.
# It is now an externally-fed graph input.
input_stream: "FRAME_TIME:frame_time"
output_stream: "FATIGUE_STATUS:fatigue_status"
output_stream: "WARNING_LEVEL:warning_level"

# 1. Face detection on the IR frame.
node {
  calculator: "FaceDetectionShortRangeGpu"
  input_stream: "IMAGE:ir_image"
  output_stream: "DETECTIONS:raw_detections"
}

# 2. Pick the primary (driver) face from all detections.
node {
  calculator: "PrimaryFaceSelectorCalculator"
  input_stream: "DETECTIONS:raw_detections"
  output_stream: "DETECTION:primary_detection"
  options {
    [mediapipe.PrimaryFaceSelectorOptions.ext] {
      selection_strategy: LARGEST
    }
  }
}

# 3. Face mesh (468 landmarks) on the selected face.
node {
  calculator: "FaceGeometryCalculator"
  input_stream: "IMAGE:ir_image"
  input_stream: "DETECTION:primary_detection"
  output_stream: "FACE_GEOMETRY:face_geometry"
}

# 4. Derive per-frame eye state (openness, blink frequency) from the mesh.
node {
  calculator: "EyeStateCalculator"
  input_stream: "FACE_GEOMETRY:face_geometry"
  output_stream: "EYE_STATE:eye_state"
}

# 5. Fatigue scoring and warning-level decision.
node {
  calculator: "FatigueDetectorCalculator"
  input_stream: "EYE_STATE:eye_state"
  input_stream: "FRAME_TIME:frame_time"
  output_stream: "FATIGUE_STATUS:fatigue_status"
  output_stream: "WARNING_LEVEL:warning_level"
  options {
    [mediapipe.FatigueDetectorOptions.ext] {
      eye_closure_threshold: 0.2
      blink_frequency_threshold: 0.3
      fatigue_score_threshold: 0.7
      warning_interval_ms: 5000
    }
  }
}

# 6. Turn warning levels into user-facing notifications.
node {
  calculator: "NotificationCalculator"
  input_stream: "WARNING_LEVEL:warning_level"
  output_stream: "NOTIFICATIONS:notifications"
}

7.3 疲劳检测 Calculator 实现

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
// fatigue_detector_calculator.h
//
// Declares FatigueDetectorCalculator, a MediaPipe calculator that turns
// per-frame eye-state measurements into a fatigue score and warning level.
#ifndef FATIGUE_DETECTOR_CALCULATOR_H_
#define FATIGUE_DETECTOR_CALCULATOR_H_

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/geometry_data.pb.h"
#include "mediapipe/framework/formats/detection.pb.h"

namespace mediapipe {

// Per-frame eye measurements from the upstream EyeStateCalculator.
// Openness is in [0, 1]: 1.0 = fully open, 0.0 = fully closed.
struct EyeState {
  float left_eye_openness = 1.0f;
  float right_eye_openness = 1.0f;
  float left_blink_frequency = 0.0f;
  float right_blink_frequency = 0.0f;
  float eye_closure_duration = 0.0f;  // seconds the eyes have been closed
};

// Discrete fatigue severity.
// BUG FIX: this enum was originally declared *after* FatigueStatus, which
// uses it as a member type — a compile error. It must come first.
enum FatigueLevel {
  NORMAL = 0,
  MILD = 1,
  MODERATE = 2,
  SEVERE = 3
};

// Aggregated fatigue state emitted on the FATIGUE_STATUS stream.
struct FatigueStatus {
  float fatigue_score = 0.0f;   // higher = more fatigued, in [0, 1]
  FatigueLevel level = FatigueLevel::NORMAL;
  int warning_count = 0;        // total warnings emitted so far
  int64 last_warning_time = 0;  // timestamp of the most recent warning
};

// Consumes EYE_STATE + FRAME_TIME packets and emits FATIGUE_STATUS and
// WARNING_LEVEL packets; warnings are rate-limited by warning_interval_ms_.
class FatigueDetectorCalculator : public CalculatorBase {
 public:
  static absl::Status GetContract(CalculatorContract* cc);

  absl::Status Open(CalculatorContext* cc) override;
  absl::Status Process(CalculatorContext* cc) override;

 private:
  // Combines eye closure and blink frequency into a score in [0, 1].
  float CalculateFatigueScore(
      const EyeState& eye_state,
      int64 frame_time);

  // Maps a fatigue score onto the discrete FatigueLevel scale.
  FatigueLevel DetermineFatigueLevel(float fatigue_score);

  // Emits a human-readable warning for the given level.
  void SendWarning(CalculatorContext* cc, FatigueLevel level);

  // Thresholds, overridable via FatigueDetectorCalculatorOptions.
  float eye_closure_threshold_ = 0.2f;
  float blink_frequency_threshold_ = 0.3f;
  float fatigue_score_threshold_ = 0.7f;
  int64 warning_interval_ms_ = 5000;

  // Mutable per-stream state.
  EyeState last_eye_state_;
  FatigueStatus current_status_;
  int64 last_warning_time_ = 0;
};

}  // namespace mediapipe

#endif  // FATIGUE_DETECTOR_CALCULATOR_H_
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
// fatigue_detector_calculator.cc
//
// Turns per-frame eye-state measurements into a fatigue score, a discrete
// fatigue level, and (optionally) human-readable warning notifications.
#include "fatigue_detector_calculator.h"
#include "mediapipe/framework/port/opencv_highgui.h"

namespace mediapipe {

// Declares input/output streams and the options type for the framework.
absl::Status FatigueDetectorCalculator::GetContract(CalculatorContract* cc) {
  cc->Inputs().Tag("EYE_STATE").Set<EyeState>();
  cc->Inputs().Tag("FRAME_TIME").Set<int64>();

  cc->Outputs().Tag("FATIGUE_STATUS").Set<FatigueStatus>();
  cc->Outputs().Tag("WARNING_LEVEL").Set<FatigueLevel>();
  // BUG FIX: SendWarning() writes to a NOTIFICATIONS output that was never
  // declared in the contract, which makes the framework reject the packet
  // at runtime. Declare it as an optional output.
  if (cc->Outputs().HasTag("NOTIFICATIONS")) {
    cc->Outputs().Tag("NOTIFICATIONS").Set<std::string>();
  }

  cc->Options<FatigueDetectorCalculatorOptions>();

  return absl::OkStatus();
}

// Reads the tunable thresholds out of the node options.
absl::Status FatigueDetectorCalculator::Open(CalculatorContext* cc) {
  const auto& options = cc->Options<FatigueDetectorCalculatorOptions>();

  eye_closure_threshold_ = options.eye_closure_threshold();
  blink_frequency_threshold_ = options.blink_frequency_threshold();
  fatigue_score_threshold_ = options.fatigue_score_threshold();
  warning_interval_ms_ = options.warning_interval_ms();

  return absl::OkStatus();
}

absl::Status FatigueDetectorCalculator::Process(CalculatorContext* cc) {
  // Skip ticks where either required input is missing; the original only
  // checked EYE_STATE, so an empty FRAME_TIME packet would fail in Get().
  if (cc->Inputs().Tag("EYE_STATE").IsEmpty() ||
      cc->Inputs().Tag("FRAME_TIME").IsEmpty()) {
    return absl::OkStatus();
  }

  const EyeState& eye_state = cc->Inputs().Tag("EYE_STATE").Get<EyeState>();
  int64 frame_time = cc->Inputs().Tag("FRAME_TIME").Get<int64>();

  // 1. Score the current frame.
  float fatigue_score = CalculateFatigueScore(eye_state, frame_time);

  // 2. Update the aggregated status.
  current_status_.fatigue_score = fatigue_score;
  current_status_.level = DetermineFatigueLevel(fatigue_score);

  // 3. Emit a rate-limited warning (at most one per warning_interval_ms_).
  if (current_status_.level >= FatigueLevel::MILD) {
    int64 now = frame_time;
    if (now - last_warning_time_ >= warning_interval_ms_) {
      SendWarning(cc, current_status_.level);
      last_warning_time_ = now;
      current_status_.last_warning_time = now;
    }
  }

  // 4. Publish the per-frame results.
  cc->Outputs().Tag("FATIGUE_STATUS").AddPacket(
      MakePacket<FatigueStatus>(current_status_).At(cc->InputTimestamp()));
  cc->Outputs().Tag("WARNING_LEVEL").AddPacket(
      MakePacket<FatigueLevel>(current_status_.level).At(cc->InputTimestamp()));

  return absl::OkStatus();
}

// Returns a fatigue score in [0, 1]; higher means more fatigued.
float FatigueDetectorCalculator::CalculateFatigueScore(
    const EyeState& eye_state,
    int64 frame_time) {
  // Eye-closure component. Openness is 1.0 for a fully open eye, so fatigue
  // rises as openness falls. Use the *more open* eye: the driver only counts
  // as closing their eyes when both are closing.
  // BUG FIX: the original used raw openness here, so wide-open eyes produced
  // the maximum fatigue score (the scale was inverted).
  float closure_score = 1.0f - std::max(eye_state.left_eye_openness,
                                        eye_state.right_eye_openness);

  // Blink-frequency component: frequent blinking correlates with drowsiness.
  float blink_score = std::max(eye_state.left_blink_frequency,
                               eye_state.right_blink_frequency);

  // Weighted blend: closure dominates (0.6) over blink frequency (0.4).
  return 0.6f * closure_score + 0.4f * blink_score;
}

// Maps the continuous score onto the four discrete fatigue levels.
// NOTE(review): the cut-points are hard-coded; the *_threshold_ options
// loaded in Open() are not consulted here — confirm whether they should be.
FatigueLevel FatigueDetectorCalculator::DetermineFatigueLevel(float fatigue_score) {
  if (fatigue_score < 0.3f) {
    return FatigueLevel::NORMAL;
  } else if (fatigue_score < 0.5f) {
    return FatigueLevel::MILD;
  } else if (fatigue_score < 0.7f) {
    return FatigueLevel::MODERATE;
  } else {
    return FatigueLevel::SEVERE;
  }
}

// Emits a human-readable warning for `level` on the optional NOTIFICATIONS
// output and bumps the warning counter.
void FatigueDetectorCalculator::SendWarning(CalculatorContext* cc, FatigueLevel level) {
  std::string message;

  switch (level) {
    case FatigueLevel::MILD:
      message = "Mild fatigue detected";
      break;
    case FatigueLevel::MODERATE:
      message = "Moderate fatigue detected";
      break;
    case FatigueLevel::SEVERE:
      message = "Severe fatigue detected - Please take a break!";
      break;
    default:
      message = "Fatigue warning";
  }

  ++current_status_.warning_count;

  // Only emit when the graph actually wired this output (see GetContract).
  if (cc->Outputs().HasTag("NOTIFICATIONS")) {
    cc->Outputs().Tag("NOTIFICATIONS").AddPacket(
        MakePacket<std::string>(message).At(cc->InputTimestamp()));
  }
}

REGISTER_CALCULATOR(FatigueDetectorCalculator);

}  // namespace mediapipe

7.4 疲劳检测模型

模型选择:

模型 输入尺寸 参数量 FPS (手机) 精度 说明
BlazeFace 128×128 0.5M 30-60 95% 人脸检测
Face Mesh 192×192 10M 15-25 90% 面部关键点
Eye State - - - - 眼睛状态分析

训练数据:

  • WIDER FACE(人脸检测)
  • AFLW2000(面部关键点)
  • 自定义疲劳数据集(眼睛状态)

训练流程:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# Train the fatigue-detection model and export it to TFLite.
# NOTE(review): `images` and `labels` are assumed to be pre-loaded arrays;
# this snippet does not show how they are produced.
import tensorflow as tf

# 1. Start from the pre-trained face-mesh backbone.
model = tf.keras.models.load_model('face_mesh_model.h5')

# 2. Prepare the dataset. BUG FIX: batching must be done on the dataset
#    itself — Keras raises a ValueError if `batch_size` is passed to fit()
#    together with a tf.data.Dataset.
train_dataset = tf.data.Dataset.from_tensor_slices((images, labels)).batch(32)

# 3. Fine-tune.
model.fit(train_dataset, epochs=100)

# 4. Convert to TFLite with default optimizations (post-training quantization).
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()

# 5. Save the flatbuffer. BUG FIX: the write was not indented under `with`
#    in the original, which is a syntax error.
with open('fatigue_detection.tflite', 'wb') as f:
    f.write(tflite_model)

7.5 部署到 IMS

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# 1. Build the image
docker build -t ims-dms-fatigue-detection .

# 2. Run the container (privileged, host network, camera device and
#    model directory mounted in)
docker run -d \
--name ims-dms \
--privileged \
--net=host \
-v /dev/video0:/dev/video0 \
-v /path/to/models:/models \
ims-dms-fatigue-detection

# 3. Tail the container logs
docker logs -f ims-dms

# 4. On-device performance test
adb shell /data/local/tmp/ims-dms --test-fps

八、调试与测试

8.1 可视化调试

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# Visual debugging of the fatigue pipeline with the MediaPipe Python API.
# (Indentation reconstructed; the published listing had lost all nesting.)
import mediapipe as mp
import cv2
import time


def calculate_eye_openness(eye_points):
    """Approximate eye openness for one eye.

    BUG FIX: this helper was called in the original listing but never
    defined. Openness is estimated as the ratio of the landmark cloud's
    vertical extent to its horizontal extent — near 0 for a closed eye.

    Args:
        eye_points: normalized face-mesh landmarks around one eye
            (corners plus upper/lower lid points).
    Returns:
        float openness ratio (0.0 when the eye is closed or degenerate).
    """
    xs = [p.x for p in eye_points]
    ys = [p.y for p in eye_points]
    width = max(xs) - min(xs)
    height = max(ys) - min(ys)
    if width <= 0:
        return 0.0
    return height / width


mp_face_detection = mp.solutions.face_detection
mp_face_mesh = mp.solutions.face_mesh
mp_drawing = mp.solutions.drawing_utils

cap = cv2.VideoCapture(0)

with mp_face_detection.FaceDetection(
        model_selection=0, min_detection_confidence=0.5) as face_detection, \
     mp_face_mesh.FaceMesh(
        max_num_faces=1,
        refine_landmarks=True,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5) as face_mesh:

    fps = 0
    frame_count = 0
    start_time = time.time()

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # MediaPipe expects RGB input.
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Face detection + face mesh on the same frame.
        face_results = face_detection.process(image)
        mesh_results = face_mesh.process(image)

        # BUG FIX: the original drew and displayed the RGB image directly,
        # so OpenCV's BGR window showed swapped colors. Convert back first.
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        if mesh_results.multi_face_landmarks:
            for face_landmarks in mesh_results.multi_face_landmarks:
                mp_drawing.draw_landmarks(
                    image,
                    face_landmarks,
                    mp_face_mesh.FACEMESH_TESSELATION,
                    mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=1),
                    mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=1))

                # Eye landmarks: corners plus lid points. BUG FIX: the
                # original repeated the corner index (133 / 263) and had no
                # lower-lid point, so the openness estimate was meaningless.
                left_eye = [face_landmarks.landmark[i]
                            for i in [33, 133, 160, 158, 153]]
                right_eye = [face_landmarks.landmark[i]
                             for i in [362, 263, 386, 384, 380]]

                left_openness = calculate_eye_openness(left_eye)
                right_openness = calculate_eye_openness(right_eye)

                # Crude fatigue proxy: mean openness (low = eyes closing).
                fatigue_score = (left_openness + right_openness) / 2

                cv2.putText(image, f'Fatigue: {fatigue_score:.2f}',
                            (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1,
                            (0, 255, 0), 2)

                if fatigue_score < 0.3:
                    cv2.putText(image, 'WARNING: Fatigue!',
                                (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 1,
                                (0, 0, 255), 2)

        # Rolling FPS over 30-frame windows.
        frame_count += 1
        if frame_count % 30 == 0:
            fps = frame_count / (time.time() - start_time)
            frame_count = 0
            start_time = time.time()

        cv2.putText(image, f'FPS: {fps:.1f}', (10, 90),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

        cv2.imshow('Fatigue Detection', image)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()

8.2 性能测试

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# 1. Measure FPS on-device with a prerecorded clip
adb shell /data/local/tmp/mediapipe_cpu \
--calculator_graph_config_path=/sdcard/face_detection.pbtxt \
--input_stream_path=/sdcard/test.mp4 \
--output_stream_path=/sdcard/output.mp4

# 2. Profile from logcat
adb logcat | grep -E "(FPS|Latency|Memory|GPU|NNAPI)"

# 3. Frame-rate stats from the camera service
adb shell dumpsys media.camera | grep -E "fps"

# 4. Memory usage of the app process
adb shell dumpsys meminfo com.example.mediapipe

# 5. GPU utilization
adb shell dumpsys gpu | grep -E "GPU|Memory"

8.3 单元测试

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
// tflite_inference_calculator_test.cc
//
// Unit tests for TFLiteInferenceCalculator. These exercise the calculator
// object directly rather than through a CalculatorGraph/CalculatorRunner.
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "tflite_inference_calculator.h"

namespace mediapipe {

class TFLiteInferenceCalculatorTest : public ::testing::Test {
 protected:
  void SetUp() override {
    calculator_ = std::make_unique<TFLiteInferenceCalculator>();
  }

  void TearDown() override {
    calculator_.reset();
  }

  std::unique_ptr<TFLiteInferenceCalculator> calculator_;
};

TEST_F(TFLiteInferenceCalculatorTest, LoadModel) {
  // NOTE(review): Open(nullptr) only works if Open() never dereferences its
  // context; prefer a real CalculatorContext via CalculatorRunner.
  absl::Status status = calculator_->Open(nullptr);
  EXPECT_TRUE(status.ok()) << status.message();
}

TEST_F(TFLiteInferenceCalculatorTest, Preprocess) {
  // ImageFrame takes (format, width, height) — the original passed the
  // arguments in a different order with a stray channel count.
  ImageFrame image(ImageFormat::SRGBA, 8, 6);
  // ... fill image data

  float* input_tensor = nullptr;
  // ... call Preprocess to populate input_tensor

  // BUG FIX: the original dereferenced input_tensor while it was still
  // nullptr, crashing the test binary instead of reporting a failure.
  ASSERT_NE(input_tensor, nullptr);
  EXPECT_GT(input_tensor[0], 0.0f);
  EXPECT_LT(input_tensor[0], 1.0f);
}

TEST_F(TFLiteInferenceCalculatorTest, Postprocess) {
  // Raw model output: 4 box coords, score, padding.
  float output_tensor[] = {0.1, 0.2, 0.3, 0.4, 0.9, 0.0};
  std::vector<Detection> detections = calculator_->Postprocess(output_tensor, 1);

  ASSERT_EQ(detections.size(), 1);
  // Detection::score is a repeated proto field, so index it; use FLOAT_EQ
  // to avoid exact float equality pitfalls.
  EXPECT_FLOAT_EQ(detections[0].score(0), 0.9f);
}

}  // namespace mediapipe

int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}

九、总结

要点 说明
模型加载 FlatBufferModel::BuildFromFile
解释器创建 InterpreterBuilder
输入预处理 缩放、归一化、格式转换
推理执行 interpreter_->Invoke()
输出后处理 解析检测结果、NMS
GPU 加速 GPU Delegate、FP16 量化
NNAPI 加速 Android 原生支持
XNNPACK 加速 高效多线程执行
IMS 实战 疲劳检测全流程

系列进度: 13/55
更新时间: 2026-03-12


MediaPipe 系列 13:推理 Calculator——集成 TFLite 模型
https://dapalm.com/2026/03/12/MediaPipe系列13-推理Calculator:集成TFLite模型/
作者
Mars
发布于
2026年3月12日
许可协议