MediaPipe 系列 13:推理 Calculator——集成 TFLite 模型

一、TFLite 推理原理

1.1 TFLite 模型架构

TensorFlow Lite (TFLite) 是 Google 推出的轻量级深度学习推理框架,专为移动端和嵌入式设备优化。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
┌─────────────────────────────────────────────────────────────────────────┐
│ TFLite 模型加载与执行流程 │
├─────────────────────────────────────────────────────────────────────────┤
│ │
│ 模型文件 (.tflite) │
│ ┌─────────────────────────────────────────────────────────┐ │
│ │ FlatBuffer: 模型结构、算子定义、张量信息 │ │
│ │ - Model: 全局元数据 │ │
│ │ - OperatorCodes: 算子列表 │ │
│ │ - Subgraphs: 计算图(输入→输出) │ │
│ │ - Buffers: 张量数据存储 │ │
│ └─────────────────────────────────────────────────────────┘ │
│ │ │
│ ▼ │
│ 解释器 (Interpreter) │
│ ┌─────────────────────────────────────────────────────────┐ │
│ │ 1. LoadModel() - 从文件加载模型 │ │
│ │ 2. AllocateTensors() - 分配张量内存 │ │
│ │ 3. Invoke() - 执行推理 │ │
│ │ 4. GetTensor() - 获取输入/输出张量 │ │
│ └─────────────────────────────────────────────────────────┘ │
│ │ │
│ ▼ │
│ 执行引擎 (Execution Engine) │
│ ┌─────────────────────────────────────────────────────────┐ │
│ │ - 串行执行: 顺序执行算子 │ │
│ │ - 并行执行: 多线程加速 │ │
│ │ - Delegate: GPU/NNAPI 硬件加速 │ │
│ └─────────────────────────────────────────────────────────┘ │
│ │
└─────────────────────────────────────────────────────────────────────────┘

1.2 FlatBuffer 格式

TFLite 模型使用 FlatBuffer 二进制格式,相比 Protocol Buffers 更紧凑、加载更快:文件可以直接内存映射使用,无需反序列化步骤。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
// Illustrative sketch of a TFLite model holder — NOT the real
// tflite::FlatBufferModel API; shown only to visualize the pieces involved.
struct FlatBufferModel {
// Pointer to the FlatBuffer-backed model metadata (root Model table).
tflite::Model* model;

// Registry mapping builtin op codes to kernel implementations.
tflite::ops::builtin::BuiltinOpResolver resolver;

// Indices of the graph's input/output tensors.
std::vector<int> input_indices;
std::vector<int> output_indices;

// Tensor descriptors. NOTE(review): raw pointers are non-owning here —
// presumably the interpreter owns the tensors; confirm before copying this pattern.
std::vector<TfLiteTensor*> tensors;
};

// Illustrative layout of a subgraph (mirrors the FlatBuffer SubGraph table).
struct Subgraph {
// Indices of this subgraph's input tensors.
std::vector<int> inputs;

// Indices of this subgraph's output tensors.
std::vector<int> outputs;

// Operators in execution order.
std::vector<Operator*> operators;

// Tensor descriptors. NOTE(review): non-owning pointers — confirm ownership.
std::vector<TfLiteTensor*> tensors;
};

1.3 推理执行流程

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
// Standard inference flow: fetch frame → preprocess → Invoke() → postprocess → emit.
absl::Status TFLiteInferenceCalculator::Process(CalculatorContext* cc) {
// 1. Fetch the input image packet for the current timestamp.
const ImageFrame& image = cc->Inputs().Tag("IMAGE").Get<ImageFrame>();

// 2. Preprocess into the model's input tensor.
// The pointer is only valid after AllocateTensors() has run.
float* input_tensor = interpreter_->typed_input_tensor<float>(0);
MP_RETURN_IF_ERROR(Preprocess(image, input_tensor));

// 3. Run inference synchronously on the calculator's thread.
TF_LITE_ENSURE_OK(interpreter_.get(), interpreter_->Invoke());

// 4. Read the output tensor. Re-fetched every call because tensor memory can
// move across AllocateTensors()/delegate changes.
float* output_tensor = interpreter_->typed_output_tensor<float>(0);
std::vector<Detection> detections = Postprocess(output_tensor);

// 5. Emit the detections at the input packet's timestamp.
cc->Outputs().Tag("DETECTIONS").AddPacket(
MakePacket<std::vector<Detection>>(detections).At(cc->InputTimestamp()));

return absl::OkStatus();
}

二、TFLite 集成详解

2.1 模型加载

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
// Loads a .tflite model from disk, builds the interpreter, and allocates tensors.
//
// Args:
//   model_path:  filesystem path to the FlatBuffer model file.
//   num_threads: CPU thread count used by the interpreter kernels.
absl::Status TFLiteInferenceCalculator::LoadModel(
    const std::string& model_path, int num_threads) {

  // 1. Memory-map the FlatBuffer model from disk.
  model_ = tflite::FlatBufferModel::BuildFromFile(model_path.c_str());
  RET_CHECK(model_ != nullptr) << "Failed to load model: " << model_path;

  // 2. Build the interpreter with the builtin op resolver.
  // Custom ops, if any, would be registered here via resolver.AddCustom(...).
  tflite::ops::builtin::BuiltinOpResolver resolver;
  tflite::InterpreterBuilder builder(*model_, resolver);
  builder(&interpreter_);
  RET_CHECK(interpreter_ != nullptr) << "Failed to create interpreter";

  // 3. Configure CPU parallelism.
  interpreter_->SetNumThreads(num_threads);

  // 4. Allocate memory for all tensors (required before touching tensor data).
  TF_LITE_ENSURE_OK(interpreter_.get(), interpreter_->AllocateTensors());

  // 5. Log input/output tensor metadata. FIX: dimensions are printed
  // rank-agnostically — the original read dims->data[0..3] unconditionally,
  // which is out of bounds for tensors of rank < 4.
  auto dims_to_string = [](const TfLiteIntArray* dims) {
    std::string s = "[";
    for (int i = 0; i < dims->size; ++i) {
      if (i > 0) s += ", ";
      s += std::to_string(dims->data[i]);
    }
    return s + "]";
  };
  auto* input_tensor = interpreter_->input_tensor(0);
  auto* output_tensor = interpreter_->output_tensor(0);

  LOG(INFO) << "Model loaded successfully";
  LOG(INFO) << " Input shape: " << dims_to_string(input_tensor->dims);
  LOG(INFO) << " Output shape: " << dims_to_string(output_tensor->dims);
  // FIX: label corrected — kTfLiteUInt8 is the UNSIGNED 8-bit type; the
  // original log labeled it "Int8".
  LOG(INFO) << " Input type: " << input_tensor->type
            << " (Float=" << kTfLiteFloat32 << ", UInt8=" << kTfLiteUInt8 << ")";

  return absl::OkStatus();
}

2.2 输入预处理

图像预处理是模型推理的关键步骤,直接影响检测精度和性能。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
// Preprocesses an ImageFrame into a float32 input tensor:
// resize → convert to 3-channel RGB → scale [0, 255] to [0, 1].
absl::Status TFLiteInferenceCalculator::Preprocess(
const ImageFrame& input, float* input_tensor) {

// Zero-copy view of the frame as a cv::Mat.
cv::Mat input_mat = formats::MatView(&input);

// Target size comes from the loaded model's input tensor.
int target_width = input_width_;
int target_height = input_height_;

// 1. Resize with bilinear interpolation.
cv::Mat resized;
cv::resize(input_mat, resized, cv::Size(target_width, target_height),
0, 0, cv::INTER_LINEAR);

// 2. Convert to 3-channel RGB regardless of source format.
if (input.Format() == ImageFormat::SRGBA) {
// RGBA → RGB
cv::cvtColor(resized, resized, cv::COLOR_RGBA2RGB);
} else if (input.Format() == ImageFormat::BGRA) {
// BGRA → RGB
cv::cvtColor(resized, resized, cv::COLOR_BGRA2RGB);
} else if (input.Format() == ImageFormat::GRAY8) {
// Grayscale → RGB (channel replication).
cv::Mat rgb;
cv::cvtColor(resized, rgb, cv::COLOR_GRAY2RGB);
resized = rgb;
}

// 3. Normalize. NOTE(review): linear indexing of resized.data assumes the Mat
// is continuous (true for freshly allocated resize/cvtColor outputs) and that
// its channel count equals input_channels_ — confirm for formats not handled above.
int size = target_width * target_height * input_channels_;
for (int i = 0; i < size; ++i) {
// [0, 255] → [0, 1]
input_tensor[i] = resized.data[i] / 255.0f;
}

return absl::OkStatus();
}

量化模型预处理:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
// Preprocess variant for quantized (uint8) input tensors: resize → RGB → raw copy.
absl::Status TFLiteInferenceCalculator::Preprocess(
const ImageFrame& input, uint8_t* input_tensor) {

cv::Mat input_mat = formats::MatView(&input);
cv::Mat resized;
cv::resize(input_mat, resized, cv::Size(input_width_, input_height_));

// Color conversion. NOTE(review): unlike the float path, BGRA and GRAY8 are
// not handled here — confirm the expected input formats for quantized models.
if (input.Format() == ImageFormat::SRGBA) {
cv::cvtColor(resized, resized, cv::COLOR_RGBA2RGB);
}

// Raw byte copy — no normalization needed for uint8 models.
// NOTE(review): assumes `resized` is continuous with input_channels_ channels.
std::memcpy(input_tensor, resized.data,
input_width_ * input_height_ * input_channels_);

return absl::OkStatus();
}

2.3 输出后处理

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
// Decodes the raw output tensor into Detection results above the score threshold.
//
// Expected output layout: num_detections rows of 6 floats each:
//   [ymin, xmin, ymax, xmax, score, class_id]
std::vector<Detection> TFLiteInferenceCalculator::Postprocess(
const float* output_tensor, int num_detections) {

std::vector<Detection> detections;

// NOTE(review): the set_ymin()/set_class_id() accessors suggest a
// project-specific Detection message — the upstream mediapipe::Detection proto
// stores boxes in location_data instead; confirm which type is in use.

for (int i = 0; i < num_detections; ++i) {
// Each detection occupies 6 consecutive floats.
const float* detection = output_tensor + i * 6;

float score = detection[4];

// Skip detections below the configured confidence threshold.
if (score < score_threshold_) {
continue;
}

Detection det;
det.set_ymin(detection[0]);
det.set_xmin(detection[1]);
det.set_ymax(detection[2]);
det.set_xmax(detection[3]);
det.set_score(score);
det.set_class_id(static_cast<int>(detection[5]));

detections.push_back(det);
}

return detections;
}

复杂后处理(如 NMS):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
// Greedy non-maximum suppression.
//
// Args:
//   boxes:         axis-aligned boxes as [ymin, xmin, ymax, xmax].
//   scores:        one confidence score per box.
//   nms_threshold: IoU above which a lower-scored box is suppressed.
// Returns: indices of the kept boxes, in descending-score order.
//
// FIX: the original called CalculateIoU(), which has no free-function
// definition in this file (only a class member exists) — the IoU is now
// computed by a local lambda so the function is self-contained. The inner loop
// also only visits strictly lower-scored boxes, avoiding the pointless
// self-comparison (IoU == 1) and redundant checks against already-kept boxes.
std::vector<int> NonMaxSuppression(
    const std::vector<std::vector<float>>& boxes,
    const std::vector<float>& scores,
    float nms_threshold) {

  // Intersection-over-union of two [ymin, xmin, ymax, xmax] boxes.
  auto iou = [](const std::vector<float>& a, const std::vector<float>& b) {
    float x1 = std::max(a[1], b[1]);
    float y1 = std::max(a[0], b[0]);
    float x2 = std::min(a[3], b[3]);
    float y2 = std::min(a[2], b[2]);
    float inter = std::max(0.0f, x2 - x1) * std::max(0.0f, y2 - y1);
    float area_a = (a[3] - a[1]) * (a[2] - a[0]);
    float area_b = (b[3] - b[1]) * (b[2] - b[0]);
    float uni = area_a + area_b - inter;
    return uni > 0 ? inter / uni : 0.0f;
  };

  // Indices sorted by descending score.
  std::vector<int> order(scores.size());
  std::iota(order.begin(), order.end(), 0);
  std::sort(order.begin(), order.end(),
            [&scores](int a, int b) { return scores[a] > scores[b]; });

  std::vector<int> indices;
  std::vector<bool> suppressed(boxes.size(), false);

  for (size_t oi = 0; oi < order.size(); ++oi) {
    int i = order[oi];
    if (suppressed[i]) continue;
    indices.push_back(i);

    // Only strictly lower-scored boxes can still be suppressed.
    for (size_t oj = oi + 1; oj < order.size(); ++oj) {
      int j = order[oj];
      if (suppressed[j]) continue;
      if (iou(boxes[i], boxes[j]) > nms_threshold) {
        suppressed[j] = true;
      }
    }
  }

  return indices;
}

三、GPU/CPU/NNAPI Delegate 选择

3.1 性能对比

硬件加速方式 适用场景 性能提升 优点 缺点
CPU 通用设备、调试 基准 兼容性好、无依赖 性能受限
GPU Android/iOS、桌面 2-10x 高并行度、通用 需要驱动支持
NNAPI Android 8.1+(各厂商 NPU/DSP/GPU 后端) 2-5x 原生支持、低功耗 仅 Android
XNNPACK 多平台、ARM 2-8x 高效、轻量 需要编译
CoreML iOS/macOS 2-6x 原生支持、优化 仅 Apple

3.2 GPU Delegate

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
#include "tensorflow/lite/delegates/gpu/gpu_delegate.h"

// Applies the TFLite GPU delegate during calculator setup.
// FIX: `override` removed — it is only valid inside a class definition, not on
// an out-of-class member function definition.
absl::Status TFLiteInferenceCalculator::Open(CalculatorContext* cc) {
  // ... load the model ...

  // 1. Configure the GPU delegate.
  TfLiteGpuDelegateOptionsV2 options = TfLiteGpuDelegateOptionsV2Default();
  options.is_precision_loss_allowed = true;  // allow FP16 (faster, minor accuracy loss)
  options.inference_preference =
      TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED;  // favor steady throughput

  // 2. Create the delegate.
  auto* delegate = TfLiteGpuDelegateV2Create(&options);
  RET_CHECK(delegate != nullptr) << "Failed to create GPU delegate";

  // 3. Attach the delegate to the interpreter graph.
  TF_LITE_ENSURE_OK(interpreter_.get(),
                    interpreter_->ModifyGraphWithDelegate(delegate));

  // 4. Lifetime. FIX: the original comment claimed TFLite manages the delegate
  // — it does NOT take ownership. The delegate must stay alive for as long as
  // the interpreter uses it, and TfLiteGpuDelegateV2Delete() must be called
  // only after the interpreter has been destroyed.

  LOG(INFO) << "GPU delegate applied successfully";
  LOG(INFO) << " Precision: " << (options.is_precision_loss_allowed ? "FP16" : "FP32");

  return absl::OkStatus();
}

GPU Delegate 配置选项:

1
2
3
4
5
6
7
8
9
10
11
// Advanced GPU delegate configuration (illustrative).
// NOTE(review): several of these field names (cache_dir, model_buffer,
// enable_delegate_optimization, allow_fp16_precision_for_2d_tensors) do not
// appear in the upstream TfLiteGpuDelegateOptionsV2 struct — verify each field
// against the TFLite version actually in use before copying this snippet.
TfLiteGpuDelegateOptionsV2 advanced_options = {
.cache_dir = "/data/local/tmp/tflite_gpu_cache", // kernel/program cache directory
.model_buffer = nullptr, // in-memory model (unused when loading from file)
.model_buffer_size = 0,
.is_precision_loss_allowed = true, // allow FP16
.inference_preferred_precision = kTfLiteFloat32, // preferred compute precision
.inference_preference = TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED,
.enable_delegate_optimization = true, // enable delegate-side optimization
.allow_fp16_precision_for_2d_tensors = true, // allow FP16 for 2-D tensors
};

3.3 NNAPI Delegate

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"

// Applies the NNAPI delegate (Android 8.1+).
// FIX: `override` removed — it is illegal on an out-of-class member definition.
// NOTE(review): upstream TFLite exposes NNAPI through
// tflite::StatefulNnApiDelegate; the C-style TfLiteNnApiDelegateCreate API
// shown here should be verified against the TFLite version in use.
absl::Status TFLiteInferenceCalculator::Open(CalculatorContext* cc) {
  // ... load the model ...

  // 1. Configure the NNAPI delegate.
  TfLiteNnApiDelegateOptions options = {
      .enabled = true,
      .num_threads = 4,        // CPU-fallback thread count
      .model_cache = nullptr,  // optional compiled-model cache
  };

  auto* delegate = TfLiteNnApiDelegateCreate(&options);
  RET_CHECK(delegate != nullptr) << "Failed to create NNAPI delegate";

  // 2. Attach to the interpreter. The delegate must outlive the interpreter.
  TF_LITE_ENSURE_OK(interpreter_.get(),
                    interpreter_->ModifyGraphWithDelegate(delegate));

  LOG(INFO) << "NNAPI delegate applied successfully";
  LOG(INFO) << " Backend: " << TfLiteNnApiGetBackendName();

  return absl::OkStatus();
}

NNAPI 硬件后端:

1
2
3
4
5
6
7
8
// 检测可用的 NNAPI 后端
const char* TfLiteNnApiGetBackendName() {
// 返回: "nnapi" (通用), "gpu" (GPU), "dsp" (DSP), "cpu" (CPU)
// 具体取决于设备
}

// 查询 NNAPI 版本
int TfLiteNnApiGetVersion();

3.4 XNNPACK Delegate

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"

// Applies the XNNPACK delegate (optimized CPU kernels, all platforms).
// FIX: `override` removed — it is illegal on an out-of-class member definition.
absl::Status TFLiteInferenceCalculator::Open(CalculatorContext* cc) {
  // ... load the model ...

  // 1. Start from the library defaults rather than zero-initialization, so any
  // option fields beyond num_threads keep their intended default values.
  TfLiteXNNPackDelegateOptions options = TfLiteXNNPackDelegateOptionsDefault();
  options.num_threads = 4;

  auto* delegate = TfLiteXNNPackDelegateCreate(&options);
  RET_CHECK(delegate != nullptr) << "Failed to create XNNPACK delegate";

  // 2. Attach to the interpreter. Release with TfLiteXNNPackDelegateDelete()
  // only after the interpreter has been destroyed.
  TF_LITE_ENSURE_OK(interpreter_.get(),
                    interpreter_->ModifyGraphWithDelegate(delegate));

  LOG(INFO) << "XNNPACK delegate applied successfully";
  LOG(INFO) << " Threads: " << options.num_threads;

  return absl::OkStatus();
}

3.5 Delegate 组合使用

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
// Applies available delegates in preference order: GPU → NNAPI → XNNPACK.
// FIX: `override` removed — it is illegal on an out-of-class member definition.
absl::Status TFLiteInferenceCalculator::Open(CalculatorContext* cc) {
  // ... load the model ...

  std::vector<TfLiteDelegate*> delegates;

  // 1. Prefer the GPU delegate (Android/iOS).
#ifdef __ANDROID__
  TfLiteGpuDelegateOptionsV2 gpu_options = TfLiteGpuDelegateOptionsV2Default();
  gpu_options.is_precision_loss_allowed = true;
  gpu_options.inference_preference = TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED;
  auto* gpu_delegate = TfLiteGpuDelegateV2Create(&gpu_options);
  if (gpu_delegate) delegates.push_back(gpu_delegate);
#endif

  // 2. Then NNAPI (Android).
#ifdef __ANDROID__
  TfLiteNnApiDelegateOptions nnapi_options = { .enabled = true, .num_threads = 4 };
  auto* nnapi_delegate = TfLiteNnApiDelegateCreate(&nnapi_options);
  if (nnapi_delegate) delegates.push_back(nnapi_delegate);
#endif

  // 3. Fall back to XNNPACK (portable CPU path).
  TfLiteXNNPackDelegateOptions xnnpack_options = { .num_threads = 4 };
  auto* xnnpack_delegate = TfLiteXNNPackDelegateCreate(&xnnpack_options);
  if (xnnpack_delegate) delegates.push_back(xnnpack_delegate);

  // 4. Apply all delegates.
  if (!delegates.empty()) {
    interpreter_->ModifyGraphWithDelegates(delegates);
    LOG(INFO) << "Applied " << delegates.size() << " delegate(s)";
  }

  // BUG FIX: the original deleted the delegates here, but the interpreter keeps
  // raw pointers to them — destroying them while the interpreter is alive is a
  // use-after-free. Delegates must be kept (e.g. in members with their
  // type-specific deleters) and released only after `interpreter_` is destroyed.

  return absl::OkStatus();
}

四、完整 Calculator 实现

4.1 头文件

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
// tflite_inference_calculator.h
#ifndef TFLITE_INFERENCE_CALCULATOR_H_
#define TFLITE_INFERENCE_CALCULATOR_H_

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/detection.pb.h"
#include "mediapipe/framework/formats/tensor.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/delegates/gpu/gpu_delegate.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"

namespace mediapipe {

// Inference options (plain struct; a .proto variant exists for graph configs).
struct TFLiteInferenceCalculatorOptions {
  std::string model_path = "";
  int num_threads = 4;
  float score_threshold = 0.5f;
  float iou_threshold = 0.45f;
  bool use_gpu = false;
  bool use_nnapi = false;
  bool use_xnnpack = false;
  bool enable_delegate_optimization = true;
};

// Runs a TFLite detection model on incoming ImageFrames and emits Detections.
class TFLiteInferenceCalculator : public CalculatorBase {
 public:
  static absl::Status GetContract(CalculatorContract* cc);
  static absl::Status RegisterTypes(CalculatorContext* cc);

  absl::Status Open(CalculatorContext* cc) override;
  absl::Status Process(CalculatorContext* cc) override;

 private:
  // Model ownership: the FlatBuffer must outlive the interpreter.
  std::unique_ptr<tflite::FlatBufferModel> model_;
  std::unique_ptr<tflite::Interpreter> interpreter_;

  // Input/output geometry, read from the model after AllocateTensors().
  int input_width_ = 320;
  int input_height_ = 320;
  int input_channels_ = 3;
  int output_tensor_size_ = 0;

  // Quantization parameters (meaningful when use_quantized_ is true).
  bool use_quantized_ = false;
  float input_scale_ = 1.0f;
  int input_zero_point_ = 0;

  // Post-processing configuration.
  float score_threshold_ = 0.5f;
  float iou_threshold_ = 0.45f;

  // Delegate-selection flags.
  // FIX: these members are referenced by LoadModel() in the .cc file but were
  // missing from the class declaration.
  bool use_gpu_ = false;
  bool use_nnapi_ = false;
  bool use_xnnpack_ = false;

  // Loads the model and builds the interpreter.
  absl::Status LoadModel(const std::string& model_path, int num_threads);

  // Preprocessing (float and quantized variants).
  absl::Status Preprocess(const ImageFrame& input, float* input_tensor);
  absl::Status Preprocess(const ImageFrame& input, uint8_t* input_tensor);

  // Post-processing.
  // FIX: signatures updated to match the definitions and call sites in the
  // .cc file (Postprocess takes the detection count; NonMaxSuppression takes
  // the IoU threshold).
  std::vector<Detection> Postprocess(const float* output_tensor,
                                     int num_detections);
  std::vector<int> NonMaxSuppression(
      const std::vector<std::vector<float>>& boxes,
      const std::vector<float>& scores,
      float nms_threshold);
  float CalculateIoU(const std::vector<float>& box1,
                     const std::vector<float>& box2);
};

}  // namespace mediapipe

#endif  // TFLITE_INFERENCE_CALCULATOR_H_

4.2 实现文件

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
// tflite_inference_calculator.cc
#include "tflite_inference_calculator.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/port/opencv_imgproc.h"
#include "mediapipe/framework/port/opencv_highgui.h"

namespace mediapipe {

// Declares the calculator's I/O contract: one IMAGE input stream, one
// DETECTIONS output stream, a MODEL_PATH side packet, and the options type.
absl::Status TFLiteInferenceCalculator::GetContract(CalculatorContract* cc) {
// Input stream: frames to run inference on.
cc->Inputs().Tag("IMAGE").Set<ImageFrame>();

// Output stream: detections per input frame.
cc->Outputs().Tag("DETECTIONS").Set<std::vector<Detection>>();

// Side packet: path to the .tflite model file.
cc->InputSidePackets().Tag("MODEL_PATH").Set<std::string>();

// Declares the options type so the framework can parse it from the graph config.
cc->Options<TFLiteInferenceCalculatorOptions>();

return absl::OkStatus();
}

// Hook for registering custom packet types; nothing to register currently.
absl::Status TFLiteInferenceCalculator::RegisterTypes(CalculatorContext* cc) {
// No custom types at the moment.
return absl::OkStatus();
}

// Initializes the calculator: loads the model, caches tensor geometry and
// quantization parameters, and copies the post-processing thresholds.
absl::Status TFLiteInferenceCalculator::Open(CalculatorContext* cc) {
  const auto& options = cc->Options<TFLiteInferenceCalculatorOptions>();

  // The model path comes from the side packet (not from options.model_path).
  std::string model_path = cc->InputSidePackets().Tag("MODEL_PATH").Get<std::string>();

  MP_RETURN_IF_ERROR(LoadModel(model_path, options.num_threads));

  // Cache input geometry from the model. NOTE(review): assumes an NHWC,
  // rank-4 input tensor — confirm for the model in use.
  auto* input_tensor = interpreter_->input_tensor(0);
  input_height_ = input_tensor->dims->data[1];
  input_width_ = input_tensor->dims->data[2];
  input_channels_ = input_tensor->dims->data[3];

  // Detect a uint8-quantized input and record its quantization parameters.
  use_quantized_ = (input_tensor->type == kTfLiteUInt8);
  if (use_quantized_) {
    input_scale_ = input_tensor->params.scale;
    input_zero_point_ = input_tensor->params.zero_point;
    LOG(INFO) << "Quantized model detected: scale=" << input_scale_
              << ", zero_point=" << input_zero_point_;
  }

  // FIX: TFLiteInferenceCalculatorOptions is a plain struct (see the header),
  // so these are data members, not accessor functions — the original
  // `options.score_threshold()` / `options.iou_threshold()` calls would not compile.
  score_threshold_ = options.score_threshold;
  iou_threshold_ = options.iou_threshold;

  LOG(INFO) << "Model loaded: " << input_width_ << "x" << input_height_
            << " channels=" << input_channels_
            << " quantized=" << use_quantized_
            << " score_threshold=" << score_threshold_;

  return absl::OkStatus();
}

// Per-frame processing: preprocess → Invoke() → postprocess → emit detections.
absl::Status TFLiteInferenceCalculator::Process(CalculatorContext* cc) {
// Skip timestamps with no image packet.
if (cc->Inputs().Tag("IMAGE").IsEmpty()) {
return absl::OkStatus();
}

const ImageFrame& image = cc->Inputs().Tag("IMAGE").Get<ImageFrame>();

// 1. Preprocess into the model's input tensor (uint8 or float path).
if (use_quantized_) {
uint8_t* input_tensor = interpreter_->typed_input_tensor<uint8_t>(0);
MP_RETURN_IF_ERROR(Preprocess(image, input_tensor));
} else {
float* input_tensor = interpreter_->typed_input_tensor<float>(0);
MP_RETURN_IF_ERROR(Preprocess(image, input_tensor));
}

// 2. Synchronous inference.
TF_LITE_ENSURE_OK(interpreter_.get(), interpreter_->Invoke());

// 3. Postprocess. NOTE(review): this reads a float output even for quantized
// models, and assumes dims->data[1] is the detection count — confirm both
// hold for the model in use.
float* output_tensor = interpreter_->typed_output_tensor<float>(0);
int num_detections = interpreter_->output_tensor(0)->dims->data[1];
std::vector<Detection> detections = Postprocess(output_tensor, num_detections);

// 4. Emit at the input packet's timestamp.
cc->Outputs().Tag("DETECTIONS").AddPacket(
MakePacket<std::vector<Detection>>(detections).At(cc->InputTimestamp()));

return absl::OkStatus();
}

// Loads the model, builds and configures the interpreter, and attaches any
// requested hardware delegates.
absl::Status TFLiteInferenceCalculator::LoadModel(
    const std::string& model_path, int num_threads) {

  // 1. Memory-map the FlatBuffer model from disk.
  model_ = tflite::FlatBufferModel::BuildFromFile(model_path.c_str());
  RET_CHECK(model_ != nullptr) << "Failed to load model: " << model_path;

  // 2. Build the interpreter.
  tflite::ops::builtin::BuiltinOpResolver resolver;
  tflite::InterpreterBuilder builder(*model_, resolver);
  builder(&interpreter_);
  RET_CHECK(interpreter_ != nullptr) << "Failed to create interpreter";

  // 3. Configure CPU parallelism.
  interpreter_->SetNumThreads(num_threads);

  // 4. Collect the requested delegates BEFORE allocating tensors, so tensors
  // are planned once for the final (delegated) graph instead of twice.
  std::vector<TfLiteDelegate*> delegates;
  if (use_gpu_) {
#ifdef __ANDROID__
    TfLiteGpuDelegateOptionsV2 gpu_options = TfLiteGpuDelegateOptionsV2Default();
    gpu_options.is_precision_loss_allowed = true;
    gpu_options.inference_preference = TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED;
    auto* gpu_delegate = TfLiteGpuDelegateV2Create(&gpu_options);
    if (gpu_delegate) delegates.push_back(gpu_delegate);
#endif
  }

  if (use_nnapi_) {
#ifdef __ANDROID__
    TfLiteNnApiDelegateOptions nnapi_options = { .enabled = true, .num_threads = 4 };
    auto* nnapi_delegate = TfLiteNnApiDelegateCreate(&nnapi_options);
    if (nnapi_delegate) delegates.push_back(nnapi_delegate);
#endif
  }

  if (use_xnnpack_) {
    TfLiteXNNPackDelegateOptions xnnpack_options = { .num_threads = 4 };
    auto* xnnpack_delegate = TfLiteXNNPackDelegateCreate(&xnnpack_options);
    if (xnnpack_delegate) delegates.push_back(xnnpack_delegate);
  }

  if (!delegates.empty()) {
    interpreter_->ModifyGraphWithDelegates(delegates);
    LOG(INFO) << "Applied " << delegates.size() << " delegate(s)";
  }

  // BUG FIX: the original called TfLiteDelegateDelete() on each delegate here,
  // but the interpreter keeps raw pointers to them — destroying them while
  // `interpreter_` is alive is a use-after-free. Delegates must outlive the
  // interpreter and be released (with their type-specific deleters) only after
  // it has been destroyed.

  // 5. Allocate tensors for the (possibly delegated) graph.
  TF_LITE_ENSURE_OK(interpreter_.get(), interpreter_->AllocateTensors());

  return absl::OkStatus();
}

// Preprocesses a frame into the float32 input tensor:
// resize → convert to 3-channel RGB → scale [0, 255] to [0, 1].
absl::Status TFLiteInferenceCalculator::Preprocess(
const ImageFrame& input, float* input_tensor) {

cv::Mat input_mat = formats::MatView(&input);

// Resize to the model's input resolution (bilinear).
cv::Mat resized;
cv::resize(input_mat, resized, cv::Size(input_width_, input_height_),
0, 0, cv::INTER_LINEAR);

// Convert to 3-channel RGB regardless of the source format.
if (input.Format() == ImageFormat::SRGBA) {
cv::cvtColor(resized, resized, cv::COLOR_RGBA2RGB);
} else if (input.Format() == ImageFormat::BGRA) {
cv::cvtColor(resized, resized, cv::COLOR_BGRA2RGB);
} else if (input.Format() == ImageFormat::GRAY8) {
cv::Mat rgb;
cv::cvtColor(resized, rgb, cv::COLOR_GRAY2RGB);
resized = rgb;
}

// Normalize. NOTE(review): linear indexing of resized.data assumes a
// continuous Mat with input_channels_ channels — holds for the conversions
// above; confirm for any other input format.
int size = input_width_ * input_height_ * input_channels_;
for (int i = 0; i < size; ++i) {
input_tensor[i] = resized.data[i] / 255.0f;
}

return absl::OkStatus();
}

// Preprocess variant for quantized (uint8) inputs: resize → RGB → raw copy.
absl::Status TFLiteInferenceCalculator::Preprocess(
const ImageFrame& input, uint8_t* input_tensor) {

cv::Mat input_mat = formats::MatView(&input);
cv::Mat resized;
cv::resize(input_mat, resized, cv::Size(input_width_, input_height_));

// Convert to RGB. NOTE(review): GRAY8 is not handled here, unlike the float
// path — confirm the expected input formats for quantized models.
if (input.Format() == ImageFormat::SRGBA) {
cv::cvtColor(resized, resized, cv::COLOR_RGBA2RGB);
} else if (input.Format() == ImageFormat::BGRA) {
cv::cvtColor(resized, resized, cv::COLOR_BGRA2RGB);
}

// Raw byte copy — no normalization needed for uint8 models.
// NOTE(review): assumes `resized` is continuous with input_channels_ channels.
std::memcpy(input_tensor, resized.data,
input_width_ * input_height_ * input_channels_);

return absl::OkStatus();
}

// Decodes raw model output into Detections: score-threshold filter, then NMS.
// Expected layout: num_detections rows of [ymin, xmin, ymax, xmax, score, class_id].
std::vector<Detection> TFLiteInferenceCalculator::Postprocess(
const float* output_tensor, int num_detections) {

std::vector<Detection> detections;

// Stage 1: decode rows and drop detections below the score threshold.
for (int i = 0; i < num_detections; ++i) {
const float* detection = output_tensor + i * 6;

float score = detection[4];

// Skip low-confidence detections.
if (score < score_threshold_) {
continue;
}

Detection det;
det.set_ymin(detection[0]);
det.set_xmin(detection[1]);
det.set_ymax(detection[2]);
det.set_xmax(detection[3]);
det.set_score(score);
det.set_class_id(static_cast<int>(detection[5]));

detections.push_back(det);
}

// Stage 2: non-maximum suppression on the surviving boxes.
std::vector<std::vector<float>> boxes;
std::vector<float> scores;

for (const auto& det : detections) {
boxes.push_back({det.ymin(), det.xmin(), det.ymax(), det.xmax()});
scores.push_back(det.score());
}

auto keep_indices = NonMaxSuppression(boxes, scores, iou_threshold_);

// Stage 3: rebuild the output from the kept indices.
std::vector<Detection> final_detections;
for (int idx : keep_indices) {
final_detections.push_back(detections[idx]);
}

return final_detections;
}

// Greedy non-maximum suppression over score-ordered boxes.
//
// Args:
//   boxes:         axis-aligned boxes as [ymin, xmin, ymax, xmax].
//   scores:        one confidence score per box.
//   nms_threshold: IoU above which a lower-scored box is suppressed.
// Returns: indices of the kept boxes, in descending-score order.
std::vector<int> TFLiteInferenceCalculator::NonMaxSuppression(
    const std::vector<std::vector<float>>& boxes,
    const std::vector<float>& scores,
    float nms_threshold) {

  // Indices sorted by descending score.
  std::vector<int> order(scores.size());
  std::iota(order.begin(), order.end(), 0);
  std::sort(order.begin(), order.end(),
            [&scores](int a, int b) { return scores[a] > scores[b]; });

  std::vector<int> indices;
  std::vector<bool> suppressed(boxes.size(), false);

  for (size_t oi = 0; oi < order.size(); ++oi) {
    int i = order[oi];
    if (suppressed[i]) continue;
    indices.push_back(i);

    // FIX: only examine strictly lower-scored boxes. The original iterated the
    // entire order list, redundantly comparing against already-kept boxes and
    // against box i itself (self IoU == 1, so it even marked the kept box as
    // suppressed). Results are unchanged; the wasted work and confusing state
    // mutation are gone.
    for (size_t oj = oi + 1; oj < order.size(); ++oj) {
      int j = order[oj];
      if (suppressed[j]) continue;
      if (CalculateIoU(boxes[i], boxes[j]) > nms_threshold) {
        suppressed[j] = true;
      }
    }
  }

  return indices;
}

// Intersection-over-union of two axis-aligned boxes given in
// [ymin, xmin, ymax, xmax] order. Returns 0 when the union is degenerate.
float TFLiteInferenceCalculator::CalculateIoU(
    const std::vector<float>& box1,
    const std::vector<float>& box2) {

  // Corners of the (possibly empty) intersection rectangle.
  const float top    = std::max(box1[0], box2[0]);
  const float left   = std::max(box1[1], box2[1]);
  const float bottom = std::min(box1[2], box2[2]);
  const float right  = std::min(box1[3], box2[3]);

  // Clamp negative extents to zero so disjoint boxes yield zero overlap.
  const float overlap =
      std::max(0.0f, right - left) * std::max(0.0f, bottom - top);

  // Union area = sum of both areas minus the shared overlap.
  const float area1 = (box1[3] - box1[1]) * (box1[2] - box1[0]);
  const float area2 = (box2[3] - box2[1]) * (box2[2] - box2[0]);
  const float total = area1 + area2 - overlap;

  if (total <= 0) {
    return 0.0f;
  }
  return overlap / total;
}

REGISTER_CALCULATOR(TFLiteInferenceCalculator);

} // namespace mediapipe

4.3 Options 定义

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
// tflite_inference_options.proto
syntax = "proto3";

package mediapipe;

message TFLiteInferenceCalculatorOptions {
// 模型路径
optional string model_path = 1;

// 线程数
optional int32 num_threads = 2 [default = 4];

// 分数阈值
optional float score_threshold = 3 [default = 0.5];

// IoU 阈值
optional float iou_threshold = 4 [default = 0.45];

// GPU 加速
optional bool use_gpu = 5 [default = false];

// NNAPI 加速
optional bool use_nnapi = 6 [default = false];

// XNNPACK 加速
optional bool use_xnnpack = 7 [default = false];

// 启用 Delegate 优化
optional bool enable_delegate_optimization = 8 [default = true];
}

五、Graph 配置

5.1 基础配置

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# face_detection_graph.pbtxt

input_stream: "IMAGE:image"
output_stream: "DETECTIONS:detections"
input_side_packet: "MODEL_PATH:model_path"

# 流量限制
node {
calculator: "FlowLimiterCalculator"
input_stream: "image"
input_stream: "detections"
input_stream_info: { tag_index: "detections" back_edge: true }
output_stream: "throttled_image"
}

# 推理 Calculator
node {
calculator: "TFLiteInferenceCalculator"
input_stream: "IMAGE:throttled_image"
input_side_packet: "MODEL_PATH:model_path"
output_stream: "DETECTIONS:detections"
options {
[mediapipe.TFLiteInferenceCalculatorOptions.ext] {
num_threads: 4
score_threshold: 0.5
iou_threshold: 0.45
use_gpu: true
use_nnapi: true
}
}
}

5.2 完整人脸检测 Graph

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# mediapipe/graphs/face_detection/face_detection_short_range.pbtxt

input_stream: "IMAGE:image"
output_stream: "DETECTIONS:detections"

# 1. 图像格式转换
node {
calculator: "ImageTransformationCalculator"
input_stream: "IMAGE:image"
output_stream: "IMAGE:converted_image"
options {
[mediapipe.ImageTransformationCalculatorOptions.ext] {
output_format: SRGB
}
}
}

# 2. 缩放到模型输入尺寸
node {
calculator: "ImageTransformationCalculator"
input_stream: "IMAGE:converted_image"
output_stream: "IMAGE:resized_image"
options {
[mediapipe.ImageTransformationCalculatorOptions.ext] {
output_width: 320
output_height: 320
scale_mode: FIT
}
}
}

# 3. 转换为 Tensor
node {
calculator: "ImageToTensorCalculator"
input_stream: "IMAGE:resized_image"
output_stream: "TENSORS:tensors"
options {
[mediapipe.ImageToTensorCalculatorOptions.ext] {
tensor_width: 320
tensor_height: 320
tensor_channels: 3
tensor_float_range {
min: -1.0
max: 1.0
}
}
}
}

# 4. 模型推理
node {
calculator: "TFLiteInferenceCalculator"
input_stream: "TENSORS:tensors"
input_side_packet: "MODEL_PATH:model_path"
output_stream: "DETECTIONS:detections"
options {
[mediapipe.TFLiteInferenceCalculatorOptions.ext] {
model_path: "/models/blazeface.tflite"
num_threads: 4
score_threshold: 0.5
iou_threshold: 0.45
use_gpu: true
use_nnapi: true
use_xnnpack: true
}
}
}

# 5. 后处理(如果需要)
node {
calculator: "BlazeFacePostprocessorCalculator"
input_stream: "DETECTIONS:detections"
input_stream: "ORIGINAL_IMAGE_SIZE:image_size"
output_stream: "DETECTIONS:final_detections"
options {
[mediapipe.BlazeFaceOptions.ext] {
score_threshold: 0.5
min_suppression_threshold: 0.3
num_keypoints: 6
}
}
}

5.3 Bazel 构建

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# mediapipe/calculators/tflite/BUILD

cc_library(
name = "tflite_inference_calculator",
srcs = [
"tflite_inference_calculator.cc",
],
hdrs = [
"tflite_inference_calculator.h",
],
deps = [
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:tensor",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:detection",
"@org_tensorflow//tensorflow/lite:framework",
"@org_tensorflow//tensorflow/lite/kernels:builtin_ops",
"@org_tensorflow//tensorflow/lite/delegates/gpu:gpu_delegate",
"@org_tensorflow//tensorflow/lite/delegates/nnapi:nnapi_delegate",
"@org_tensorflow//tensorflow/lite/delegates/xnnpack:xnnpack_delegate",
"@org_opencv//:opencv_core",
"@org_opencv//:opencv_imgproc",
"@org_opencv//:opencv_highgui",
],
alwayslink = 1,
)

cc_library(
name = "blazeface_postprocessor_calculator",
srcs = [
"blazeface_postprocessor.cc",
],
hdrs = [
"blazeface_postprocessor.h",
],
deps = [
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:detection",
"@org_tensorflow//tensorflow/lite:framework",
],
alwayslink = 1,
)

六、性能优化技巧

6.1 模型优化

量化模型:

1
2
3
4
5
6
7
8
9
10
# 使用 TFLite Converter 量化
tflite_convert \
--saved_model_dir=/path/to/saved_model \
--output_file=blazeface_quant.tflite \
--post_training_quantize \
--inference_type=QUANTIZED_UINT8 \
--input_arrays=Input \
--output_arrays=Identity \
--default_ranges_min=0.0 \
--default_ranges_max=255.0

模型压缩:

1
2
3
4
# 使用 TensorFlow Model Optimization Toolkit 压缩
python3 model_optimization_toolkit/quantization/quantize_model.py \
--saved_model_dir=/path/to/saved_model \
--output_dir=/path/to/quantized_model

6.2 运行时优化

多线程加速:

1
2
3
4
5
// 设置合理的线程数
interpreter_->SetNumThreads(num_threads);

// 推荐线程数:CPU 核心数 + 1
int num_threads = std::thread::hardware_concurrency() + 1;

Delegate 选择策略:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
// Delegate-selection strategy: try accelerators in preference order and stop
// at the first one that initializes (GPU → NNAPI → XNNPACK → plain CPU).
// NOTE(review): illustrative pseudocode — the `...` parameter list and the
// Create*Delegate() helpers are not defined anywhere in this article.
absl::Status TFLiteInferenceCalculator::LoadModel(...) {
// 1. Prefer GPU (Android/iOS).
if (use_gpu_) {
auto* gpu_delegate = CreateGpuDelegate();
if (gpu_delegate) {
interpreter_->ModifyGraphWithDelegate(gpu_delegate);
return absl::OkStatus();
}
}

// 2. Then NNAPI (Android).
if (use_nnapi_) {
auto* nnapi_delegate = CreateNnApiDelegate();
if (nnapi_delegate) {
interpreter_->ModifyGraphWithDelegate(nnapi_delegate);
return absl::OkStatus();
}
}

// 3. Fall back to XNNPACK (portable CPU kernels).
if (use_xnnpack_) {
auto* xnnpack_delegate = CreateXnnpackDelegate();
if (xnnpack_delegate) {
interpreter_->ModifyGraphWithDelegate(xnnpack_delegate);
return absl::OkStatus();
}
}

// 4. Otherwise run on the plain CPU path.
return absl::OkStatus();
}

6.3 内存优化

张量复用:

1
2
3
4
5
// 避免重复分配内存
float* input_tensor = interpreter_->typed_input_tensor<float>(0);

// 预分配输出缓冲区
std::vector<float> output_buffer(output_tensor_size_);

内存池:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
// Buffer pool that recycles float arrays to cut per-frame allocation overhead.
//
// WARNING(review): the pool does not track buffer sizes — GetBuffer(size) may
// hand back a recycled buffer SMALLER than `size`, leading to out-of-bounds
// writes. This is safe only when every buffer passing through the pool has the
// same size; callers must guarantee that (or the pool should be extended to
// key buffers by size).
class TensorPool {
 public:
  // Returns a recycled buffer if one is available; otherwise allocates a new
  // buffer of `size` elements.
  std::unique_ptr<float[]> GetBuffer(size_t size) {
    if (!free_buffers_.empty()) {  // idiomatic empty() instead of size() > 0
      auto buffer = std::move(free_buffers_.back());
      free_buffers_.pop_back();
      return buffer;
    }
    return std::make_unique<float[]>(size);
  }

  // Hands a buffer back to the pool for later reuse.
  void ReturnBuffer(std::unique_ptr<float[]> buffer) {
    free_buffers_.push_back(std::move(buffer));
  }

 private:
  std::vector<std::unique_ptr<float[]>> free_buffers_;  // recycled buffers
};

七、IMS 实战:疲劳检测模型

7.1 疲劳检测应用场景

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
┌─────────────────────────────────────────────────────────────────────────┐
│ DMS 疲劳检测应用流程 │
├─────────────────────────────────────────────────────────────────────────┤
│ │
│ 输入: IR Camera (640×480) │
│ │ │
│ ▼ │
│ ┌─────────────┐ │
│ │ Face │ 检测人脸位置 │
│ │ Detection │ - 驾驶员是否存在 │
│ └─────────────┘ - 多人场景识别 │
│ │ │
│ ▼ │
│ ┌─────────────┐ │
│ │ Face Mesh │ 提取 468 个面部关键点 │
│ │ (468点) │ - 眼睛闭合度、眨眼频率 │
│ └─────────────┘ - 眼睛睁开角度、眼睛间距 │
│ │ │
│ ▼ │
│ ┌─────────────┐ │
│ │ Eye State │ 疲劳检测 │
│ │ Analysis │ - 眼睛闭合时间 │
│ └─────────────┘ - 眨眼频率 │
│ │ │
│ ▼ │
│ ┌─────────────┐ │
│ │ Fatigue │ 疲劳状态判定 │
│ │ Detector │ - 疲劳分数 │
│ └─────────────┘ - 警告级别 │
│ │
│ 应用场景:                                                              │
│ ├── 驾驶员疲劳检测 │
│ ├── 分心检测(闭眼) │
│ ├── 驾驶行为监控 │
│ └── 安全提醒 │
│ │
└─────────────────────────────────────────────────────────────────────────┘

7.2 疲劳检测 Graph

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# mediapipe/graphs/ims/dms_fatigue_detection_graph.pbtxt
#
# DMS fatigue-detection pipeline: IR camera frames in, fatigue status and
# warning level out.

input_stream: "IR_IMAGE:ir_image"
# BUG FIX: FatigueDetectorCalculator consumes FRAME_TIME below, but no node
# produced it and it was not a graph input, so graph validation failed.
# It is now an externally-fed graph input.
input_stream: "FRAME_TIME:frame_time"
output_stream: "FATIGUE_STATUS:fatigue_status"
output_stream: "WARNING_LEVEL:warning_level"

# 1. Face detection on the IR frame.
node {
  calculator: "FaceDetectionShortRangeGpu"
  input_stream: "IMAGE:ir_image"
  output_stream: "DETECTIONS:raw_detections"
}

# 2. Pick the primary (driver) face from all detections.
node {
  calculator: "PrimaryFaceSelectorCalculator"
  input_stream: "DETECTIONS:raw_detections"
  output_stream: "DETECTION:primary_detection"
  options {
    [mediapipe.PrimaryFaceSelectorOptions.ext] {
      selection_strategy: LARGEST
    }
  }
}

# 3. Face mesh (468 landmarks) on the selected face.
node {
  calculator: "FaceGeometryCalculator"
  input_stream: "IMAGE:ir_image"
  input_stream: "DETECTION:primary_detection"
  output_stream: "FACE_GEOMETRY:face_geometry"
}

# 4. Derive per-frame eye state (openness, blink frequency) from the mesh.
node {
  calculator: "EyeStateCalculator"
  input_stream: "FACE_GEOMETRY:face_geometry"
  output_stream: "EYE_STATE:eye_state"
}

# 5. Fatigue scoring and warning-level decision.
node {
  calculator: "FatigueDetectorCalculator"
  input_stream: "EYE_STATE:eye_state"
  input_stream: "FRAME_TIME:frame_time"
  output_stream: "FATIGUE_STATUS:fatigue_status"
  output_stream: "WARNING_LEVEL:warning_level"
  options {
    [mediapipe.FatigueDetectorOptions.ext] {
      eye_closure_threshold: 0.2
      blink_frequency_threshold: 0.3
      fatigue_score_threshold: 0.7
      warning_interval_ms: 5000
    }
  }
}

# 6. Turn warning levels into user-facing notifications.
node {
  calculator: "NotificationCalculator"
  input_stream: "WARNING_LEVEL:warning_level"
  output_stream: "NOTIFICATIONS:notifications"
}

7.3 疲劳检测 Calculator 实现

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
// fatigue_detector_calculator.h
//
// Declares FatigueDetectorCalculator, a MediaPipe calculator that turns
// per-frame eye-state measurements into a fatigue score and warning level.
#ifndef FATIGUE_DETECTOR_CALCULATOR_H_
#define FATIGUE_DETECTOR_CALCULATOR_H_

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/geometry_data.pb.h"
#include "mediapipe/framework/formats/detection.pb.h"

namespace mediapipe {

// Per-frame eye measurements from the upstream EyeStateCalculator.
// Openness is in [0, 1]: 1.0 = fully open, 0.0 = fully closed.
struct EyeState {
  float left_eye_openness = 1.0f;
  float right_eye_openness = 1.0f;
  float left_blink_frequency = 0.0f;
  float right_blink_frequency = 0.0f;
  float eye_closure_duration = 0.0f;  // seconds the eyes have been closed
};

// Discrete fatigue severity.
// BUG FIX: this enum was originally declared *after* FatigueStatus, which
// uses it as a member type — a compile error. It must come first.
enum FatigueLevel {
  NORMAL = 0,
  MILD = 1,
  MODERATE = 2,
  SEVERE = 3
};

// Aggregated fatigue state emitted on the FATIGUE_STATUS stream.
struct FatigueStatus {
  float fatigue_score = 0.0f;   // higher = more fatigued, in [0, 1]
  FatigueLevel level = FatigueLevel::NORMAL;
  int warning_count = 0;        // total warnings emitted so far
  int64 last_warning_time = 0;  // timestamp of the most recent warning
};

// Consumes EYE_STATE + FRAME_TIME packets and emits FATIGUE_STATUS and
// WARNING_LEVEL packets; warnings are rate-limited by warning_interval_ms_.
class FatigueDetectorCalculator : public CalculatorBase {
 public:
  static absl::Status GetContract(CalculatorContract* cc);

  absl::Status Open(CalculatorContext* cc) override;
  absl::Status Process(CalculatorContext* cc) override;

 private:
  // Combines eye closure and blink frequency into a score in [0, 1].
  float CalculateFatigueScore(
      const EyeState& eye_state,
      int64 frame_time);

  // Maps a fatigue score onto the discrete FatigueLevel scale.
  FatigueLevel DetermineFatigueLevel(float fatigue_score);

  // Emits a human-readable warning for the given level.
  void SendWarning(CalculatorContext* cc, FatigueLevel level);

  // Thresholds, overridable via FatigueDetectorCalculatorOptions.
  float eye_closure_threshold_ = 0.2f;
  float blink_frequency_threshold_ = 0.3f;
  float fatigue_score_threshold_ = 0.7f;
  int64 warning_interval_ms_ = 5000;

  // Mutable per-stream state.
  EyeState last_eye_state_;
  FatigueStatus current_status_;
  int64 last_warning_time_ = 0;
};

}  // namespace mediapipe

#endif  // FATIGUE_DETECTOR_CALCULATOR_H_
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
// fatigue_detector_calculator.cc
//
// Turns per-frame eye-state measurements into a fatigue score, a discrete
// fatigue level, and (optionally) human-readable warning notifications.
#include "fatigue_detector_calculator.h"
#include "mediapipe/framework/port/opencv_highgui.h"

namespace mediapipe {

// Declares input/output streams and the options type for the framework.
absl::Status FatigueDetectorCalculator::GetContract(CalculatorContract* cc) {
  cc->Inputs().Tag("EYE_STATE").Set<EyeState>();
  cc->Inputs().Tag("FRAME_TIME").Set<int64>();

  cc->Outputs().Tag("FATIGUE_STATUS").Set<FatigueStatus>();
  cc->Outputs().Tag("WARNING_LEVEL").Set<FatigueLevel>();
  // BUG FIX: SendWarning() writes to a NOTIFICATIONS output that was never
  // declared in the contract, which makes the framework reject the packet
  // at runtime. Declare it as an optional output.
  if (cc->Outputs().HasTag("NOTIFICATIONS")) {
    cc->Outputs().Tag("NOTIFICATIONS").Set<std::string>();
  }

  cc->Options<FatigueDetectorCalculatorOptions>();

  return absl::OkStatus();
}

// Reads the tunable thresholds out of the node options.
absl::Status FatigueDetectorCalculator::Open(CalculatorContext* cc) {
  const auto& options = cc->Options<FatigueDetectorCalculatorOptions>();

  eye_closure_threshold_ = options.eye_closure_threshold();
  blink_frequency_threshold_ = options.blink_frequency_threshold();
  fatigue_score_threshold_ = options.fatigue_score_threshold();
  warning_interval_ms_ = options.warning_interval_ms();

  return absl::OkStatus();
}

absl::Status FatigueDetectorCalculator::Process(CalculatorContext* cc) {
  // Skip ticks where either required input is missing; the original only
  // checked EYE_STATE, so an empty FRAME_TIME packet would fail in Get().
  if (cc->Inputs().Tag("EYE_STATE").IsEmpty() ||
      cc->Inputs().Tag("FRAME_TIME").IsEmpty()) {
    return absl::OkStatus();
  }

  const EyeState& eye_state = cc->Inputs().Tag("EYE_STATE").Get<EyeState>();
  int64 frame_time = cc->Inputs().Tag("FRAME_TIME").Get<int64>();

  // 1. Score the current frame.
  float fatigue_score = CalculateFatigueScore(eye_state, frame_time);

  // 2. Update the aggregated status.
  current_status_.fatigue_score = fatigue_score;
  current_status_.level = DetermineFatigueLevel(fatigue_score);

  // 3. Emit a rate-limited warning (at most one per warning_interval_ms_).
  if (current_status_.level >= FatigueLevel::MILD) {
    int64 now = frame_time;
    if (now - last_warning_time_ >= warning_interval_ms_) {
      SendWarning(cc, current_status_.level);
      last_warning_time_ = now;
      current_status_.last_warning_time = now;
    }
  }

  // 4. Publish the per-frame results.
  cc->Outputs().Tag("FATIGUE_STATUS").AddPacket(
      MakePacket<FatigueStatus>(current_status_).At(cc->InputTimestamp()));
  cc->Outputs().Tag("WARNING_LEVEL").AddPacket(
      MakePacket<FatigueLevel>(current_status_.level).At(cc->InputTimestamp()));

  return absl::OkStatus();
}

// Returns a fatigue score in [0, 1]; higher means more fatigued.
float FatigueDetectorCalculator::CalculateFatigueScore(
    const EyeState& eye_state,
    int64 frame_time) {
  // Eye-closure component. Openness is 1.0 for a fully open eye, so fatigue
  // rises as openness falls. Use the *more open* eye: the driver only counts
  // as closing their eyes when both are closing.
  // BUG FIX: the original used raw openness here, so wide-open eyes produced
  // the maximum fatigue score (the scale was inverted).
  float closure_score = 1.0f - std::max(eye_state.left_eye_openness,
                                        eye_state.right_eye_openness);

  // Blink-frequency component: frequent blinking correlates with drowsiness.
  float blink_score = std::max(eye_state.left_blink_frequency,
                               eye_state.right_blink_frequency);

  // Weighted blend: closure dominates (0.6) over blink frequency (0.4).
  return 0.6f * closure_score + 0.4f * blink_score;
}

// Maps the continuous score onto the four discrete fatigue levels.
// NOTE(review): the cut-points are hard-coded; the *_threshold_ options
// loaded in Open() are not consulted here — confirm whether they should be.
FatigueLevel FatigueDetectorCalculator::DetermineFatigueLevel(float fatigue_score) {
  if (fatigue_score < 0.3f) {
    return FatigueLevel::NORMAL;
  } else if (fatigue_score < 0.5f) {
    return FatigueLevel::MILD;
  } else if (fatigue_score < 0.7f) {
    return FatigueLevel::MODERATE;
  } else {
    return FatigueLevel::SEVERE;
  }
}

// Emits a human-readable warning for `level` on the optional NOTIFICATIONS
// output and bumps the warning counter.
void FatigueDetectorCalculator::SendWarning(CalculatorContext* cc, FatigueLevel level) {
  std::string message;

  switch (level) {
    case FatigueLevel::MILD:
      message = "Mild fatigue detected";
      break;
    case FatigueLevel::MODERATE:
      message = "Moderate fatigue detected";
      break;
    case FatigueLevel::SEVERE:
      message = "Severe fatigue detected - Please take a break!";
      break;
    default:
      message = "Fatigue warning";
  }

  ++current_status_.warning_count;

  // Only emit when the graph actually wired this output (see GetContract).
  if (cc->Outputs().HasTag("NOTIFICATIONS")) {
    cc->Outputs().Tag("NOTIFICATIONS").AddPacket(
        MakePacket<std::string>(message).At(cc->InputTimestamp()));
  }
}

REGISTER_CALCULATOR(FatigueDetectorCalculator);

}  // namespace mediapipe

7.4 疲劳检测模型

模型选择:

模型 输入尺寸 参数量 FPS (手机) 精度 说明
BlazeFace 128×128 0.5M 30-60 95% 人脸检测
Face Mesh 192×192 10M 15-25 90% 面部关键点
Eye State - - - - 眼睛状态分析

训练数据:

  • WIDER FACE(人脸检测)
  • AFLW2000(面部关键点)
  • 自定义疲劳数据集(眼睛状态)

训练流程:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# Train the fatigue-detection model and export it to TFLite.
# NOTE(review): `images` and `labels` are assumed to be pre-loaded arrays;
# this snippet does not show how they are produced.
import tensorflow as tf

# 1. Start from the pre-trained face-mesh backbone.
model = tf.keras.models.load_model('face_mesh_model.h5')

# 2. Prepare the dataset. BUG FIX: batching must be done on the dataset
#    itself — Keras raises a ValueError if `batch_size` is passed to fit()
#    together with a tf.data.Dataset.
train_dataset = tf.data.Dataset.from_tensor_slices((images, labels)).batch(32)

# 3. Fine-tune.
model.fit(train_dataset, epochs=100)

# 4. Convert to TFLite with default optimizations (post-training quantization).
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()

# 5. Save the flatbuffer. BUG FIX: the write was not indented under `with`
#    in the original, which is a syntax error.
with open('fatigue_detection.tflite', 'wb') as f:
    f.write(tflite_model)

7.5 部署到 IMS

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# 1. Build the image
docker build -t ims-dms-fatigue-detection .

# 2. Run the container (privileged, host network, camera device and
#    model directory mounted in)
docker run -d \
--name ims-dms \
--privileged \
--net=host \
-v /dev/video0:/dev/video0 \
-v /path/to/models:/models \
ims-dms-fatigue-detection

# 3. Tail the container logs
docker logs -f ims-dms

# 4. On-device performance test
adb shell /data/local/tmp/ims-dms --test-fps

八、调试与测试

8.1 可视化调试

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# Visual debugging of the fatigue pipeline with the MediaPipe Python API.
# (Indentation reconstructed; the published listing had lost all nesting.)
import mediapipe as mp
import cv2
import time


def calculate_eye_openness(eye_points):
    """Approximate eye openness for one eye.

    BUG FIX: this helper was called in the original listing but never
    defined. Openness is estimated as the ratio of the landmark cloud's
    vertical extent to its horizontal extent — near 0 for a closed eye.

    Args:
        eye_points: normalized face-mesh landmarks around one eye
            (corners plus upper/lower lid points).
    Returns:
        float openness ratio (0.0 when the eye is closed or degenerate).
    """
    xs = [p.x for p in eye_points]
    ys = [p.y for p in eye_points]
    width = max(xs) - min(xs)
    height = max(ys) - min(ys)
    if width <= 0:
        return 0.0
    return height / width


mp_face_detection = mp.solutions.face_detection
mp_face_mesh = mp.solutions.face_mesh
mp_drawing = mp.solutions.drawing_utils

cap = cv2.VideoCapture(0)

with mp_face_detection.FaceDetection(
        model_selection=0, min_detection_confidence=0.5) as face_detection, \
     mp_face_mesh.FaceMesh(
        max_num_faces=1,
        refine_landmarks=True,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5) as face_mesh:

    fps = 0
    frame_count = 0
    start_time = time.time()

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # MediaPipe expects RGB input.
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Face detection + face mesh on the same frame.
        face_results = face_detection.process(image)
        mesh_results = face_mesh.process(image)

        # BUG FIX: the original drew and displayed the RGB image directly,
        # so OpenCV's BGR window showed swapped colors. Convert back first.
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        if mesh_results.multi_face_landmarks:
            for face_landmarks in mesh_results.multi_face_landmarks:
                mp_drawing.draw_landmarks(
                    image,
                    face_landmarks,
                    mp_face_mesh.FACEMESH_TESSELATION,
                    mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=1),
                    mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=1))

                # Eye landmarks: corners plus lid points. BUG FIX: the
                # original repeated the corner index (133 / 263) and had no
                # lower-lid point, so the openness estimate was meaningless.
                left_eye = [face_landmarks.landmark[i]
                            for i in [33, 133, 160, 158, 153]]
                right_eye = [face_landmarks.landmark[i]
                             for i in [362, 263, 386, 384, 380]]

                left_openness = calculate_eye_openness(left_eye)
                right_openness = calculate_eye_openness(right_eye)

                # Crude fatigue proxy: mean openness (low = eyes closing).
                fatigue_score = (left_openness + right_openness) / 2

                cv2.putText(image, f'Fatigue: {fatigue_score:.2f}',
                            (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1,
                            (0, 255, 0), 2)

                if fatigue_score < 0.3:
                    cv2.putText(image, 'WARNING: Fatigue!',
                                (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 1,
                                (0, 0, 255), 2)

        # Rolling FPS over 30-frame windows.
        frame_count += 1
        if frame_count % 30 == 0:
            fps = frame_count / (time.time() - start_time)
            frame_count = 0
            start_time = time.time()

        cv2.putText(image, f'FPS: {fps:.1f}', (10, 90),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

        cv2.imshow('Fatigue Detection', image)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()

8.2 性能测试

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# 1. Measure FPS on-device with a prerecorded clip
adb shell /data/local/tmp/mediapipe_cpu \
--calculator_graph_config_path=/sdcard/face_detection.pbtxt \
--input_stream_path=/sdcard/test.mp4 \
--output_stream_path=/sdcard/output.mp4

# 2. Profile from logcat
adb logcat | grep -E "(FPS|Latency|Memory|GPU|NNAPI)"

# 3. Frame-rate stats from the camera service
adb shell dumpsys media.camera | grep -E "fps"

# 4. Memory usage of the app process
adb shell dumpsys meminfo com.example.mediapipe

# 5. GPU utilization
adb shell dumpsys gpu | grep -E "GPU|Memory"

8.3 单元测试

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
// tflite_inference_calculator_test.cc
//
// Unit tests for TFLiteInferenceCalculator. These exercise the calculator
// object directly rather than through a CalculatorGraph/CalculatorRunner.
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "tflite_inference_calculator.h"

namespace mediapipe {

class TFLiteInferenceCalculatorTest : public ::testing::Test {
 protected:
  void SetUp() override {
    calculator_ = std::make_unique<TFLiteInferenceCalculator>();
  }

  void TearDown() override {
    calculator_.reset();
  }

  std::unique_ptr<TFLiteInferenceCalculator> calculator_;
};

TEST_F(TFLiteInferenceCalculatorTest, LoadModel) {
  // NOTE(review): Open(nullptr) only works if Open() never dereferences its
  // context; prefer a real CalculatorContext via CalculatorRunner.
  absl::Status status = calculator_->Open(nullptr);
  EXPECT_TRUE(status.ok()) << status.message();
}

TEST_F(TFLiteInferenceCalculatorTest, Preprocess) {
  // ImageFrame takes (format, width, height) — the original passed the
  // arguments in a different order with a stray channel count.
  ImageFrame image(ImageFormat::SRGBA, 8, 6);
  // ... fill image data

  float* input_tensor = nullptr;
  // ... call Preprocess to populate input_tensor

  // BUG FIX: the original dereferenced input_tensor while it was still
  // nullptr, crashing the test binary instead of reporting a failure.
  ASSERT_NE(input_tensor, nullptr);
  EXPECT_GT(input_tensor[0], 0.0f);
  EXPECT_LT(input_tensor[0], 1.0f);
}

TEST_F(TFLiteInferenceCalculatorTest, Postprocess) {
  // Raw model output: 4 box coords, score, padding.
  float output_tensor[] = {0.1, 0.2, 0.3, 0.4, 0.9, 0.0};
  std::vector<Detection> detections = calculator_->Postprocess(output_tensor, 1);

  ASSERT_EQ(detections.size(), 1);
  // Detection::score is a repeated proto field, so index it; use FLOAT_EQ
  // to avoid exact float equality pitfalls.
  EXPECT_FLOAT_EQ(detections[0].score(0), 0.9f);
}

}  // namespace mediapipe

int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}

九、总结

要点 说明
模型加载 FlatBufferModel::BuildFromFile
解释器创建 InterpreterBuilder
输入预处理 缩放、归一化、格式转换
推理执行 interpreter_->Invoke()
输出后处理 解析检测结果、NMS
GPU 加速 GPU Delegate、FP16 量化
NNAPI 加速 Android 原生支持
XNNPACK 加速 高效多线程执行
IMS 实战 疲劳检测全流程

系列进度: 13/55
更新时间: 2026-03-12


MediaPipe 系列 13:推理 Calculator——集成 TFLite 模型
https://dapalm.com/2026/03/12/MediaPipe系列13-推理Calculator:集成TFLite模型/
作者
Mars
发布于
2026年3月12日
许可协议