# chromium/third_party/mediapipe/src/mediapipe/modules/pose_landmark/tensors_to_pose_landmarks_and_segmentation.pbtxt

# MediaPipe graph performing tensor post-processing to detect/predict pose
# landmarks and segmentation mask.
#
# EXAMPLE:
#   node {
#     calculator: "TensorsToPoseLandmarksAndSegmentation"
#     input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
#     input_stream: "TENSORS:tensors"
#     output_stream: "LANDMARKS:landmarks"
#     output_stream: "AUXILIARY_LANDMARKS:auxiliary_landmarks"
#     output_stream: "WORLD_LANDMARKS:world_landmarks"
#     output_stream: "SEGMENTATION_MASK:segmentation_mask"
#   }

# Graph type name. This is the value used in the `calculator` field of the
# usage example at the top of this file.
type: "TensorsToPoseLandmarksAndSegmentation"

# Whether to predict segmentation mask. If unspecified, functions as set to
# false. (bool)
input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"

# Tensors from model inference of
# "mediapipe/modules/pose_landmark/pose_landmark_lite|full|heavy.tflite".
# (std::vector<Tensor>)
# The vector is split by index in the first node of this graph:
# tensors[0]: landmarks
# tensors[1]: pose flag
# tensors[2]: segmentation
# tensors[3]: heatmap
# tensors[4]: world landmarks
input_stream: "TENSORS:tensors"

# Pose landmarks. (NormalizedLandmarkList)
# We have 33 landmarks (see pose_landmark_topology.svg) and there are other
# auxiliary key points, which are emitted separately on AUXILIARY_LANDMARKS.
# 0 - nose
# 1 - left eye (inner)
# 2 - left eye
# 3 - left eye (outer)
# 4 - right eye (inner)
# 5 - right eye
# 6 - right eye (outer)
# 7 - left ear
# 8 - right ear
# 9 - mouth (left)
# 10 - mouth (right)
# 11 - left shoulder
# 12 - right shoulder
# 13 - left elbow
# 14 - right elbow
# 15 - left wrist
# 16 - right wrist
# 17 - left pinky
# 18 - right pinky
# 19 - left index
# 20 - right index
# 21 - left thumb
# 22 - right thumb
# 23 - left hip
# 24 - right hip
# 25 - left knee
# 26 - right knee
# 27 - left ankle
# 28 - right ankle
# 29 - left heel
# 30 - right heel
# 31 - left foot index
# 32 - right foot index
#
# NOTE: If a pose is not present, for this particular timestamp there will not
# be an output packet in the LANDMARKS stream. However, the MediaPipe framework
# will internally inform the downstream calculators of the absence of this
# packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:landmarks"

# Auxiliary landmarks (e.g., for deriving the ROI in the subsequent image).
# (NormalizedLandmarkList)
# These are the entries at indices 33 and 34 of the decoded landmark list (see
# the SplitNormalizedLandmarkListCalculator node in this graph).
output_stream: "AUXILIARY_LANDMARKS:auxiliary_landmarks"

# Pose world landmarks. (LandmarkList)
# World landmarks are real-world 3D coordinates in meters with the origin at the
# center between hips. WORLD_LANDMARKS shares the same landmark topology as
# LANDMARKS. However, LANDMARKS provides normalized coordinates of a 3D object
# projected onto the 2D image surface, while WORLD_LANDMARKS provides
# coordinates (in meters) of the 3D object itself.
# Visibility and presence values are copied over from LANDMARKS (see the
# VisibilityCopyCalculator node in this graph).
output_stream: "WORLD_LANDMARKS:world_landmarks"

# Segmentation mask. (Image)
# The segmentation tensor is gated first on pose presence and then on
# ENABLE_SEGMENTATION before it is decoded into this mask.
output_stream: "SEGMENTATION_MASK:segmentation_mask"

# Fans the incoming tensor vector out into one stream per model output. The
# ranges listed in the options and the output streams are given in the same
# order, following the tensor layout documented on the TENSORS input.
node {
  calculator: "SplitTensorVectorCalculator"
  input_stream: "tensors"
  output_stream: "landmark_tensor"
  output_stream: "pose_flag_tensor"
  output_stream: "segmentation_tensor"
  output_stream: "heatmap_tensor"
  output_stream: "world_landmark_tensor"
  options {
    [mediapipe.SplitVectorCalculatorOptions.ext] {
      # tensors[0] -> landmark_tensor
      ranges {
        begin: 0
        end: 1
      }
      # tensors[1] -> pose_flag_tensor
      ranges {
        begin: 1
        end: 2
      }
      # tensors[2] -> segmentation_tensor
      ranges {
        begin: 2
        end: 3
      }
      # tensors[3] -> heatmap_tensor
      ranges {
        begin: 3
        end: 4
      }
      # tensors[4] -> world_landmark_tensor
      ranges {
        begin: 4
        end: 5
      }
    }
  }
}

# Converts the pose-flag tensor into a float that represents the confidence
# score of pose presence. The score is consumed by the ThresholdingCalculator
# node that follows.
node {
  calculator: "TensorsToFloatsCalculator"
  input_stream: "TENSORS:pose_flag_tensor"
  output_stream: "FLOAT:pose_presence_score"
}

# Turns the pose-presence confidence score into a pose_presence flag by
# comparing it against a fixed threshold of 0.5. The flag drives the ALLOW
# input of the gate below.
node {
  calculator: "ThresholdingCalculator"
  input_stream: "FLOAT:pose_presence_score"
  output_stream: "FLAG:pose_presence"
  options {
    [mediapipe.ThresholdingCalculatorOptions.ext] {
      threshold: 0.5
    }
  }
}

# Drops input tensors if pose is not present.
# The four untagged inputs and the four untagged outputs are listed in the same
# order (landmark, world landmark, segmentation, heatmap); the ALLOW stream
# carries the pose_presence flag produced by the thresholding node above.
node {
  calculator: "GateCalculator"
  input_stream: "landmark_tensor"
  input_stream: "world_landmark_tensor"
  input_stream: "segmentation_tensor"
  input_stream: "heatmap_tensor"
  input_stream: "ALLOW:pose_presence"
  output_stream: "ensured_landmark_tensor"
  output_stream: "ensured_world_landmark_tensor"
  output_stream: "ensured_segmentation_tensor"
  output_stream: "ensured_heatmap_tensor"
}

# -----------------------------------------------------------------------------
# LANDMARKS
# -----------------------------------------------------------------------------

# Turns the gated landmark tensor into a list of 39 normalized landmarks. The
# coordinates are normalized by the spatial dimensions of the tensor, given
# here as 256 x 256. A sigmoid activation is selected for both the visibility
# and the presence values.
node {
  calculator: "TensorsToLandmarksCalculator"
  input_stream: "TENSORS:ensured_landmark_tensor"
  output_stream: "NORM_LANDMARKS:raw_landmarks"
  options {
    [mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
      num_landmarks: 39
      input_image_height: 256
      input_image_width: 256
      presence_activation: SIGMOID
      visibility_activation: SIGMOID
    }
  }
}

# Uses the gated heatmap tensor to refine the raw landmarks decoded above; the
# refined list is published as all_landmarks. The refinement is configured with
# a kernel size of 7.
node {
  calculator: "RefineLandmarksFromHeatmapCalculator"
  input_stream: "NORM_LANDMARKS:raw_landmarks"
  input_stream: "TENSORS:ensured_heatmap_tensor"
  output_stream: "NORM_LANDMARKS:all_landmarks"
  options {
    [mediapipe.RefineLandmarksFromHeatmapCalculatorOptions.ext] {
      kernel_size: 7
    }
  }
}

# Separates the refined landmark list into the two graph outputs: the actual
# pose landmarks and the auxiliary landmarks. Entries from index 35 onwards of
# the 39 decoded landmarks are not forwarded by this graph.
node {
  calculator: "SplitNormalizedLandmarkListCalculator"
  input_stream: "all_landmarks"
  output_stream: "landmarks"
  output_stream: "auxiliary_landmarks"
  options {
    [mediapipe.SplitVectorCalculatorOptions.ext] {
      # Pose landmarks 0..32 -> landmarks
      ranges {
        begin: 0
        end: 33
      }
      # Auxiliary landmarks 33..34 -> auxiliary_landmarks
      ranges {
        begin: 33
        end: 35
      }
    }
  }
}

# -----------------------------------------------------------------------------
# WORLD_LANDMARKS
# -----------------------------------------------------------------------------

# Turns the gated world-landmark tensor into a list of 39 world landmarks.
# Unlike the LANDMARKS branch, no input image size or activation options are
# set for this decoder.
node {
  calculator: "TensorsToLandmarksCalculator"
  input_stream: "TENSORS:ensured_world_landmark_tensor"
  output_stream: "LANDMARKS:all_world_landmarks"
  options {
    [mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
      num_landmarks: 39
    }
  }
}

# Retains just the actual world landmarks (indices 0..32) and discards the
# rest of the decoded list. Visibility and presence are filled in by the next
# node, hence the "without_visibility" stream name.
node {
  calculator: "SplitLandmarkListCalculator"
  input_stream: "all_world_landmarks"
  output_stream: "world_landmarks_without_visibility"
  options {
    [mediapipe.SplitVectorCalculatorOptions.ext] {
      ranges {
        begin: 0
        end: 33
      }
    }
  }
}

# Transfers the visibility and presence fields of the pose landmarks onto the
# world landmarks, producing the final WORLD_LANDMARKS output. Both copies are
# switched on explicitly in the options.
node {
  calculator: "VisibilityCopyCalculator"
  input_stream: "NORM_LANDMARKS_FROM:landmarks"
  input_stream: "LANDMARKS_TO:world_landmarks_without_visibility"
  output_stream: "LANDMARKS_TO:world_landmarks"
  options {
    [mediapipe.VisibilityCopyCalculatorOptions.ext] {
      copy_presence: true
      copy_visibility: true
    }
  }
}

# -----------------------------------------------------------------------------
# SEGMENTATION_MASK
# -----------------------------------------------------------------------------

# Lets the segmentation tensor through only when segmentation is enabled via
# the ENABLE_SEGMENTATION side packet; otherwise the tensor is dropped.
# NOTE(review): `allow: false` presumably provides the behaviour when the side
# packet is unspecified (the input documentation says it functions as false) --
# confirm against GateCalculator.
node {
  calculator: "GateCalculator"
  input_side_packet: "ALLOW:enable_segmentation"
  input_stream: "ensured_segmentation_tensor"
  output_stream: "enabled_segmentation_tensor"
  options {
    [mediapipe.GateCalculatorOptions.ext] {
      allow: false
    }
  }
}

# Converts the enabled segmentation tensor into the SEGMENTATION_MASK output:
# a mask image with pixel values in [0, 1] (1 for person and 0 for background).
# A sigmoid activation is selected, and the GPU origin is declared as top-left.
node {
  calculator: "TensorsToSegmentationCalculator"
  input_stream: "TENSORS:enabled_segmentation_tensor"
  output_stream: "MASK:segmentation_mask"
  options {
    [mediapipe.TensorsToSegmentationCalculatorOptions.ext] {
      gpu_origin: TOP_LEFT
      activation: SIGMOID
    }
  }
}