chromium/third_party/mediapipe/src/mediapipe/modules/holistic_landmark/hand_recrop_by_roi_gpu.pbtxt

# Predicts more accurate hand location (re-crop ROI) within a given ROI.

type: "HandRecropByRoiGpu"

# GPU image. (GpuBuffer)
# Consumed below only through IMAGE_GPU-tagged inputs.
input_stream: "IMAGE:input_video"
# ROI (region of interest) within the given image where a palm/hand is located.
# (NormalizedRect)
input_stream: "ROI:roi"

# Refined (more accurate) ROI to use for hand landmark prediction.
# (NormalizedRect)
output_stream: "HAND_ROI_FROM_RECROP:hand_roi_from_recrop_refined"

# Crops the hand ROI out of the GPU input image and converts it into a 256x256
# float tensor with values in [0.0, 1.0]. The aspect ratio is kept, so the crop
# is letterboxed; the applied padding is emitted on LETTERBOX_PADDING for the
# letterbox removal step further down the graph.
node {
  calculator: "ImageToTensorCalculator"
  input_stream: "IMAGE_GPU:input_video"
  input_stream: "NORM_RECT:roi"
  output_stream: "TENSORS:initial_crop_tensor"
  output_stream: "LETTERBOX_PADDING:letterbox_padding"
  options {
    [mediapipe.ImageToTensorCalculatorOptions.ext] {
      output_tensor_width: 256
      output_tensor_height: 256
      keep_aspect_ratio: true
      output_tensor_float_range { min: 0.0 max: 1.0 }
      # For OpenGL origin should be at the top left corner.
      gpu_origin: TOP_LEFT
    }
  }
}

# Predicts hand re-crop rectangle.
# Runs the hand_recrop.tflite model on the cropped-image tensor produced by the
# ImageToTensorCalculator node and emits the raw model output tensors.
node {
  calculator: "InferenceCalculator"
  input_stream: "TENSORS:initial_crop_tensor"
  output_stream: "TENSORS:landmark_tensors"
  options: {
    [mediapipe.InferenceCalculatorOptions.ext] {
      model_path: "mediapipe/modules/holistic_landmark/hand_recrop.tflite"
    }
  }
}

# Decodes the landmark tensors into a vector of landmarks, where the landmark
# coordinates are normalized by the size of the input image to the model
# (256x256, matching the tensor size produced by ImageToTensorCalculator). The
# two landmarks represent two virtual alignment points: center and scale of the
# new crop.
node {
  calculator: "TensorsToLandmarksCalculator"
  input_stream: "TENSORS:landmark_tensors"
  output_stream: "NORM_LANDMARKS:landmarks"
  options: {
    [mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
      num_landmarks: 2
      input_image_width: 256
      input_image_height: 256
    }
  }
}

# Adjusts landmarks (already normalized to [0.f, 1.f]) on the letterboxed hand
# image (as produced by ImageToTensorCalculator with keep_aspect_ratio: true)
# to the corresponding locations on the same image with the letterbox removed
# (hand crop before padding). The padding amounts come from the
# LETTERBOX_PADDING output of the ImageToTensorCalculator node.
node {
  calculator: "LandmarkLetterboxRemovalCalculator"
  input_stream: "LANDMARKS:landmarks"
  input_stream: "LETTERBOX_PADDING:letterbox_padding"
  output_stream: "LANDMARKS:scaled_landmarks"
}

# Projects the landmarks from the cropped hand image to the corresponding
# locations on the full image before cropping (input to the graph). Uses the
# same ROI rectangle that was used to crop the image for the model.
node {
  calculator: "LandmarkProjectionCalculator"
  input_stream: "NORM_LANDMARKS:scaled_landmarks"
  input_stream: "NORM_RECT:roi"
  output_stream: "NORM_LANDMARKS:alignment_landmarks"
}

# Converts the two alignment landmarks (not full hand landmarks) to a detection
# that tightly encloses all landmarks. The detection is consumed by the
# AlignmentPointsRectsCalculator node, which reads its keypoints by index.
node {
  calculator: "LandmarksToDetectionCalculator"
  input_stream: "NORM_LANDMARKS:alignment_landmarks"
  output_stream: "DETECTION:hand_detection"
}

# Extracts the image size from the GPU input image. The size is used by the
# AlignmentPointsRectsCalculator and RectTransformationCalculator nodes below.
node {
  calculator: "ImagePropertiesCalculator"
  input_stream: "IMAGE_GPU:input_video"
  output_stream: "SIZE:image_size"
}

# Converts hand detection into a rectangle based on center and scale alignment
# points.
# NOTE(review): the options below use the DetectionsToRectsCalculatorOptions
# extension - presumably AlignmentPointsRectsCalculator shares that options
# type; confirm against the calculator implementation.
node {
  calculator: "AlignmentPointsRectsCalculator"
  input_stream: "DETECTION:hand_detection"
  input_stream: "IMAGE_SIZE:image_size"
  output_stream: "NORM_RECT:hand_roi_from_recrop"
  options: {
    [mediapipe.DetectionsToRectsCalculatorOptions.ext] {
      # Keypoints 0 and 1 are the two landmarks predicted by the re-crop model
      # (presumably center and scale point, in that order - TODO confirm).
      rotation_vector_start_keypoint_index: 0
      rotation_vector_end_keypoint_index: 1
      rotation_vector_target_angle_degrees: -90
    }
  }
}

# TODO: revise hand recrop roi calculation.
# Slightly moves hand re-crop rectangle from wrist towards fingertips. Due to
# the new hand cropping logic, crop border is too close to finger tips while a
# lot of space is below the wrist. And when moving hand up fast (with fingers
# pointing up) and using hand rect from the previous frame for tracking -
# fingertips can be cropped. This adjustment partially solves it, but hand
# cropping logic should be reviewed.
node {
  calculator: "RectTransformationCalculator"
  input_stream: "NORM_RECT:hand_roi_from_recrop"
  input_stream: "IMAGE_SIZE:image_size"
  # Untagged output; this stream is the graph's HAND_ROI_FROM_RECROP output.
  output_stream: "hand_roi_from_recrop_refined"
  options: {
    [mediapipe.RectTransformationCalculatorOptions.ext] {
      # Size is left unscaled; only the vertical shift and squaring are applied.
      scale_x: 1.0
      scale_y: 1.0
      shift_y: -0.1
      square_long: true
    }
  }
}