# MediaPipe Objectron on GPU that produces 3D bounding boxes for objects.
type: "ObjectronGpuSubgraph"
# Input/Output streams and input side packets.
# GPU input image. Note that the input image is assumed to have aspect ratio
# 3:4 (width:height).
# NOTE(review): the normalized focal lengths hard-coded in the
# Lift2DFrameAnnotationTo3DCalculator node presumably correspond to this aspect
# ratio -- confirm before feeding images of a different shape.
input_stream: "IMAGE_GPU:image"
# Allowed category labels, e.g. Footwear, Coffee cup, Mug, Chair, Camera.
# Forwarded to ObjectDetectionOidV4Subgraph as LABELS_CSV.
input_side_packet: "LABELS_CSV:allowed_labels"
# Max number of objects to detect/track. (int)
# Consumed by NormalizedRectVectorHasMinSizeCalculator and
# ClipDetectionVectorSizeCalculator.
input_side_packet: "MAX_NUM_OBJECTS:max_num_objects"
# Whether landmarks on the previous image should be used to help localize
# landmarks on the current image. (bool)
# Optional: when absent, the gate on the previous box rects falls back to its
# "allow: true" option.
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
# Collection of detected 3D objects, represented as a FrameAnnotation.
# Produced by Lift2DFrameAnnotationTo3DCalculator.
output_stream: "FRAME_ANNOTATION:detected_objects"
# When the optional input side packet "use_prev_landmarks" is either absent or
# set to true, uses the landmarks on the previous image to help localize
# landmarks on the current image.
node {
  calculator: "GateCalculator"
  # Rects fed back from the previous image by PreviousLoopbackCalculator.
  input_stream: "prev_box_rects_from_landmarks"
  output_stream: "gated_prev_box_rects_from_landmarks"
  # Optional switch supplied by the caller of this subgraph.
  input_side_packet: "ALLOW:use_prev_landmarks"
  options: {
    [mediapipe.GateCalculatorOptions.ext] {
      # Used when the ALLOW side packet is absent: previous rects pass through.
      allow: true
    }
  }
}
# Determines if an input vector of NormalizedRect has a size greater than or
# equal to the provided max_num_objects.
node {
  calculator: "NormalizedRectVectorHasMinSizeCalculator"
  # Vector of rects carried over from the previous image (after gating).
  input_stream: "ITERABLE:gated_prev_box_rects_from_landmarks"
  # Minimum size to compare the vector against.
  input_side_packet: "max_num_objects"
  # Boolean result; drives the DISALLOW input of the detection image gate.
  output_stream: "prev_has_enough_objects"
}
# Drops the incoming image if the rects carried over from the previous image
# already number at least max_num_objects (prev_has_enough_objects is true).
# Otherwise, passes the incoming image through to trigger a new round of box
# detection in ObjectDetectionOidV4Subgraph.
node {
  calculator: "GateCalculator"
  # Untagged stream: the data being gated (the subgraph's input image).
  input_stream: "image"
  # Control stream: a true value blocks the image from reaching detection.
  input_stream: "DISALLOW:prev_has_enough_objects"
  output_stream: "detection_image"
  options: {
    [mediapipe.GateCalculatorOptions.ext] {
      # NOTE(review): presumably this lets the image through when the control
      # stream has no packet (e.g. on the very first image) -- confirm.
      empty_packets_as_allow: true
    }
  }
}
# Subgraph that performs 2D object detection.
node {
  calculator: "ObjectDetectionOidV4Subgraph"
  # Only receives images that passed the detection gate.
  input_stream: "IMAGE_GPU:detection_image"
  # Category labels supplied by the caller of this subgraph.
  input_side_packet: "LABELS_CSV:allowed_labels"
  # Unclipped detections; trimmed by ClipDetectionVectorSizeCalculator.
  output_stream: "DETECTIONS:raw_detections"
}
# Makes sure there are no more detections than provided max_num_objects.
node {
  calculator: "ClipDetectionVectorSizeCalculator"
  input_stream: "raw_detections"
  # Upper bound on the number of detections kept.
  input_side_packet: "max_num_objects"
  output_stream: "detections"
}
# Extracts image size from the input images.
node {
  calculator: "ImagePropertiesCalculator"
  # Reads the ungated input image, not detection_image.
  input_stream: "IMAGE_GPU:image"
  # Consumed by DetectionsToRectsCalculator as IMAGE_SIZE.
  output_stream: "SIZE:image_size"
}
# Converts results of box detection into rectangles (normalized by image size)
# that enclose the boxes.
node {
  calculator: "DetectionsToRectsCalculator"
  # Detections already clipped to at most max_num_objects entries.
  input_stream: "DETECTIONS:detections"
  input_stream: "IMAGE_SIZE:image_size"
  # One normalized rect per detection.
  output_stream: "NORM_RECTS:box_rects_from_detections"
  options: {
    [mediapipe.DetectionsToRectsCalculatorOptions.ext] {
      # Do not emit a placeholder zero rect when there are no detections.
      output_zero_rect_for_empty_detections: false
    }
  }
}
# Performs association between NormalizedRect vector elements from previous
# image and rects based on object detections from the current image. This
# calculator ensures that the output box_rects vector doesn't contain
# overlapping regions based on the specified min_similarity_threshold.
node {
  calculator: "AssociationNormRectCalculator"
  # Both inputs are untagged and therefore positional.
  # NOTE(review): presumably the calculator's precedence between the two
  # sources depends on this order -- confirm before reordering.
  input_stream: "box_rects_from_detections"
  input_stream: "gated_prev_box_rects_from_landmarks"
  # Merged rect vector that feeds the per-box landmark loop.
  output_stream: "box_rects"
  options: {
    [mediapipe.AssociationCalculatorOptions.ext] {
      # Similarity threshold used to decide that two rects overlap.
      min_similarity_threshold: 0.2
    }
  }
}
# Outputs each element of box_rects at a fake timestamp for the rest of the
# graph to process. Clones the image packet for each single_box_rect at the
# fake timestamp. At the end of the loop, outputs the BATCH_END timestamp for
# downstream calculators to inform them that all elements in the vector have
# been processed.
node {
  calculator: "BeginLoopNormalizedRectCalculator"
  # Vector to iterate over.
  input_stream: "ITERABLE:box_rects"
  # Packet to replicate alongside every emitted item.
  input_stream: "CLONE:image"
  # One rect per loop iteration.
  output_stream: "ITEM:single_box_rect"
  # Copy of the input image paired with each single_box_rect.
  output_stream: "CLONE:landmarks_image"
  # End-of-batch marker consumed by the matching EndLoop calculator.
  output_stream: "BATCH_END:box_rects_timestamp"
}
# Subgraph that localizes box landmarks.
node {
  calculator: "BoxLandmarkSubgraph"
  # Per-iteration image clone from BeginLoopNormalizedRectCalculator.
  input_stream: "IMAGE:landmarks_image"
  # Region of the image in which to localize the box landmarks.
  input_stream: "NORM_RECT:single_box_rect"
  # Landmarks for this single box; gathered by the EndLoop calculator.
  output_stream: "NORM_LANDMARKS:single_box_landmarks"
}
# Collects a set of landmarks for each box into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END
# timestamp.
node {
  calculator: "EndLoopNormalizedLandmarkListVectorCalculator"
  # Per-box landmark lists produced inside the loop.
  input_stream: "ITEM:single_box_landmarks"
  # Marker from BeginLoopNormalizedRectCalculator that closes the batch.
  input_stream: "BATCH_END:box_rects_timestamp"
  # All landmark lists of the current image, gathered into one vector.
  output_stream: "ITERABLE:multi_box_landmarks"
}
# Convert box landmarks to frame annotations.
node {
  calculator: "LandmarksToFrameAnnotationCalculator"
  # Vector with one landmark list per box.
  input_stream: "MULTI_LANDMARKS:multi_box_landmarks"
  # 2D annotations; lifted to 3D by Lift2DFrameAnnotationTo3DCalculator.
  output_stream: "FRAME_ANNOTATION:box_annotations"
}
# Lift the 2D landmarks to 3D using EPnP algorithm.
node {
  calculator: "Lift2DFrameAnnotationTo3DCalculator"
  input_stream: "FRAME_ANNOTATION:box_annotations"
  # This stream is the subgraph's FRAME_ANNOTATION output; it also feeds
  # FrameAnnotationToRectCalculator for the tracking loop.
  output_stream: "LIFTED_FRAME_ANNOTATION:detected_objects"
  options: {
    [mediapipe.Lift2DFrameAnnotationTo3DCalculatorOptions.ext] {
      # Hard-coded camera intrinsics.
      # NOTE(review): presumably tied to the assumed 3:4 (width:height) input
      # aspect ratio -- confirm before reusing with other image shapes.
      normalized_focal_x: 2.0975
      normalized_focal_y: 1.5731
    }
  }
}
# Get rotated rectangle from detected box.
node {
  calculator: "FrameAnnotationToRectCalculator"
  # Lifted annotations of the current image.
  input_stream: "FRAME_ANNOTATION:detected_objects"
  # Rects fed back through PreviousLoopbackCalculator for the next image.
  output_stream: "NORM_RECTS:box_rects_from_landmarks"
}
# Caches the box rectangles fed back from FrameAnnotationToRectCalculator (which
# are derived from the landmarks of BoxLandmarkSubgraph), and upon the arrival
# of the next input image sends out the cached rectangles with the timestamp
# replaced by that of the input image, essentially generating a packet that
# carries the previous box rectangles. Note that upon the arrival of the very
# first input image, an empty packet is sent out to jump start the feedback
# loop.
node {
  calculator: "PreviousLoopbackCalculator"
  # Each arriving input image triggers the release of the cached loop packet.
  input_stream: "MAIN:image"
  # Rects computed from the landmarks of the image being processed.
  input_stream: "LOOP:box_rects_from_landmarks"
  # Consumed by the first GateCalculator of this subgraph.
  output_stream: "PREV_LOOP:prev_box_rects_from_landmarks"
  # LOOP is produced downstream of this node, so it is declared a back edge.
  input_stream_info: {
    tag_index: "LOOP"
    back_edge: true
  }
}