object_occlusion_tracking_1stage.pbtxt

# MediaPipe graph for 3D object detection with tracking. The tracked object is
# rendered with a bounding-box overlay and an occlusion mask (see the rendering
# nodes at the end of the graph).

# GPU images coming into and out of the graph.
input_stream: "input_video"
# Crop size, consumed by the ImageCroppingCalculator as its WIDTH and HEIGHT
# inputs.
input_stream: "input_width"
input_stream: "input_height"
output_stream: "output_video"

# Crops the image from the center to the size WIDTHxHEIGHT, where the size is
# taken per packet from the input_width and input_height graph input streams.
# The cropped frames are published as input_video_4x3.
node: {
  calculator: "ImageCroppingCalculator"
  input_stream: "IMAGE_GPU:input_video"
  output_stream: "IMAGE_GPU:input_video_4x3"
  input_stream: "WIDTH:input_width"
  input_stream: "HEIGHT:input_height"
  node_options: {
    [type.googleapis.com/mediapipe.ImageCroppingCalculatorOptions] {
      # NOTE(review): presumably controls how pixels outside the source image
      # are filled when the crop region exceeds it -- confirm against the
      # calculator's options proto.
      border_mode: BORDER_REPLICATE
    }
  }
}

# Creates a copy of the cropped input_video_4x3 stream. The copy
# (input_video_copy) feeds the resampler and the tracking subgraph, while at the
# end of the graph the GlAnimationOverlayCalculator consumes the
# input_video_4x3 texture itself and draws on top of it.
node: {
  calculator: "GlScalerCalculator"
  input_stream: "VIDEO:input_video_4x3"
  output_stream: "VIDEO:input_video_copy"
}

# Resamples the images to a specific frame rate. This calculator is used to
# control the frequency of subsequent calculators/subgraphs, e.g. to reduce the
# power consumption of expensive processing. Here it throttles the frames that
# reach the detection subgraph.
node {
  calculator: "PacketResamplerCalculator"
  input_stream: "DATA:input_video_copy"
  output_stream: "DATA:sampled_input_video"
  node_options: {
    [type.googleapis.com/mediapipe.PacketResamplerCalculatorOptions] {
      # Target rate of the sampled_input_video stream.
      frame_rate: 5
    }
  }
}

# Runs the 1-stage Objectron detection subgraph on the resampled frames only
# (sampled_input_video), so detection runs at the reduced frame rate. The
# resulting annotations are published on the "objects" stream.
node {
  calculator: "ObjectronDetection1StageSubgraphGpu"
  input_stream: "IMAGE_GPU:sampled_input_video"
  output_stream: "ANNOTATIONS:objects"
}

# Runs the 1-stage Objectron tracking subgraph. It receives the detections
# ("objects") together with the non-resampled input_video_copy stream, and
# publishes lifted_tracked_objects, which both model-matrix nodes below consume.
node {
  calculator: "ObjectronTracking1StageSubgraphGpu"
  input_stream: "FRAME_ANNOTATION:objects"
  input_stream: "IMAGE_GPU:input_video_copy"
  output_stream: "LIFTED_FRAME_ANNOTATION:lifted_tracked_objects"
}

# The rendering nodes:
# We are rendering two meshes: 1) a 3D bounding box, which we overlay directly
# on the texture, and 2) a shoe CAD model, which we use as an occlusion mask.
# These models are designed using different tools, so we supply a transformation
# to bring both of them to the Objectron's coordinate system.

# Creates the model matrices for the tracked object given the lifted 3D points.
# This calculator does two things: 1) estimates the object's pose (orientation,
# translation, and scale) from the 3D vertices, and
# 2) brings the object from the objectron's coordinate system to the renderer
# (OpenGL) coordinate system. Since the final goal is to render a mesh file on
# top of the object, we also supply a transformation to bring the mesh to the
# objectron's coordinate system, and rescale the mesh to the unit size.
# The output (model_matrices) drives the bounding-box overlay.
node {
  calculator: "AnnotationsToModelMatricesCalculator"
  input_stream: "ANNOTATIONS:lifted_tracked_objects"
  output_stream: "MODEL_MATRICES:model_matrices"
  node_options: {
    [type.googleapis.com/mediapipe.AnnotationsToModelMatricesCalculatorOptions] {
      # Re-scale the CAD model to the size of a unit box.
      model_scale: [0.05, 0.05, 0.05]
      # Bring the box CAD model to the objectron's coordinate system. This
      # is equivalent to a -pi/2 rotation about the y-axis (right-hand rule):
      # Eigen::AngleAxisf(-M_PI / 2., Eigen::Vector3f::UnitY())
      # The four entries below are the rows of the 4x4 homogeneous matrix.
      model_transformation: [0.0,  0.0, -1.0,  0.0]
      model_transformation: [0.0,  1.0,  0.0,  0.0]
      model_transformation: [1.0,  0.0,  0.0,  0.0]
      model_transformation: [0.0,  0.0,  0.0,  1.0]
    }
  }
}

# Computes the model matrices for the CAD model that is used as the occlusion
# mask (the mesh supplied to the renderer as MASK_ASSET). The model is driven by
# the same lifted_tracked_objects annotations as the bounding box, so it is
# rendered at the same location as the bounding box.
node {
  calculator: "AnnotationsToModelMatricesCalculator"
  input_stream: "ANNOTATIONS:lifted_tracked_objects"
  output_stream: "MODEL_MATRICES:mask_model_matrices"
  node_options: {
    [type.googleapis.com/mediapipe.AnnotationsToModelMatricesCalculatorOptions] {
      # Re-scale the CAD model to the size of a unit box.
      model_scale: [0.15, 0.1, 0.15]
      # Bring the CAD model to the objectron's coordinate system.
      # NOTE(review): the original comment called the target "Deep Pursuit 3D's
      # coordinate system"; presumably the same objectron coordinate system
      # used by the box node -- confirm.
      # NOTE(review): the original comment described this matrix as a -pi/2
      # rotation about the x-axis,
      #   Eigen::AngleAxisf(-M_PI / 2., Eigen::Vector3f::UnitX()),
      # but that rotation would have rows [1, 0, 0], [0, 0, 1], [0, -1, 0].
      # The matrix below instead keeps x, keeps y with a -10 entry in the
      # fourth column (presumably a translation along y -- confirm), and
      # negates z. Confirm which of the two is intended for the mask mesh.
      model_transformation: [1.0,  0.0,  0.0,  0.0]
      model_transformation: [0.0,  1.0,  0.0,  -10.0]
      model_transformation: [0.0,  0.0,  -1.0,  0.0]
      model_transformation: [0.0,  0.0,  0.0,  1.0]
    }
  }
}

# Render everything together. First we render the 3D bounding box animation,
# then we render the occlusion mask. The result is drawn on top of the cropped
# input_video_4x3 frames and emitted as the graph's output_video stream.
node:{
  calculator:"GlAnimationOverlayCalculator"
  input_stream:"VIDEO:input_video_4x3"
  # Per-frame model matrices for the bounding box and for the occlusion mask,
  # produced by the two AnnotationsToModelMatricesCalculator nodes.
  input_stream:"MODEL_MATRICES:model_matrices"
  input_stream:"MASK_MODEL_MATRICES:mask_model_matrices"
  output_stream:"output_video"
  # Textures and mesh assets are side packets; no node in this graph produces
  # them, so they must be supplied by whoever runs the graph.
  input_side_packet:"TEXTURE:box_texture"
  input_side_packet:"ANIMATION_ASSET:box_asset_name"
  input_side_packet:"MASK_TEXTURE:obj_texture"
  input_side_packet:"MASK_ASSET:obj_asset_name"
  node_options: {
    [type.googleapis.com/mediapipe.GlAnimationOverlayCalculatorOptions] {
      # Output resolution is 480x640, i.e. an aspect ratio of 0.75.
      aspect_ratio: 0.75
      vertical_fov_degrees: 70.
      animation_speed_fps: 25
    }
  }
}
chromium/third_party/mediapipe/src/mediapipe/graphs/object_detection_3d/object_occlusion_tracking_1stage.pbtxt