// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#if defined(__ANDROID__)
#include "mediapipe/util/android/asset_manager_util.h"
#else
#include <fstream>
#include <iostream>
#endif
#include <algorithm>
#include <cmath>
#include <cstdint>
#include "absl/log/absl_check.h"
#include "absl/log/absl_log.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/gpu/gl_calculator_helper.h"
#include "mediapipe/gpu/shader_util.h"
#include "mediapipe/graphs/object_detection_3d/calculators/gl_animation_overlay_calculator.pb.h"
#include "mediapipe/graphs/object_detection_3d/calculators/model_matrix.pb.h"
#include "mediapipe/modules/objectron/calculators/camera_parameters.pb.h"
namespace mediapipe {
namespace {
#if defined(GL_DEBUG)
#define GLCHECK(command) \
command; \
if (int err = glGetError()) ABSL_LOG(ERROR) << "GL error detected: " << err;
#else
#define GLCHECK(command) command
#endif
// For ease of use, we prefer ImageFrame on Android and GpuBuffer otherwise.
#if defined(__ANDROID__)
typedef ImageFrame AssetTextureFormat;
#else
typedef GpuBuffer AssetTextureFormat;
#endif
enum { ATTRIB_VERTEX, ATTRIB_TEXTURE_POSITION, ATTRIB_NORMAL, NUM_ATTRIBUTES };
static const int kNumMatrixEntries = 16;
// Hard-coded model matrix for testing.
static const float kModelMatrix[] = {0.83704215, -0.36174262, 0.41049102, 0.0,
0.06146407, 0.8076706, 0.5864218, 0.0,
-0.54367524, -0.4656292, 0.69828844, 0.0,
0.0, 0.0, -98.64117, 1.0};
// Loads a texture from an input side packet, streams in an animation file
// whose filename is given in another input side packet, and renders the
// animation over the screen according to the input timestamp and desired
// animation FPS.
//
// Inputs:
// VIDEO (GpuBuffer, optional):
// If provided, the input buffer will be assumed to be unique, and will be
// consumed by this calculator and rendered to directly. The output video
// buffer will then be the released reference to the input video buffer.
// MODEL_MATRICES (TimedModelMatrixProtoList, optional):
// If provided, will set the model matrices for the objects to be rendered
// during future rendering calls.
// TEXTURE (ImageFrame on Android / GpuBuffer on iOS, semi-optional):
// Texture to use with animation file. Texture is REQUIRED to be passed into
// the calculator, but can be passed in as a Side Packet OR Input Stream.
//
// Input side packets:
// TEXTURE (ImageFrame on Android / GpuBuffer on iOS, semi-optional):
// Texture to use with animation file. Texture is REQUIRED to be passed into
// the calculator, but can be passed in as a Side Packet OR Input Stream.
// ANIMATION_ASSET (String, required):
// Path of animation file to load and render. The file format expects an
// arbitrary number of animation frames, concatenated directly together,
// with each animation frame looking like:
// HEADER
// VERTICES
// TEXTURE_COORDS
// INDICES
// The header consists of 3 int32_t lengths, the sizes of the vertex data,
// the texcoord data, and the index data, respectively. Let us call those
// N1, N2, and N3. Then we expect N1 float32's for vertex information
// (x1,y1,z1,x2,y2,z2,etc.), followed by N2 float32's for texcoord
// information (u1,v1,u2,v2,u3,v3,etc.), followed by N3 shorts/int16_t's
// for triangle indices (a1,b1,c1,a2,b2,c2,etc.).
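// A sketch of writing one frame in this format appears below, after the
// Outputs section.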
// CAMERA_PARAMETERS_PROTO_STRING (String, optional):
// Serialized proto std::string of CameraParametersProto. We need this to
// get the right aspect ratio and field of view.
// Options:
// aspect_ratio: the ratio between the rendered image width and height.
// It will be ignored if CAMERA_PARAMETERS_PROTO_STRING input side packet
// is provided.
// vertical_fov_degrees: vertical field of view in degrees.
// It will be ignored if CAMERA_PARAMETERS_PROTO_STRING input side packet
// is provided.
// z_clipping_plane_near: near plane value for z-clipping.
// z_clipping_plane_far: far plane value for z-clipping.
// animation_speed_fps: speed at which to cycle through animation frames (in
// frames per second).
//
// Outputs:
// OUTPUT, or index 0 (GpuBuffer):
// Frames filled with the given texture.
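//
// For illustration only, a minimal offline sketch of writing one animation
// frame in the ANIMATION_ASSET format described above. This helper is
// hypothetical (not part of the calculator) and assumes a little-endian
// platform, matching the raw byte dumps that the loaders below expect:
//
//   #include <cstdint>
//   #include <fstream>
//   #include <vector>
//
//   void WriteAnimationFrame(std::ofstream &out,
//                            const std::vector<float> &vertices,
//                            const std::vector<float> &texture_coords,
//                            const std::vector<int16_t> &indices) {
//     // Header: three int32_t element counts (N1, N2, N3).
//     const int32_t lengths[3] = {static_cast<int32_t>(vertices.size()),
//                                 static_cast<int32_t>(texture_coords.size()),
//                                 static_cast<int32_t>(indices.size())};
//     out.write(reinterpret_cast<const char *>(lengths), sizeof(lengths));
//     // Body: each array dumped directly after the header, in order.
//     out.write(reinterpret_cast<const char *>(vertices.data()),
//               vertices.size() * sizeof(float));
//     out.write(reinterpret_cast<const char *>(texture_coords.data()),
//               texture_coords.size() * sizeof(float));
//     out.write(reinterpret_cast<const char *>(indices.data()),
//               indices.size() * sizeof(int16_t));
//   }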
// Simple helper-struct for containing the parsed geometry data from a 3D
// animation frame for rendering.
struct TriangleMesh {
int index_count = 0; // Needed for glDrawElements rendering call
std::unique_ptr<float[]> normals = nullptr;
std::unique_ptr<float[]> vertices = nullptr;
std::unique_ptr<float[]> texture_coords = nullptr;
std::unique_ptr<int16_t[]> triangle_indices = nullptr;
};
typedef std::unique_ptr<float[]> ModelMatrix;
} // namespace
class GlAnimationOverlayCalculator : public CalculatorBase {
public:
GlAnimationOverlayCalculator() {}
~GlAnimationOverlayCalculator();
static absl::Status GetContract(CalculatorContract *cc);
absl::Status Open(CalculatorContext *cc) override;
absl::Status Process(CalculatorContext *cc) override;
private:
bool has_video_stream_ = false;
bool has_model_matrix_stream_ = false;
bool has_mask_model_matrix_stream_ = false;
bool has_occlusion_mask_ = false;
GlCalculatorHelper helper_;
bool initialized_ = false;
GlTexture texture_;
GlTexture mask_texture_;
GLuint renderbuffer_ = 0;
bool depth_buffer_created_ = false;
GLuint program_ = 0;
GLint texture_uniform_ = -1;
GLint perspective_matrix_uniform_ = -1;
GLint model_matrix_uniform_ = -1;
std::vector<TriangleMesh> triangle_meshes_;
std::vector<TriangleMesh> mask_meshes_;
Timestamp animation_start_time_;
int frame_count_ = 0;
float animation_speed_fps_;
std::vector<ModelMatrix> current_model_matrices_;
std::vector<ModelMatrix> current_mask_model_matrices_;
// Perspective matrix for rendering, to be applied to all model matrices
// prior to passing through to the shader as a MVP matrix. Initialized during
// first image packet read.
float perspective_matrix_[kNumMatrixEntries];
void ComputeAspectRatioAndFovFromCameraParameters(
const CameraParametersProto &camera_parameters, float *aspect_ratio,
float *vertical_fov_degrees);
int GetAnimationFrameIndex(Timestamp timestamp);
absl::Status GlSetup();
absl::Status GlBind(const TriangleMesh &triangle_mesh,
const GlTexture &texture);
absl::Status GlRender(const TriangleMesh &triangle_mesh,
const float *model_matrix);
void InitializePerspectiveMatrix(float aspect_ratio,
float vertical_fov_degrees, float z_near,
float z_far);
void LoadModelMatrices(const TimedModelMatrixProtoList &model_matrices,
std::vector<ModelMatrix> *current_model_matrices);
void CalculateTriangleMeshNormals(int normals_len,
TriangleMesh *triangle_mesh);
void Normalize3f(float input[3]);
#if !defined(__ANDROID__)
// Asset loading routine for all non-Android platforms.
bool LoadAnimation(const std::string &filename);
#else
// Asset loading for all Android platforms.
bool LoadAnimationAndroid(const std::string &filename,
std::vector<TriangleMesh> *mesh);
bool ReadBytesFromAsset(AAsset *asset, void *buffer, int num_bytes_to_read);
#endif
};
REGISTER_CALCULATOR(GlAnimationOverlayCalculator);
// static
absl::Status GlAnimationOverlayCalculator::GetContract(CalculatorContract *cc) {
MP_RETURN_IF_ERROR(
GlCalculatorHelper::SetupInputSidePackets(&(cc->InputSidePackets())));
if (cc->Inputs().HasTag("VIDEO")) {
// Currently used only for size and timestamp.
cc->Inputs().Tag("VIDEO").Set<GpuBuffer>();
}
TagOrIndex(&(cc->Outputs()), "OUTPUT", 0).Set<GpuBuffer>();
if (cc->Inputs().HasTag("MODEL_MATRICES")) {
cc->Inputs().Tag("MODEL_MATRICES").Set<TimedModelMatrixProtoList>();
}
if (cc->Inputs().HasTag("MASK_MODEL_MATRICES")) {
cc->Inputs().Tag("MASK_MODEL_MATRICES").Set<TimedModelMatrixProtoList>();
}
// Must have texture as Input Stream or Side Packet
if (cc->InputSidePackets().HasTag("TEXTURE")) {
cc->InputSidePackets().Tag("TEXTURE").Set<AssetTextureFormat>();
} else {
cc->Inputs().Tag("TEXTURE").Set<AssetTextureFormat>();
}
cc->InputSidePackets().Tag("ANIMATION_ASSET").Set<std::string>();
if (cc->InputSidePackets().HasTag("CAMERA_PARAMETERS_PROTO_STRING")) {
cc->InputSidePackets()
.Tag("CAMERA_PARAMETERS_PROTO_STRING")
.Set<std::string>();
}
if (cc->InputSidePackets().HasTag("MASK_TEXTURE")) {
cc->InputSidePackets().Tag("MASK_TEXTURE").Set<AssetTextureFormat>();
}
if (cc->InputSidePackets().HasTag("MASK_ASSET")) {
cc->InputSidePackets().Tag("MASK_ASSET").Set<std::string>();
}
return absl::OkStatus();
}
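// For reference, a minimal sketch of wiring this calculator into a graph.
// The stream, side-packet, and option values below are illustrative
// placeholders, not names prescribed by the contract above:
//
//   node {
//     calculator: "GlAnimationOverlayCalculator"
//     input_stream: "VIDEO:input_video"
//     input_stream: "MODEL_MATRICES:model_matrices"
//     input_side_packet: "TEXTURE:object_texture"
//     input_side_packet: "ANIMATION_ASSET:animation_asset_name"
//     output_stream: "OUTPUT:output_video"
//     node_options: {
//       [type.googleapis.com/mediapipe.GlAnimationOverlayCalculatorOptions] {
//         animation_speed_fps: 25.0
//       }
//     }
//   }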
void GlAnimationOverlayCalculator::CalculateTriangleMeshNormals(
int normals_len, TriangleMesh *triangle_mesh) {
// Set triangle_mesh normals for shader usage
triangle_mesh->normals.reset(new float[normals_len]);
// Used for storing the vertex normals prior to averaging
std::vector<float> vertex_normals_sum(normals_len, 0.0f);
// Compute every triangle surface normal and store them for averaging
for (int idx = 0; idx < triangle_mesh->index_count; idx += 3) {
int v_idx[3];
v_idx[0] = triangle_mesh->triangle_indices.get()[idx];
v_idx[1] = triangle_mesh->triangle_indices.get()[idx + 1];
v_idx[2] = triangle_mesh->triangle_indices.get()[idx + 2];
    // (V1) vertex X,Y,Z coordinates from triangle_mesh.vertices
const float v1x = triangle_mesh->vertices[v_idx[0] * 3];
const float v1y = triangle_mesh->vertices[v_idx[0] * 3 + 1];
const float v1z = triangle_mesh->vertices[v_idx[0] * 3 + 2];
    // (V2) vertex X,Y,Z coordinates from triangle_mesh.vertices
const float v2x = triangle_mesh->vertices[v_idx[1] * 3];
const float v2y = triangle_mesh->vertices[v_idx[1] * 3 + 1];
const float v2z = triangle_mesh->vertices[v_idx[1] * 3 + 2];
    // (V3) vertex X,Y,Z coordinates from triangle_mesh.vertices
const float v3x = triangle_mesh->vertices[v_idx[2] * 3];
const float v3y = triangle_mesh->vertices[v_idx[2] * 3 + 1];
const float v3z = triangle_mesh->vertices[v_idx[2] * 3 + 2];
// Calculate normals from vertices
// V2 - V1
const float ax = v2x - v1x;
const float ay = v2y - v1y;
const float az = v2z - v1z;
// V3 - V1
const float bx = v3x - v1x;
const float by = v3y - v1y;
const float bz = v3z - v1z;
// Calculate cross product
const float normal_x = ay * bz - az * by;
const float normal_y = az * bx - ax * bz;
const float normal_z = ax * by - ay * bx;
    // The face normals computed above could be normalized to prevent
    // triangles with larger surface areas from dominating the averaged
    // vertex normals; however, none of our current models require this
    // normalization.
    // Add this face normal to the running sum of each of its three vertices;
    // the normalization below makes explicit averaging denominators
    // unnecessary.
for (int i = 0; i < 3; i++) {
vertex_normals_sum[v_idx[i] * 3] += normal_x;
vertex_normals_sum[v_idx[i] * 3 + 1] += normal_y;
vertex_normals_sum[v_idx[i] * 3 + 2] += normal_z;
}
}
  // Each vertex now holds the sum of the surface normals of all adjacent
  // triangles; normalize each sum to produce the final unit vertex normals.
for (int idx = 0; idx < normals_len; idx += 3) {
float normal[3];
normal[0] = vertex_normals_sum[idx];
normal[1] = vertex_normals_sum[idx + 1];
normal[2] = vertex_normals_sum[idx + 2];
Normalize3f(normal);
triangle_mesh->normals.get()[idx] = normal[0];
triangle_mesh->normals.get()[idx + 1] = normal[1];
triangle_mesh->normals.get()[idx + 2] = normal[2];
}
}
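// Normalizes a 3-vector in place. For example, an input of {3.0f, 0.0f, 4.0f}
// has magnitude 5 and normalizes to {0.6f, 0.0f, 0.8f}. Note there is no
// guard against a zero-length input (e.g. a vertex with no adjacent
// triangles), which would divide by zero.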
void GlAnimationOverlayCalculator::Normalize3f(float input[3]) {
float product = 0.0;
product += input[0] * input[0];
product += input[1] * input[1];
product += input[2] * input[2];
  float magnitude = std::sqrt(product);
input[0] /= magnitude;
input[1] /= magnitude;
input[2] /= magnitude;
}
// Helper function for initializing our perspective matrix.
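// As a worked example (values rounded): aspect_ratio = 0.75, fov_degrees = 60,
// z_near = 0.1, and z_far = 100 give f = 1 / tan(30 deg) ~= 1.732 and, in the
// column-major layout used below (index = 4 * col + row):
//
//   | 2.309   0       0        0      |
//   | 0       1.732   0        0      |
//   | 0       0      -1.002   -0.200  |
//   | 0       0      -1        0      |
//
// The -1 at index [11] routes -z into the clip-space w coordinate for the
// perspective divide.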
void GlAnimationOverlayCalculator::InitializePerspectiveMatrix(
float aspect_ratio, float fov_degrees, float z_near, float z_far) {
// Standard perspective projection matrix calculations.
const float f = 1.0f / std::tan(fov_degrees * M_PI / 360.0f);
for (int i = 0; i < kNumMatrixEntries; i++) {
perspective_matrix_[i] = 0;
}
const float denom = 1.0f / (z_near - z_far);
perspective_matrix_[0] = f / aspect_ratio;
perspective_matrix_[5] = f;
perspective_matrix_[10] = (z_near + z_far) * denom;
perspective_matrix_[11] = -1.0f;
perspective_matrix_[14] = 2.0f * z_far * z_near * denom;
}
#if defined(__ANDROID__)
// Helper function for reading in a specified number of bytes from an Android
// asset. Returns true if all bytes were successfully read into the buffer.
bool GlAnimationOverlayCalculator::ReadBytesFromAsset(AAsset *asset,
void *buffer,
int num_bytes_to_read) {
// Most file systems use block sizes of 4KB or 8KB; ideally we'd choose a
// small multiple of the block size for best input streaming performance, so
// we go for a reasonably safe buffer size of 8KB = 8*1024 bytes.
static const int kMaxChunkSize = 8192;
int bytes_left = num_bytes_to_read;
int bytes_read = 1; // any value > 0 here just to start looping.
// Treat as uint8_t array so we can deal in single byte arithmetic easily.
  uint8_t *curr_buffer_pos = reinterpret_cast<uint8_t *>(buffer);
  while (bytes_read > 0 && bytes_left > 0) {
    bytes_read = AAsset_read(asset, (void *)curr_buffer_pos,
                             std::min(bytes_left, kMaxChunkSize));
    bytes_left -= bytes_read;
    curr_buffer_pos += bytes_read;
  }
}
// At least log any I/O errors encountered.
if (bytes_read < 0) {
ABSL_LOG(ERROR) << "Error reading from AAsset: " << bytes_read;
return false;
}
if (bytes_left > 0) {
// Reached EOF before reading in specified number of bytes.
ABSL_LOG(WARNING)
<< "Reached EOF before reading in specified number of bytes.";
return false;
}
return true;
}
// The below asset streaming code is Android-only, making use of the NDK
// asset APIs AAssetManager and AAsset.
bool GlAnimationOverlayCalculator::LoadAnimationAndroid(
const std::string &filename, std::vector<TriangleMesh> *meshes) {
mediapipe::AssetManager *mediapipe_asset_manager =
Singleton<mediapipe::AssetManager>::get();
AAssetManager *asset_manager = mediapipe_asset_manager->GetAssetManager();
if (!asset_manager) {
ABSL_LOG(ERROR) << "Failed to access Android asset manager.";
return false;
}
  // First, open the asset for streaming.
AAsset *asset = AAssetManager_open(asset_manager, filename.c_str(),
AASSET_MODE_STREAMING);
if (!asset) {
ABSL_LOG(ERROR) << "Failed to open animation asset: " << filename;
return false;
}
  // Stream in animation frames for as long as more data is available.
frame_count_ = 0;
int32_t lengths[3];
while (ReadBytesFromAsset(asset, (void *)lengths, sizeof(lengths[0]) * 3)) {
// About to start reading the next animation frame. Stream it in here.
// Each frame stores first the object counts of its three arrays
// (vertices, texture coordinates, triangle indices; respectively), and
// then stores each of those arrays as a byte dump, in order.
meshes->emplace_back();
TriangleMesh &triangle_mesh = meshes->back();
// Try to read in vertices (4-byte floats)
triangle_mesh.vertices.reset(new float[lengths[0]]);
    if (!ReadBytesFromAsset(asset, (void *)triangle_mesh.vertices.get(),
                            sizeof(float) * lengths[0])) {
      ABSL_LOG(ERROR) << "Failed to read vertices for frame " << frame_count_;
      AAsset_close(asset);
      return false;
    }
// Try to read in texture coordinates (4-byte floats)
triangle_mesh.texture_coords.reset(new float[lengths[1]]);
    if (!ReadBytesFromAsset(asset, (void *)triangle_mesh.texture_coords.get(),
                            sizeof(float) * lengths[1])) {
      ABSL_LOG(ERROR) << "Failed to read tex-coords for frame " << frame_count_;
      AAsset_close(asset);
      return false;
    }
// Try to read in indices (2-byte shorts)
triangle_mesh.index_count = lengths[2];
triangle_mesh.triangle_indices.reset(new int16_t[lengths[2]]);
    if (!ReadBytesFromAsset(asset, (void *)triangle_mesh.triangle_indices.get(),
                            sizeof(int16_t) * lengths[2])) {
      ABSL_LOG(ERROR) << "Failed to read indices for frame " << frame_count_;
      AAsset_close(asset);
      return false;
    }
// Set the normals for this triangle_mesh
CalculateTriangleMeshNormals(lengths[0], &triangle_mesh);
frame_count_++;
}
AAsset_close(asset);
ABSL_LOG(INFO) << "Finished parsing " << frame_count_ << " animation frames.";
if (meshes->empty()) {
ABSL_LOG(ERROR)
<< "No animation frames were parsed! Erroring out calculator.";
return false;
}
return true;
}
#else // defined(__ANDROID__)
bool GlAnimationOverlayCalculator::LoadAnimation(const std::string &filename) {
std::ifstream infile(filename.c_str(), std::ifstream::binary);
if (!infile) {
ABSL_LOG(ERROR) << "Error opening asset with filename: " << filename;
return false;
}
frame_count_ = 0;
int32_t lengths[3];
while (true) {
// See if we have more initial size counts to read in.
infile.read((char *)(lengths), sizeof(lengths[0]) * 3);
if (!infile) {
// No more frames to read. Close out.
infile.close();
break;
}
triangle_meshes_.emplace_back();
TriangleMesh &triangle_mesh = triangle_meshes_.back();
// Try to read in vertices (4-byte floats).
triangle_mesh.vertices.reset(new float[lengths[0]]);
infile.read((char *)(triangle_mesh.vertices.get()),
sizeof(float) * lengths[0]);
if (!infile) {
ABSL_LOG(ERROR) << "Failed to read vertices for frame " << frame_count_;
return false;
}
// Try to read in texture coordinates (4-byte floats)
triangle_mesh.texture_coords.reset(new float[lengths[1]]);
infile.read((char *)(triangle_mesh.texture_coords.get()),
sizeof(float) * lengths[1]);
if (!infile) {
ABSL_LOG(ERROR) << "Failed to read texture coordinates for frame "
<< frame_count_;
return false;
}
// Try to read in the triangle indices (2-byte shorts)
triangle_mesh.index_count = lengths[2];
triangle_mesh.triangle_indices.reset(new int16_t[lengths[2]]);
infile.read((char *)(triangle_mesh.triangle_indices.get()),
sizeof(int16_t) * lengths[2]);
if (!infile) {
ABSL_LOG(ERROR) << "Failed to read triangle indices for frame "
<< frame_count_;
return false;
}
// Set the normals for this triangle_mesh
CalculateTriangleMeshNormals(lengths[0], &triangle_mesh);
frame_count_++;
}
ABSL_LOG(INFO) << "Finished parsing " << frame_count_ << " animation frames.";
if (triangle_meshes_.empty()) {
ABSL_LOG(ERROR)
<< "No animation frames were parsed! Erroring out calculator.";
return false;
}
return true;
}
#endif
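// Derives the aspect ratio and vertical field of view from the camera
// parameters. The FOV formula below implies that portrait dimensions are
// expressed in normalized units at unit focal length, i.e.
// portrait_height = 2 * tan(vertical_fov / 2). For example, a portrait_height
// of 2 * tan(30 deg) ~= 1.1547 yields atan(0.57735) * 2 * 180 / pi = 60
// degrees.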
void GlAnimationOverlayCalculator::ComputeAspectRatioAndFovFromCameraParameters(
const CameraParametersProto &camera_parameters, float *aspect_ratio,
float *vertical_fov_degrees) {
ABSL_CHECK(aspect_ratio != nullptr);
ABSL_CHECK(vertical_fov_degrees != nullptr);
*aspect_ratio =
camera_parameters.portrait_width() / camera_parameters.portrait_height();
*vertical_fov_degrees =
std::atan(camera_parameters.portrait_height() * 0.5f) * 2 * 180 / M_PI;
}
absl::Status GlAnimationOverlayCalculator::Open(CalculatorContext *cc) {
cc->SetOffset(TimestampDiff(0));
MP_RETURN_IF_ERROR(helper_.Open(cc));
const auto &options = cc->Options<GlAnimationOverlayCalculatorOptions>();
animation_speed_fps_ = options.animation_speed_fps();
  // Construct the projection matrix from input side packets or options.
float aspect_ratio;
float vertical_fov_degrees;
if (cc->InputSidePackets().HasTag("CAMERA_PARAMETERS_PROTO_STRING")) {
const std::string &camera_parameters_proto_string =
cc->InputSidePackets()
.Tag("CAMERA_PARAMETERS_PROTO_STRING")
.Get<std::string>();
CameraParametersProto camera_parameters_proto;
camera_parameters_proto.ParseFromString(camera_parameters_proto_string);
ComputeAspectRatioAndFovFromCameraParameters(
camera_parameters_proto, &aspect_ratio, &vertical_fov_degrees);
} else {
aspect_ratio = options.aspect_ratio();
vertical_fov_degrees = options.vertical_fov_degrees();
}
  // Initialize the perspective matrix from the chosen parameters.
InitializePerspectiveMatrix(aspect_ratio, vertical_fov_degrees,
options.z_clipping_plane_near(),
options.z_clipping_plane_far());
// See what streams we have.
has_video_stream_ = cc->Inputs().HasTag("VIDEO");
has_model_matrix_stream_ = cc->Inputs().HasTag("MODEL_MATRICES");
has_mask_model_matrix_stream_ = cc->Inputs().HasTag("MASK_MODEL_MATRICES");
// Try to load in the animation asset in a platform-specific manner.
const std::string &asset_name =
cc->InputSidePackets().Tag("ANIMATION_ASSET").Get<std::string>();
bool loaded_animation = false;
#if defined(__ANDROID__)
if (cc->InputSidePackets().HasTag("MASK_ASSET")) {
has_occlusion_mask_ = true;
const std::string &mask_asset_name =
cc->InputSidePackets().Tag("MASK_ASSET").Get<std::string>();
loaded_animation = LoadAnimationAndroid(mask_asset_name, &mask_meshes_);
if (!loaded_animation) {
ABSL_LOG(ERROR) << "Failed to load mask asset.";
return absl::UnknownError("Failed to load mask asset.");
}
}
loaded_animation = LoadAnimationAndroid(asset_name, &triangle_meshes_);
#else
loaded_animation = LoadAnimation(asset_name);
#endif
if (!loaded_animation) {
ABSL_LOG(ERROR) << "Failed to load animation asset.";
return absl::UnknownError("Failed to load animation asset.");
}
return helper_.RunInGlContext([this, &cc]() -> absl::Status {
if (cc->InputSidePackets().HasTag("MASK_TEXTURE")) {
const auto &mask_texture =
cc->InputSidePackets().Tag("MASK_TEXTURE").Get<AssetTextureFormat>();
mask_texture_ = helper_.CreateSourceTexture(mask_texture);
}
// Load in all static texture data if it exists
if (cc->InputSidePackets().HasTag("TEXTURE")) {
const auto &input_texture =
cc->InputSidePackets().Tag("TEXTURE").Get<AssetTextureFormat>();
texture_ = helper_.CreateSourceTexture(input_texture);
}
    VLOG(2) << "Input texture size: " << texture_.width() << ", "
            << texture_.height();
return absl::OkStatus();
});
}
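// Maps an input timestamp to a looping animation frame index. For example,
// with animation_speed_fps_ = 25 and frame_count_ = 30, a timestamp 2.5 s
// after animation_start_time_ gives static_cast<int64_t>(2.5 * 25) = 62 and
// 62 % 30 = 2, so frame 2 is rendered as playback wraps around.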
int GlAnimationOverlayCalculator::GetAnimationFrameIndex(Timestamp timestamp) {
double seconds_delta = timestamp.Seconds() - animation_start_time_.Seconds();
int64_t frame_index =
static_cast<int64_t>(seconds_delta * animation_speed_fps_);
frame_index %= frame_count_;
return static_cast<int>(frame_index);
}
void GlAnimationOverlayCalculator::LoadModelMatrices(
const TimedModelMatrixProtoList &model_matrices,
std::vector<ModelMatrix> *current_model_matrices) {
current_model_matrices->clear();
for (int i = 0; i < model_matrices.model_matrix_size(); ++i) {
const auto &model_matrix = model_matrices.model_matrix(i);
ABSL_CHECK(model_matrix.matrix_entries_size() == kNumMatrixEntries)
<< "Invalid Model Matrix";
current_model_matrices->emplace_back();
ModelMatrix &new_matrix = current_model_matrices->back();
new_matrix.reset(new float[kNumMatrixEntries]);
for (int j = 0; j < kNumMatrixEntries; j++) {
      // Model matrices are streamed in ROW-MAJOR order, but we need
      // COLUMN-MAJOR for rendering, so we transpose here.
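      // For example, row-major entry j = 1 (row 0, column 1) is stored at
      // column-major index row + col * 4 = 0 + 1 * 4 = 4, while j = 4
      // (row 1, column 0) lands at index 1, which is exactly the transpose.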
int col = j % 4;
int row = j / 4;
new_matrix[row + col * 4] = model_matrix.matrix_entries(j);
}
}
}
absl::Status GlAnimationOverlayCalculator::Process(CalculatorContext *cc) {
return helper_.RunInGlContext([this, &cc]() -> absl::Status {
if (!initialized_) {
MP_RETURN_IF_ERROR(GlSetup());
initialized_ = true;
animation_start_time_ = cc->InputTimestamp();
}
// Process model matrices, if any are being streamed in, and update our
// list.
current_model_matrices_.clear();
if (has_model_matrix_stream_ &&
!cc->Inputs().Tag("MODEL_MATRICES").IsEmpty()) {
const TimedModelMatrixProtoList &model_matrices =
cc->Inputs().Tag("MODEL_MATRICES").Get<TimedModelMatrixProtoList>();
      LoadModelMatrices(model_matrices, &current_model_matrices_);
}
current_mask_model_matrices_.clear();
if (has_mask_model_matrix_stream_ &&
!cc->Inputs().Tag("MASK_MODEL_MATRICES").IsEmpty()) {
const TimedModelMatrixProtoList &model_matrices =
cc->Inputs()
.Tag("MASK_MODEL_MATRICES")
.Get<TimedModelMatrixProtoList>();
      LoadModelMatrices(model_matrices, &current_mask_model_matrices_);
}
// Arbitrary default width and height for output destination texture, in the
// event that we don't have a valid and unique input buffer to overlay.
int width = 640;
int height = 480;
GlTexture dst;
std::unique_ptr<GpuBuffer> input_frame(nullptr);
if (has_video_stream_ && !(cc->Inputs().Tag("VIDEO").IsEmpty())) {
auto result = cc->Inputs().Tag("VIDEO").Value().Consume<GpuBuffer>();
if (result.ok()) {
input_frame = std::move(result).value();
#if !MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
input_frame->internal_storage<GlTextureBuffer>()->Reuse();
#endif
width = input_frame->width();
height = input_frame->height();
dst = helper_.CreateSourceTexture(*input_frame);
} else {
ABSL_LOG(ERROR) << "Unable to consume input video frame for overlay!";
ABSL_LOG(ERROR) << "Status returned was: " << result.status();
dst = helper_.CreateDestinationTexture(width, height);
}
} else if (!has_video_stream_) {
dst = helper_.CreateDestinationTexture(width, height);
} else {
// We have an input video stream, but not for this frame. Don't render!
return absl::OkStatus();
}
helper_.BindFramebuffer(dst);
if (!depth_buffer_created_) {
// Create our private depth buffer.
GLCHECK(glGenRenderbuffers(1, &renderbuffer_));
GLCHECK(glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer_));
GLCHECK(glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT16,
width, height));
GLCHECK(glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT,
GL_RENDERBUFFER, renderbuffer_));
GLCHECK(glBindRenderbuffer(GL_RENDERBUFFER, 0));
depth_buffer_created_ = true;
}
// Re-bind our depth renderbuffer to our FBO depth attachment here.
GLCHECK(glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer_));
GLCHECK(glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT,
GL_RENDERBUFFER, renderbuffer_));
GLenum status = GLCHECK(glCheckFramebufferStatus(GL_FRAMEBUFFER));
if (status != GL_FRAMEBUFFER_COMPLETE) {
ABSL_LOG(ERROR) << "Incomplete framebuffer with status: " << status;
}
GLCHECK(glClear(GL_DEPTH_BUFFER_BIT));
if (has_occlusion_mask_) {
glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
const TriangleMesh &mask_frame = mask_meshes_.front();
MP_RETURN_IF_ERROR(GlBind(mask_frame, mask_texture_));
// Draw objects using our latest model matrix stream packet.
for (const ModelMatrix &model_matrix : current_mask_model_matrices_) {
MP_RETURN_IF_ERROR(GlRender(mask_frame, model_matrix.get()));
}
}
glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
int frame_index = GetAnimationFrameIndex(cc->InputTimestamp());
    const TriangleMesh &current_frame = triangle_meshes_[frame_index];
// Load dynamic texture if it exists
if (cc->Inputs().HasTag("TEXTURE")) {
const auto &input_texture =
cc->Inputs().Tag("TEXTURE").Get<AssetTextureFormat>();
texture_ = helper_.CreateSourceTexture(input_texture);
}
MP_RETURN_IF_ERROR(GlBind(current_frame, texture_));
if (has_model_matrix_stream_) {
// Draw objects using our latest model matrix stream packet.
for (const ModelMatrix &model_matrix : current_model_matrices_) {
MP_RETURN_IF_ERROR(GlRender(current_frame, model_matrix.get()));
}
} else {
// Just draw one object to a static model matrix.
MP_RETURN_IF_ERROR(GlRender(current_frame, kModelMatrix));
}
// Disable vertex attributes
GLCHECK(glDisableVertexAttribArray(ATTRIB_VERTEX));
GLCHECK(glDisableVertexAttribArray(ATTRIB_TEXTURE_POSITION));
GLCHECK(glDisableVertexAttribArray(ATTRIB_NORMAL));
// Disable depth test
GLCHECK(glDisable(GL_DEPTH_TEST));
// Unbind texture
GLCHECK(glActiveTexture(GL_TEXTURE1));
GLCHECK(glBindTexture(texture_.target(), 0));
// Unbind depth buffer
GLCHECK(glBindRenderbuffer(GL_RENDERBUFFER, 0));
GLCHECK(glFlush());
auto output = dst.GetFrame<GpuBuffer>();
dst.Release();
TagOrIndex(&(cc->Outputs()), "OUTPUT", 0)
.Add(output.release(), cc->InputTimestamp());
GLCHECK(glFrontFace(GL_CCW));
return absl::OkStatus();
});
}
absl::Status GlAnimationOverlayCalculator::GlSetup() {
// Load vertex and fragment shaders
const GLint attr_location[NUM_ATTRIBUTES] = {
ATTRIB_VERTEX,
ATTRIB_TEXTURE_POSITION,
ATTRIB_NORMAL,
};
const GLchar *attr_name[NUM_ATTRIBUTES] = {
"position",
"texture_coordinate",
"normal",
};
const GLchar *vert_src = R"(
// Perspective projection matrix for rendering / clipping
uniform mat4 perspectiveMatrix;
// Matrix defining the currently rendered object model
uniform mat4 modelMatrix;
    // vertex position in 3D space
attribute vec4 position;
attribute vec3 normal;
// texture coordinate for each vertex in normalized texture space (0..1)
attribute mediump vec4 texture_coordinate;
// texture coordinate for fragment shader (will be interpolated)
varying mediump vec2 sampleCoordinate;
varying mediump vec3 vNormal;
void main() {
sampleCoordinate = texture_coordinate.xy;
mat4 mvpMatrix = perspectiveMatrix * modelMatrix;
gl_Position = mvpMatrix * position;
// TODO: Pass in rotation submatrix with no scaling or transforms to prevent
// breaking vNormal in case of model matrix having non-uniform scaling
vec4 tmpNormal = mvpMatrix * vec4(normal, 1.0);
vec4 transformedZero = mvpMatrix * vec4(0.0, 0.0, 0.0, 1.0);
tmpNormal = tmpNormal - transformedZero;
vNormal = normalize(tmpNormal.xyz);
}
)";
const GLchar *frag_src = R"(
precision mediump float;
varying vec2 sampleCoordinate; // texture coordinate (0..1)
varying vec3 vNormal;
uniform sampler2D texture; // texture to shade with
const float kPi = 3.14159265359;
    // Ambient lighting factor applied to our texture to simulate ambient
    // scene lighting on the object. Range is [0.0, 1.0], proportional to the
    // brightness of the scene lighting falling on the object.
const float kAmbientLighting = 0.75;
// Define RGB values for light source
const vec3 kLightColor = vec3(0.25);
// Exponent for directional lighting that governs diffusion of surface light
const float kExponent = 1.0;
// Define direction of lighting effect source
const vec3 lightDir = vec3(0.0, -1.0, -0.6);
// Hard-coded view direction
const vec3 viewDir = vec3(0.0, 0.0, -1.0);
// DirectionalLighting procedure imported from Lullaby @ https://github.com/google/lullaby
// Calculate and return the color (diffuse and specular together) reflected by
// a directional light.
vec3 GetDirectionalLight(vec3 pos, vec3 normal, vec3 viewDir, vec3 lightDir, vec3 lightColor, float exponent) {
// Intensity of the diffuse light. Saturate to keep within the 0-1 range.
float normal_dot_light_dir = dot(-normal, -lightDir);
float intensity = clamp(normal_dot_light_dir, 0.0, 1.0);
// Calculate the diffuse light
vec3 diffuse = intensity * lightColor;
// http://www.rorydriscoll.com/2009/01/25/energy-conservation-in-games/
float kEnergyConservation = (2.0 + exponent) / (2.0 * kPi);
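  // For example, with exponent = 1.0 this factor is 3.0 / (2.0 * kPi)
  // ~= 0.477, scaling the specular term so the total reflected energy stays
  // bounded as the exponent sharpens the highlight.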
vec3 reflect_dir = reflect(lightDir, -normal);
// Intensity of the specular light
float view_dot_reflect = dot(-viewDir, reflect_dir);
// Use an epsilon for pow because pow(x,y) is undefined if x < 0 or x == 0
// and y <= 0 (GLSL Spec 8.2)
const float kEpsilon = 1e-5;
intensity = kEnergyConservation * pow(clamp(view_dot_reflect, kEpsilon, 1.0),
exponent);
// Specular color:
vec3 specular = intensity * lightColor;
return diffuse + specular;
}
void main() {
// Sample the texture, retrieving an rgba pixel value
vec4 pixel = texture2D(texture, sampleCoordinate);
  // If the alpha (background) value is nearly transparent, discard the
  // pixel; this allows rendering GIFs with transparent backgrounds.
  // TODO: Add a toggle for pixel alpha discarding for transparent GIFs
  // (to prevent interference with the Objectron system).
if (pixel.a < 0.2) discard;
// Generate directional lighting effect
vec3 lighting = GetDirectionalLight(gl_FragCoord.xyz, vNormal, viewDir, lightDir, kLightColor, kExponent);
// Apply both ambient and directional lighting to our texture
gl_FragColor = vec4((vec3(kAmbientLighting) + lighting) * pixel.rgb, 1.0);
}
)";
// Shader program
GLCHECK(GlhCreateProgram(vert_src, frag_src, NUM_ATTRIBUTES,
(const GLchar **)&attr_name[0], attr_location,
&program_));
RET_CHECK(program_) << "Problem initializing the program.";
texture_uniform_ = GLCHECK(glGetUniformLocation(program_, "texture"));
perspective_matrix_uniform_ =
GLCHECK(glGetUniformLocation(program_, "perspectiveMatrix"));
model_matrix_uniform_ =
GLCHECK(glGetUniformLocation(program_, "modelMatrix"));
return absl::OkStatus();
}
absl::Status GlAnimationOverlayCalculator::GlBind(
const TriangleMesh &triangle_mesh, const GlTexture &texture) {
GLCHECK(glUseProgram(program_));
  // Set up blending and depth testing for rendering solid, arbitrary 3D
  // geometry. Backface culling is left at its GL default (disabled) so that
  // occlusion geometry renders from either side.
GLCHECK(glEnable(GL_BLEND));
GLCHECK(glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA));
GLCHECK(glEnable(GL_DEPTH_TEST));
GLCHECK(glFrontFace(GL_CW));
GLCHECK(glDepthMask(GL_TRUE));
GLCHECK(glDepthFunc(GL_LESS));
  // Set up the vertex attribute arrays for this mesh.
GLCHECK(glVertexAttribPointer(ATTRIB_VERTEX, 3, GL_FLOAT, 0, 0,
triangle_mesh.vertices.get()));
GLCHECK(glEnableVertexAttribArray(ATTRIB_VERTEX));
GLCHECK(glVertexAttribPointer(ATTRIB_TEXTURE_POSITION, 2, GL_FLOAT, 0, 0,
triangle_mesh.texture_coords.get()));
GLCHECK(glEnableVertexAttribArray(ATTRIB_TEXTURE_POSITION));
GLCHECK(glVertexAttribPointer(ATTRIB_NORMAL, 3, GL_FLOAT, 0, 0,
triangle_mesh.normals.get()));
GLCHECK(glEnableVertexAttribArray(ATTRIB_NORMAL));
GLCHECK(glActiveTexture(GL_TEXTURE1));
GLCHECK(glBindTexture(texture.target(), texture.name()));
// We previously bound it to GL_TEXTURE1
GLCHECK(glUniform1i(texture_uniform_, 1));
GLCHECK(glUniformMatrix4fv(perspective_matrix_uniform_, 1, GL_FALSE,
perspective_matrix_));
return absl::OkStatus();
}
absl::Status GlAnimationOverlayCalculator::GlRender(
const TriangleMesh &triangle_mesh, const float *model_matrix) {
GLCHECK(glUniformMatrix4fv(model_matrix_uniform_, 1, GL_FALSE, model_matrix));
GLCHECK(glDrawElements(GL_TRIANGLES, triangle_mesh.index_count,
GL_UNSIGNED_SHORT,
triangle_mesh.triangle_indices.get()));
return absl::OkStatus();
}
GlAnimationOverlayCalculator::~GlAnimationOverlayCalculator() {
helper_.RunInGlContext([this] {
if (program_) {
GLCHECK(glDeleteProgram(program_));
program_ = 0;
}
if (depth_buffer_created_) {
GLCHECK(glDeleteRenderbuffers(1, &renderbuffer_));
renderbuffer_ = 0;
}
if (texture_.width() > 0) {
texture_.Release();
}
if (mask_texture_.width() > 0) {
mask_texture_.Release();
}
});
}
} // namespace mediapipe