#include "mediapipe/framework/formats/tensor.h"
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <memory>
#include <utility>
#include "absl/log/absl_check.h"
#include "absl/log/absl_log.h"
#include "absl/status/status.h"
#include "absl/strings/str_cat.h"
#include "absl/synchronization/mutex.h"
#include "mediapipe/framework/memory_manager.h"
#include "mediapipe/framework/port.h"
#include "mediapipe/framework/port/aligned_malloc_and_free.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status_macros.h"
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
#include "mediapipe/gpu/gl_base.h"
#endif
#ifdef MEDIAPIPE_TENSOR_USE_AHWB
#include "mediapipe/framework/formats/hardware_buffer.h"
#endif
#if MEDIAPIPE_METAL_ENABLED
#import <Metal/Metal.h>
#include <mach/mach_init.h>
#include <mach/vm_map.h>
#include <unistd.h>  // for getpagesize()
#include "mediapipe/framework/formats/tensor_mtl_buffer_view.h"
#else
#include <cstdlib>
#endif  // MEDIAPIPE_METAL_ENABLED
#if MEDIAPIPE_USE_WEBGPU
#include "mediapipe/gpu/webgpu/webgpu_utils.h"
#endif
namespace mediapipe {
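// The helpers below interpret a Tensor::Shape as BHWC (batch, height, width,
// channels/"depth"), the layout assumed when packing tensors into the GPU
// textures and buffers created later in this file.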
bool IsPowerOfTwo(int v) { … }
int BhwcBatchFromShape(const Tensor::Shape& shape) { … }
int BhwcHeightFromShape(const Tensor::Shape& shape) { … }
int BhwcWidthFromShape(const Tensor::Shape& shape) { … }
int BhwcDepthFromShape(const Tensor::Shape& shape) { … }
#if MEDIAPIPE_METAL_ENABLED
struct MtlResources {
id<MTLCommandBuffer> command_buffer = nil;
id<MTLDevice> device = nil;
id<MTLBuffer> metal_buffer = nil;
};
namespace {
size_t AlignToPageSize(size_t size) {
auto page_size = getpagesize();
return (size + page_size - 1) / page_size * page_size;
}
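// Example: with a 16 KiB page size, AlignToPageSize(10000) returns 16384;
// sizes that are already a multiple of the page size are returned unchanged.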
void* AllocateVirtualMemory(size_t size) {
vm_address_t data;
auto error = vm_allocate(mach_task_self(), &data, AlignToPageSize(size),
VM_FLAGS_ANYWHERE);
ABSL_LOG_IF(FATAL, error != KERN_SUCCESS)
<< "Can't allocate virtual memory for Tensor.";
return reinterpret_cast<void*>(data);
}
void DeallocateVirtualMemory(void* pointer, size_t size) {
vm_deallocate(mach_task_self(), reinterpret_cast<vm_address_t>(pointer),
size);
}
}  // namespace
void MtlBufferView::AllocateMtlBuffer(const Tensor& tensor,
id<MTLDevice> device) {
tensor.mtl_resources_->device = device;
if (!tensor.cpu_buffer_) {
tensor.cpu_buffer_ = AllocateVirtualMemory(tensor.bytes());
}
if (!tensor.mtl_resources_->metal_buffer) {
tensor.mtl_resources_->metal_buffer = [tensor.mtl_resources_->device
newBufferWithBytesNoCopy:tensor.cpu_buffer_
length:AlignToPageSize(tensor.bytes())
options:MTLResourceStorageModeShared |
MTLResourceCPUCacheModeDefaultCache
deallocator:^(void* pointer, NSUInteger length) {
DeallocateVirtualMemory(pointer, length);
}];
}
}
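// Note that newBufferWithBytesNoCopy with MTLResourceStorageModeShared wraps
// the page-aligned cpu_buffer_ without copying, so the CPU view and the
// MTLBuffer alias the same memory and only the validity flags need to be
// reconciled between them.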
MtlBufferView MtlBufferView::GetReadView(const Tensor& tensor,
id<MTLCommandBuffer> command_buffer) {
ABSL_LOG_IF(FATAL, tensor.valid_ == Tensor::kValidNone)
<< "Tensor must be written prior to read from.";
ABSL_LOG_IF(
FATAL, !(tensor.valid_ & (Tensor::kValidCpu | Tensor::kValidMetalBuffer)))
<< "Tensor conversion between different GPU backing formats is not "
"supported yet.";
auto lock(std::make_unique<absl::MutexLock>(&tensor.view_mutex_));
tensor.valid_ |= Tensor::kValidMetalBuffer;
AllocateMtlBuffer(tensor, [command_buffer device]);
return {tensor.mtl_resources_->metal_buffer, std::move(lock)};
}
MtlBufferView MtlBufferView::GetWriteView(const Tensor& tensor,
id<MTLCommandBuffer> command_buffer) {
tensor.mtl_resources_->command_buffer = command_buffer;
return GetWriteView(tensor, [command_buffer device]);
}
MtlBufferView MtlBufferView::GetWriteView(const Tensor& tensor,
id<MTLDevice> device) {
auto lock(std::make_unique<absl::MutexLock>(&tensor.view_mutex_));
tensor.valid_ = Tensor::kValidMetalBuffer;
AllocateMtlBuffer(tensor, device);
return {tensor.mtl_resources_->metal_buffer, std::move(lock)};
}
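// Caller-side sketch (illustrative only; `queue` and `encoder` are assumed to
// exist in the calling code):
//   id<MTLCommandBuffer> command_buffer = [queue commandBuffer];
//   auto view = MtlBufferView::GetWriteView(tensor, command_buffer);
//   [encoder setBuffer:view.buffer() offset:0 atIndex:0];
// The returned view keeps tensor.view_mutex_ locked until it is destroyed.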
#else
struct MtlResources { … };
#endif  // MEDIAPIPE_METAL_ENABLED
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
bool Tensor::NeedsHalfFloatRenderTarget() const {
static bool has_color_buffer_float =
gl_context_->HasGlExtension("WEBGL_color_buffer_float") ||
gl_context_->HasGlExtension("EXT_color_buffer_float");
if (!has_color_buffer_float) {
static bool has_color_buffer_half_float =
gl_context_->HasGlExtension("EXT_color_buffer_half_float");
ABSL_LOG_IF(FATAL, !has_color_buffer_half_float)
<< "EXT_color_buffer_half_float or WEBGL_color_buffer_float "
<< "required on web to use MP tensor";
return true;
}
return false;
}
Tensor::OpenGlTexture2dView Tensor::GetOpenGlTexture2dReadView() const {
ABSL_LOG_IF(FATAL, valid_ == kValidNone)
<< "Tensor must be written prior to read from.";
ABSL_LOG_IF(FATAL, !(valid_ & (kValidCpu | kValidOpenGlTexture2d)))
<< "Tensor conversion between different GPU backing formats is not "
"supported yet.";
auto lock = std::make_unique<absl::MutexLock>(&view_mutex_);
AllocateOpenGlTexture2d();
if (!(valid_ & kValidOpenGlTexture2d)) {
const int padded_size =
texture_height_ * texture_width_ * 4 * element_size();
auto temp_buffer = std::make_unique<uint8_t[]>(padded_size);
uint8_t* dest_buffer = temp_buffer.get();
uint8_t* src_buffer = reinterpret_cast<uint8_t*>(cpu_buffer_);
const int num_elements = BhwcWidthFromShape(shape_) *
BhwcHeightFromShape(shape_) *
BhwcBatchFromShape(shape_);
const int actual_depth_size = BhwcDepthFromShape(shape_) * element_size();
const int padded_depth_size =
(BhwcDepthFromShape(shape_) + 3) / 4 * 4 * element_size();
for (int e = 0; e < num_elements; e++) {
std::memcpy(dest_buffer, src_buffer, actual_depth_size);
src_buffer += actual_depth_size;
dest_buffer += padded_depth_size;
}
glBindTexture(GL_TEXTURE_2D, opengl_texture2d_);
glPixelStorei(GL_UNPACK_ALIGNMENT, 4);
#ifdef __EMSCRIPTEN__
if (gl_context_->GetGlVersion() == mediapipe::GlVersion::kGLES2) {
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texture_width_, texture_height_,
0, GL_RGBA, GL_FLOAT, temp_buffer.get());
texture_is_half_float_ = false;
} else
#endif
{
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, texture_width_, texture_height_,
GL_RGBA, GL_FLOAT, temp_buffer.get());
}
glBindTexture(GL_TEXTURE_2D, 0);
valid_ |= kValidOpenGlTexture2d;
}
return {opengl_texture2d_, std::move(lock)};
}
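// Padding example for the CPU -> texture upload above: with depth 3 and
// element_size() == 4, actual_depth_size is 12 bytes while padded_depth_size
// is 16, so each element's channels land in one RGBA texel and the unused
// fourth lane stays zero-initialized in the staging buffer.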
Tensor::OpenGlTexture2dView Tensor::GetOpenGlTexture2dWriteView() const {
auto lock = std::make_unique<absl::MutexLock>(&view_mutex_);
AllocateOpenGlTexture2d();
#ifdef __EMSCRIPTEN__
if (!texture_is_half_float_ && NeedsHalfFloatRenderTarget()) {
glBindTexture(GL_TEXTURE_2D, opengl_texture2d_);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texture_width_, texture_height_, 0,
                 GL_RGBA, GL_HALF_FLOAT_OES, /*data=*/nullptr);
glBindTexture(GL_TEXTURE_2D, 0);
texture_is_half_float_ = true;
}
#endif
valid_ = kValidOpenGlTexture2d;
return {opengl_texture2d_, std::move(lock)};
}
Tensor::OpenGlTexture2dView::Layout
Tensor::OpenGlTexture2dView::GetLayoutDimensions(const Tensor::Shape& shape,
int* width, int* height) {
static int max_size = 0;
if (max_size == 0) {
int max_texture_size;
glGetIntegerv(GL_MAX_TEXTURE_SIZE, &max_texture_size);
int max_renderbuffer_size;
glGetIntegerv(GL_MAX_RENDERBUFFER_SIZE, &max_renderbuffer_size);
int max_viewport_dims[2];
glGetIntegerv(GL_MAX_VIEWPORT_DIMS, max_viewport_dims);
max_size = std::min(std::min(max_texture_size, max_renderbuffer_size),
std::min(max_viewport_dims[0], max_viewport_dims[1]));
}
const int num_slices = (BhwcDepthFromShape(shape) + 3) / 4;
const int num_elements = BhwcBatchFromShape(shape) *
BhwcHeightFromShape(shape) *
BhwcWidthFromShape(shape);
const int num_pixels = num_slices * num_elements;
int w = BhwcWidthFromShape(shape) * num_slices;
if (w <= max_size) {
int h = (num_pixels + w - 1) / w;
if (h <= max_size) {
*width = w;
*height = h;
return Tensor::OpenGlTexture2dView::Layout::kAligned;
}
}
float power = std::log2(std::sqrt(static_cast<float>(num_pixels)));
w = 1 << static_cast<int>(power);
int h = (num_pixels + w - 1) / w;
ABSL_LOG_IF(FATAL, w > max_size || h > max_size)
<< "The tensor can't fit into OpenGL Texture2D View.";
*width = w;
*height = h;
return Tensor::OpenGlTexture2dView::Layout::kLinearized;
}
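// Worked example: for shape {1, 64, 64, 8}, num_slices = 2, num_pixels = 8192
// and w = 64 * 2 = 128; if 128 fits within max_size, then h = 64 and the
// kAligned 128x64 layout is used. Only when the aligned width exceeds
// max_size does the code fall back to the near-square kLinearized layout.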
void Tensor::AllocateOpenGlTexture2d() const {
if (opengl_texture2d_ == GL_INVALID_INDEX) {
gl_context_ = mediapipe::GlContext::GetCurrent();
ABSL_LOG_IF(FATAL, !gl_context_) << "GlContext is not bound to the thread.";
glGenTextures(1, &opengl_texture2d_);
glBindTexture(GL_TEXTURE_2D, opengl_texture2d_);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
OpenGlTexture2dView::GetLayoutDimensions(shape_, &texture_width_,
&texture_height_);
if (gl_context_->GetGlVersion() != mediapipe::GlVersion::kGLES2) {
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGBA32F, texture_width_,
texture_height_);
} else {
if (!IsPowerOfTwo(texture_width_) || !IsPowerOfTwo(texture_height_)) {
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
}
static bool has_oes_extension =
gl_context_->HasGlExtension("OES_texture_float");
ABSL_LOG_IF(FATAL, !has_oes_extension)
<< "OES_texture_float extension required in order to use MP tensor "
<< "with GLES 2.0";
auto type = GL_FLOAT;
#ifdef __EMSCRIPTEN__
if (NeedsHalfFloatRenderTarget()) {
type = GL_HALF_FLOAT_OES;
texture_is_half_float_ = true;
}
#endif
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texture_width_, texture_height_,
                   0, GL_RGBA, type, /*data=*/nullptr);
}
glBindTexture(GL_TEXTURE_2D, 0);
glGenFramebuffers(1, &frame_buffer_);
}
}
#endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
Tensor::OpenGlBufferView Tensor::GetOpenGlBufferReadView() const {
ABSL_LOG_IF(FATAL, valid_ == kValidNone)
<< "Tensor must be written prior to read from.";
ABSL_LOG_IF(FATAL, !(valid_ & (kValidCpu |
#ifdef MEDIAPIPE_TENSOR_USE_AHWB
kValidAHardwareBuffer |
#endif
kValidOpenGlBuffer)))
<< "Tensor conversion between different GPU backing formats is not "
"supported yet.";
auto lock(std::make_unique<absl::MutexLock>(&view_mutex_));
if ((valid_ & kValidOpenGlBuffer) && gl_context_ != nullptr &&
!gl_context_->IsCurrent() && GlContext::IsAnyContextCurrent()) {
ABSL_LOG_FIRST_N(WARNING, 1)
<< "Tensor::GetOpenGlBufferReadView is not executed on the same GL "
"context where GL buffer was created. Note that Tensor has "
"limited synchronization support when sharing OpenGl objects "
"between multiple OpenGL contexts.";
}
AllocateOpenGlBuffer();
if (!(valid_ & kValidOpenGlBuffer)) {
    // If the AHWB -> SSBO fence can be inserted, the SSBO contents are
    // already synchronized; otherwise fall back to uploading from cpu_buffer_.
    if (!InsertAhwbToSsboFence()) {
glBindBuffer(GL_SHADER_STORAGE_BUFFER, opengl_buffer_);
void* ptr =
glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, bytes(),
GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_WRITE_BIT);
ABSL_CHECK(ptr) << "glMapBufferRange failed: " << glGetError();
std::memcpy(ptr, cpu_buffer_, bytes());
glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
}
valid_ |= kValidOpenGlBuffer;
}
return {opengl_buffer_, std::move(lock),
#ifdef MEDIAPIPE_TENSOR_USE_AHWB
use_ahwb_ ? &ssbo_read_ : nullptr
#else
nullptr
#endif
};
}
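// Caller-side sketch (binding index is illustrative):
//   auto view = tensor.GetOpenGlBufferReadView();
//   glBindBufferBase(GL_SHADER_STORAGE_BUFFER, /*index=*/0, view.name());
// For AHWB-backed tensors the view also receives &ssbo_read_, which lets the
// view's release path record a GL fence before the buffer is reused elsewhere.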
Tensor::OpenGlBufferView Tensor::GetOpenGlBufferWriteView(
uint64_t source_location_hash) const {
auto lock(std::make_unique<absl::MutexLock>(&view_mutex_));
TrackAhwbUsage(source_location_hash);
if ((valid_ & kValidOpenGlBuffer) && gl_context_ != nullptr &&
!gl_context_->IsCurrent() && GlContext::IsAnyContextCurrent()) {
ABSL_LOG_FIRST_N(WARNING, 1)
<< "Tensor::GetOpenGlBufferWriteView is not executed on the same GL "
"context where GL buffer was created. Note that Tensor has "
"limited synchronization support when sharing OpenGl objects "
"between multiple OpenGL contexts.";
}
AllocateOpenGlBuffer();
valid_ = kValidOpenGlBuffer;
return {opengl_buffer_, std::move(lock), nullptr};
}
void Tensor::AllocateOpenGlBuffer() const {
if (opengl_buffer_ == GL_INVALID_INDEX) {
gl_context_ = mediapipe::GlContext::GetCurrent();
ABSL_LOG_IF(FATAL, !gl_context_) << "GlContext is not bound to the thread.";
glGenBuffers(1, &opengl_buffer_);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, opengl_buffer_);
if (!use_ahwb_ || !AllocateAhwbMapToSsbo()) {
glBufferData(GL_SHADER_STORAGE_BUFFER, bytes(), NULL, GL_STREAM_COPY);
}
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
}
}
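// When use_ahwb_ is set, AllocateAhwbMapToSsbo() tries to back the SSBO with
// an AHardwareBuffer; the plain glBufferData allocation above is the fallback
// when AHWB is disabled or that mapping fails.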
#endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
Tensor& Tensor::operator=(Tensor&& src) { … }
Tensor::Tensor(Tensor&& src) { … }
Tensor::~Tensor() { … }
void Tensor::Move(Tensor* src) { … }
Tensor::Tensor(ElementType element_type, const Shape& shape,
MemoryManager* memory_manager, int memory_alignment)
: … { … }
Tensor::Tensor(ElementType element_type, const Shape& shape,
const QuantizationParameters& quantization_parameters,
MemoryManager* memory_manager, int memory_alignment)
: … { … }
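// Minimal end-to-end sketch against the public API declared in tensor.h
// (shape and fill value are illustrative):
//   Tensor tensor(Tensor::ElementType::kFloat32,
//                 Tensor::Shape({1, 224, 224, 3}));
//   {
//     auto write_view = tensor.GetCpuWriteView();
//     float* data = write_view.buffer<float>();
//     std::fill(data, data + tensor.shape().num_elements(), 0.0f);
//   }
//   auto read_view = tensor.GetCpuReadView();  // kValidCpu is now set.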
#if MEDIAPIPE_METAL_ENABLED
void Tensor::Invalidate() {
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
GLuint cleanup_gl_tex = GL_INVALID_INDEX;
GLuint cleanup_gl_fb = GL_INVALID_INDEX;
#endif
{
absl::MutexLock lock(&view_mutex_);
if (cpu_buffer_ && !mtl_resources_->metal_buffer) {
DeallocateVirtualMemory(cpu_buffer_, AlignToPageSize(bytes()));
}
cpu_buffer_ = nullptr;
if (mtl_resources_) {
mtl_resources_->metal_buffer = nil;
mtl_resources_->command_buffer = nil;
mtl_resources_->device = nil;
}
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
std::swap(cleanup_gl_tex, opengl_texture2d_);
std::swap(cleanup_gl_fb, frame_buffer_);
#endif
}
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
if (cleanup_gl_tex != GL_INVALID_INDEX || cleanup_gl_fb != GL_INVALID_INDEX) {
gl_context_->RunWithoutWaiting([cleanup_gl_tex, cleanup_gl_fb]() {
glDeleteTextures(1, &cleanup_gl_tex);
glDeleteFramebuffers(1, &cleanup_gl_fb);
});
}
#endif
}
#else
void Tensor::Invalidate() { … }
#endif  // MEDIAPIPE_METAL_ENABLED
absl::Status Tensor::ReadBackGpuToCpu() const { … }
Tensor::CpuReadView Tensor::GetCpuReadView() const { … }
Tensor::CpuWriteView Tensor::GetCpuWriteView(
uint64_t source_location_hash) const { … }
absl::Status Tensor::AllocateCpuBuffer() const { … }
void Tensor::FreeCpuBuffer() const { … }
}  // namespace mediapipe