#include "mediapipe/framework/formats/tensor.h"
#ifdef MEDIAPIPE_TENSOR_USE_AHWB
#include <EGL/egl.h>
#include <EGL/eglext.h>
#include <unistd.h>

#include <cstdint>
#include <cstring>
#include <deque>
#include <list>
#include <memory>
#include <utility>
#include <vector>

#include "absl/base/const_init.h"
#include "absl/base/thread_annotations.h"
#include "absl/container/flat_hash_set.h"
#include "absl/functional/any_invocable.h"
#include "absl/log/absl_check.h"
#include "absl/log/absl_log.h"
#include "absl/memory/memory.h"
#include "absl/status/status.h"
#include "absl/strings/str_format.h"
#include "absl/synchronization/mutex.h"
#include "mediapipe/framework/deps/no_destructor.h"
#include "mediapipe/framework/formats/hardware_buffer.h"
#include "mediapipe/framework/port/status_macros.h"
#include "mediapipe/gpu/gl_base.h"
#endif  // MEDIAPIPE_TENSOR_USE_AHWB
namespace mediapipe {
#ifdef MEDIAPIPE_TENSOR_USE_AHWB
namespace {
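// EGL/GL extension entry points needed for AHardwareBuffer <-> OpenGL
// interop. They are resolved at runtime via eglGetProcAddress() because they
// are not exposed by the core headers on all platforms.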
PFNGLBUFFERSTORAGEEXTERNALEXTPROC glBufferStorageExternalEXT;
PFNEGLGETNATIVECLIENTBUFFERANDROIDPROC eglGetNativeClientBufferANDROID;
PFNEGLDUPNATIVEFENCEFDANDROIDPROC eglDupNativeFenceFDANDROID;
PFNEGLCREATESYNCKHRPROC eglCreateSyncKHR;
PFNEGLWAITSYNCKHRPROC eglWaitSyncKHR;
PFNEGLCLIENTWAITSYNCKHRPROC eglClientWaitSyncKHR;
PFNEGLDESTROYSYNCKHRPROC eglDestroySyncKHR;
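// Resolves the extension entry points exactly once (thread-safe static
// initialization) and reports whether all of them are available.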
bool IsGlSupported() {
static const bool extensions_allowed = [] {
eglGetNativeClientBufferANDROID =
reinterpret_cast<PFNEGLGETNATIVECLIENTBUFFERANDROIDPROC>(
eglGetProcAddress("eglGetNativeClientBufferANDROID"));
glBufferStorageExternalEXT =
reinterpret_cast<PFNGLBUFFERSTORAGEEXTERNALEXTPROC>(
eglGetProcAddress("glBufferStorageExternalEXT"));
eglDupNativeFenceFDANDROID =
reinterpret_cast<PFNEGLDUPNATIVEFENCEFDANDROIDPROC>(
eglGetProcAddress("eglDupNativeFenceFDANDROID"));
eglCreateSyncKHR = reinterpret_cast<PFNEGLCREATESYNCKHRPROC>(
eglGetProcAddress("eglCreateSyncKHR"));
eglWaitSyncKHR = reinterpret_cast<PFNEGLWAITSYNCKHRPROC>(
eglGetProcAddress("eglWaitSyncKHR"));
eglClientWaitSyncKHR = reinterpret_cast<PFNEGLCLIENTWAITSYNCKHRPROC>(
eglGetProcAddress("eglClientWaitSyncKHR"));
eglDestroySyncKHR = reinterpret_cast<PFNEGLDESTROYSYNCKHRPROC>(
eglGetProcAddress("eglDestroySyncKHR"));
return eglClientWaitSyncKHR && eglWaitSyncKHR &&
eglGetNativeClientBufferANDROID && glBufferStorageExternalEXT &&
eglCreateSyncKHR && eglDupNativeFenceFDANDROID && eglDestroySyncKHR;
}();
return extensions_allowed;
}
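// Maps the AHardwareBuffer's memory onto the SSBO currently bound to
// GL_SHADER_STORAGE_BUFFER using glBufferStorageExternalEXT.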
absl::Status MapAHardwareBufferToGlBuffer(AHardwareBuffer* handle,
size_t size) {
if (!IsGlSupported()) {
return absl::UnknownError(
"No GL extension functions found to bind AHardwareBuffer and "
"OpenGL buffer");
}
EGLClientBuffer native_buffer = eglGetNativeClientBufferANDROID(handle);
if (!native_buffer) {
return absl::UnknownError("Can't get native buffer");
}
glBufferStorageExternalEXT(GL_SHADER_STORAGE_BUFFER, 0, size, native_buffer,
GL_MAP_READ_BIT | GL_MAP_WRITE_BIT |
GL_MAP_COHERENT_BIT_EXT |
GL_MAP_PERSISTENT_BIT_EXT);
if (glGetError() == GL_NO_ERROR) {
return absl::OkStatus();
} else {
return absl::InternalError("Error in glBufferStorageExternalEXT");
}
}
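// Rounds `value` up to the nearest multiple of `alignment`. `alignment` must
// be a power of two.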
static inline int AlignedToPowerOf2(int value, int alignment) {
return ((value - 1) | (alignment - 1)) + 1;
}
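// Keeps the AHardwareBuffer, the OpenGL buffer and the associated sync
// objects alive until all pending GPU/CPU usages have completed, then
// releases them.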
class DelayedReleaser {
public:
DelayedReleaser(const DelayedReleaser&) = delete;
DelayedReleaser& operator=(const DelayedReleaser&) = delete;
DelayedReleaser(DelayedReleaser&&) = delete;
DelayedReleaser& operator=(DelayedReleaser&&) = delete;
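  // Queues the given resources for deferred release and opportunistically
  // frees queued entries whose usages have already completed.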
static void Add(std::shared_ptr<HardwareBuffer> ahwb, GLuint opengl_buffer,
EGLSyncKHR ssbo_sync, GLsync ssbo_read,
std::list<TensorAhwbUsage>&& ahwb_usages,
std::shared_ptr<mediapipe::GlContext> gl_context) {
static absl::Mutex mutex;
std::deque<std::unique_ptr<DelayedReleaser>> to_release_local;
using std::swap;
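    // IsSignaled() below may grab other mutexes, so do not call it while
    // holding the deque mutex: swap the pending entries out first.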
{
absl::MutexLock lock(&mutex);
swap(to_release_local, *to_release_);
}
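    // `new` is used because the constructor is non-public; absl::WrapUnique
    // takes ownership of the raw pointer.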
to_release_local.emplace_back(absl::WrapUnique(
new DelayedReleaser(std::move(ahwb), opengl_buffer, ssbo_sync,
ssbo_read, std::move(ahwb_usages), gl_context)));
for (auto it = to_release_local.begin(); it != to_release_local.end();) {
if ((*it)->IsSignaled()) {
it = to_release_local.erase(it);
} else {
++it;
}
}
{
absl::MutexLock lock(&mutex);
to_release_->insert(to_release_->end(),
std::make_move_iterator(to_release_local.begin()),
std::make_move_iterator(to_release_local.end()));
to_release_local.clear();
}
}
~DelayedReleaser() { CompleteAndEraseUsages(ahwb_usages_); }
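  // Returns true when all tracked usages and GL sync objects have completed.
  // Once everything has completed, the GL buffer and EGL sync are destroyed
  // on the owning GL context.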
bool IsSignaled() {
bool ready = !HasIncompleteUsages(ahwb_usages_);
if (ssbo_read_ != 0) {
gl_context_->Run([this, &ready]() {
        GLenum status =
            glClientWaitSync(ssbo_read_, /*flags=*/0, /*timeout=*/0);
        if (status != GL_CONDITION_SATISFIED &&
            status != GL_ALREADY_SIGNALED) {
ready = false;
return;
}
glDeleteSync(ssbo_read_);
ssbo_read_ = 0;
});
}
if (ready && gl_context_) {
gl_context_->Run([this]() {
if (fence_sync_ != EGL_NO_SYNC_KHR && IsGlSupported()) {
auto egl_display = eglGetDisplay(EGL_DEFAULT_DISPLAY);
if (egl_display != EGL_NO_DISPLAY) {
eglDestroySyncKHR(egl_display, fence_sync_);
}
fence_sync_ = EGL_NO_SYNC_KHR;
}
glDeleteBuffers(1, &opengl_buffer_);
opengl_buffer_ = GL_INVALID_INDEX;
});
}
return ready;
}
protected:
std::shared_ptr<HardwareBuffer> ahwb_;
GLuint opengl_buffer_;
EGLSyncKHR fence_sync_;
GLsync ssbo_read_;
std::list<TensorAhwbUsage> ahwb_usages_;
std::shared_ptr<mediapipe::GlContext> gl_context_;
static inline NoDestructor<std::deque<std::unique_ptr<DelayedReleaser>>>
to_release_;
DelayedReleaser(std::shared_ptr<HardwareBuffer> ahwb, GLuint opengl_buffer,
EGLSyncKHR fence_sync, GLsync ssbo_read,
std::list<TensorAhwbUsage>&& ahwb_usages,
std::shared_ptr<mediapipe::GlContext> gl_context)
: ahwb_(std::move(ahwb)),
opengl_buffer_(opengl_buffer),
fence_sync_(fence_sync),
ssbo_read_(ssbo_read),
ahwb_usages_(std::move(ahwb_usages)),
gl_context_(gl_context) {}
};
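// Process-wide registry of tensor tracking keys (source location + shape +
// memory alignment) that have previously used AHardwareBuffer storage, so
// that tensors created at the same place enable the AHWB path up front.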
class AhwbUsageTrack {
public:
static void Insert(uint64_t id) {
absl::MutexLock lock(&mutex_);
ahwb_usage_track_->insert(id);
}
static bool Contains(uint64_t id) {
absl::MutexLock lock(&mutex_);
return ahwb_usage_track_->contains(id);
}
private:
AhwbUsageTrack() = default;
ABSL_CONST_INIT static absl::Mutex mutex_;
static NoDestructor<absl::flat_hash_set<uint64_t>> ahwb_usage_track_
ABSL_GUARDED_BY(mutex_);
};
ABSL_CONST_INIT absl::Mutex AhwbUsageTrack::mutex_(absl::kConstInit);
NoDestructor<absl::flat_hash_set<uint64_t>> AhwbUsageTrack::ahwb_usage_track_;
}  // namespace
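// Returns a read view backed by the tensor's AHardwareBuffer, transferring
// CPU or SSBO contents into the AHWB first if this is the first AHWB access.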
Tensor::AHardwareBufferView Tensor::GetAHardwareBufferReadView() const {
auto lock(std::make_unique<absl::MutexLock>(&view_mutex_));
  ABSL_CHECK(valid_ != kValidNone)
      << "Tensor must be written prior to being read from.";
ABSL_CHECK(!(valid_ & kValidOpenGlTexture2d))
<< "Tensor conversion between OpenGL texture and AHardwareBuffer is not "
"supported.";
bool transfer = ahwb_ == nullptr;
ABSL_CHECK_OK(AllocateAHardwareBuffer())
<< "AHardwareBuffer is not supported on the target system.";
valid_ |= kValidAHardwareBuffer;
if (transfer) {
MoveCpuOrSsboToAhwb();
} else {
if (valid_ & kValidOpenGlBuffer) CreateEglSyncAndFd();
}
EraseCompletedUsages(ahwb_usages_);
ahwb_usages_.push_back(TensorAhwbUsage());
return {ahwb_.get(),
&write_complete_fence_fd_,
&ahwb_usages_.back(), std::move(lock),
false};
}
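// Creates an EGL native fence sync on the tensor's GL context and duplicates
// its fd into write_complete_fence_fd_ so consumers can wait for the GPU
// write to finish. Falls back to glFinish() if the sync cannot be created.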
void Tensor::CreateEglSyncAndFd() const {
gl_context_->Run([this]() {
if (IsGlSupported()) {
auto egl_display = eglGetDisplay(EGL_DEFAULT_DISPLAY);
if (egl_display != EGL_NO_DISPLAY) {
fence_sync_ = eglCreateSyncKHR(egl_display,
EGL_SYNC_NATIVE_FENCE_ANDROID, nullptr);
if (fence_sync_ != EGL_NO_SYNC_KHR) {
write_complete_fence_fd_ =
UniqueFd(eglDupNativeFenceFDANDROID(egl_display, fence_sync_));
if (!write_complete_fence_fd_.IsValid()) {
eglDestroySyncKHR(egl_display, fence_sync_);
fence_sync_ = EGL_NO_SYNC_KHR;
}
}
}
}
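    // No sync object or fd could be created: fall back to a blocking
    // glFinish().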
if (fence_sync_ == EGL_NO_SYNC_KHR) glFinish();
});
}
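// Returns a write view backed by the tensor's AHardwareBuffer and invalidates
// all other storage representations.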
Tensor::AHardwareBufferView Tensor::GetAHardwareBufferWriteView() const {
auto lock(std::make_unique<absl::MutexLock>(&view_mutex_));
ABSL_CHECK_OK(AllocateAHardwareBuffer())
<< "AHardwareBuffer is not supported on the target system.";
valid_ = kValidAHardwareBuffer;
EraseCompletedUsages(ahwb_usages_);
if (!ahwb_usages_.empty()) {
ABSL_LOG(DFATAL) << absl::StrFormat(
"Write attempt while reading or writing AHWB (num usages: %d).",
ahwb_usages_.size());
}
ahwb_usages_.push_back(TensorAhwbUsage());
return {ahwb_.get(),
&write_complete_fence_fd_,
&ahwb_usages_.back(), std::move(lock),
true};
}
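// Lazily allocates the AHardwareBuffer (from the pool if one is set) and
// records the tensor's tracking key as AHWB-backed.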
absl::Status Tensor::AllocateAHardwareBuffer() const {
AhwbUsageTrack::Insert(ahwb_tracking_key_);
use_ahwb_ = true;
if (ahwb_ == nullptr) {
HardwareBufferSpec spec = {};
if (memory_alignment_ == 0) {
spec.width = bytes();
} else {
spec.width = AlignedToPowerOf2(bytes(), memory_alignment_);
}
spec.height = 1;
spec.layers = 1;
spec.format = HardwareBufferSpec::AHARDWAREBUFFER_FORMAT_BLOB;
spec.usage = HardwareBufferSpec::AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN |
HardwareBufferSpec::AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN |
HardwareBufferSpec::AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER;
if (hardware_buffer_pool_ == nullptr) {
MP_ASSIGN_OR_RETURN(auto new_ahwb, HardwareBuffer::Create(spec));
ahwb_ = std::make_shared<HardwareBuffer>(std::move(new_ahwb));
} else {
MP_ASSIGN_OR_RETURN(ahwb_, hardware_buffer_pool_->GetBuffer(spec));
}
}
return absl::OkStatus();
}
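// Allocates an AHardwareBuffer and maps it onto the currently bound SSBO.
// Returns false if AHWB is unavailable or the mapping fails.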
bool Tensor::AllocateAhwbMapToSsbo() const {
if (__builtin_available(android 26, *)) {
if (AllocateAHardwareBuffer().ok()) {
if (MapAHardwareBufferToGlBuffer(ahwb_->GetAHardwareBuffer(), bytes())
.ok()) {
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
return true;
}
ahwb_.reset();
}
}
return false;
}
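// Copies the tensor contents from CPU memory or from the OpenGL SSBO into the
// AHardwareBuffer and releases the source storage.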
void Tensor::MoveCpuOrSsboToAhwb() const {
auto dest =
ahwb_->Lock(HardwareBufferSpec::AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY);
ABSL_CHECK_OK(dest) << "Lock of AHWB failed";
if (valid_ & kValidCpu) {
std::memcpy(*dest, cpu_buffer_, bytes());
FreeCpuBuffer();
valid_ &= ~kValidCpu;
} else if (valid_ & kValidOpenGlBuffer) {
gl_context_->Run([this, dest]() {
glBindBuffer(GL_SHADER_STORAGE_BUFFER, opengl_buffer_);
const void* src = glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, bytes(),
GL_MAP_READ_BIT);
std::memcpy(*dest, src, bytes());
glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
glDeleteBuffers(1, &opengl_buffer_);
});
opengl_buffer_ = GL_INVALID_INDEX;
gl_context_ = nullptr;
valid_ &= ~kValidOpenGlBuffer;
} else {
ABSL_LOG(FATAL) << "Can't convert tensor with mask " << valid_
<< " into AHWB.";
}
ABSL_CHECK_OK(ahwb_->Unlock()) << "Unlock of AHWB failed";
}
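// Makes subsequent GL commands wait (server-side) until the AHWB
// write-completion fence has signaled.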
bool Tensor::InsertAhwbToSsboFence() const {
if (!ahwb_) return false;
if (write_complete_fence_fd_.IsValid()) {
if (!IsGlSupported()) return true;
auto egl_display = eglGetDisplay(EGL_DEFAULT_DISPLAY);
if (egl_display == EGL_NO_DISPLAY) return true;
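    // EGL takes ownership of the fd passed in the attribute list when
    // eglCreateSyncKHR succeeds, so hand it a duplicate and close it only on
    // failure.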
int fd_for_egl = dup(write_complete_fence_fd_.Get());
EGLint sync_attribs[] = {EGL_SYNC_NATIVE_FENCE_FD_ANDROID,
(EGLint)fd_for_egl, EGL_NONE};
fence_sync_ = eglCreateSyncKHR(egl_display, EGL_SYNC_NATIVE_FENCE_ANDROID,
sync_attribs);
if (fence_sync_ != EGL_NO_SYNC_KHR) {
eglWaitSyncKHR(egl_display, fence_sync_, 0);
} else {
close(fd_for_egl);
}
}
return true;
}
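// Transfers AHWB-related state from `src` as part of the tensor move.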
void Tensor::MoveAhwbStuff(Tensor* src) {
hardware_buffer_pool_ = std::exchange(src->hardware_buffer_pool_, nullptr);
ahwb_ = std::exchange(src->ahwb_, nullptr);
fence_sync_ = std::exchange(src->fence_sync_, EGL_NO_SYNC_KHR);
ssbo_read_ = std::exchange(src->ssbo_read_, static_cast<GLsync>(0));
write_complete_fence_fd_ = std::move(src->write_complete_fence_fd_);
ahwb_usages_ = std::move(src->ahwb_usages_);
use_ahwb_ = std::exchange(src->use_ahwb_, false);
}
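// Releases AHWB-related resources, deferring the release through
// DelayedReleaser while GPU reads/writes may still be in flight.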
void Tensor::ReleaseAhwbStuff() {
write_complete_fence_fd_.Reset();
if (__builtin_available(android 26, *)) {
if (ahwb_) {
if (ssbo_read_ != 0 || fence_sync_ != EGL_NO_SYNC_KHR ||
HasIncompleteUsages(ahwb_usages_)) {
DelayedReleaser::Add(std::move(ahwb_), opengl_buffer_, fence_sync_,
ssbo_read_, std::move(ahwb_usages_), gl_context_);
opengl_buffer_ = GL_INVALID_INDEX;
} else {
CompleteAndEraseUsages(ahwb_usages_);
ahwb_.reset();
}
}
}
}
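// Locks the AHardwareBuffer for CPU reading, first waiting on the
// write-completion fence or any pending usages. Returns nullptr if no AHWB is
// allocated.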
void* Tensor::MapAhwbToCpuRead() const {
if (ahwb_ != nullptr) {
if (!(valid_ & kValidCpu)) {
if ((valid_ & kValidOpenGlBuffer) &&
!write_complete_fence_fd_.IsValid()) {
gl_context_->Run([]() { glFinish(); });
} else if (valid_ & kValidAHardwareBuffer) {
ABSL_CHECK_GT(ahwb_usages_.size(), 0);
CompleteAndEraseUsages(ahwb_usages_);
}
}
const int fence_fd = write_complete_fence_fd_.Get();
int duplicate_fence = -1;
if (fence_fd != -1) {
duplicate_fence = dup(fence_fd);
if (duplicate_fence == -1) {
ABSL_LOG(ERROR) << "Failed to duplicate fence fd: " << fence_fd;
}
}
auto ptr =
ahwb_->Lock(HardwareBufferSpec::AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN,
duplicate_fence);
ABSL_CHECK_OK(ptr) << "Lock of AHWB failed";
write_complete_fence_fd_.Reset();
return *ptr;
}
return nullptr;
}
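// Locks the AHardwareBuffer for CPU writing. Returns nullptr if no AHWB is
// allocated.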
void* Tensor::MapAhwbToCpuWrite() const {
if (ahwb_ != nullptr) {
auto locked_ptr =
ahwb_->Lock(HardwareBufferSpec::AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN);
ABSL_CHECK_OK(locked_ptr) << "Lock of AHWB failed";
return *locked_ptr;
}
return nullptr;
}
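// Builds the tracking key (hash of the call site, the tensor dimensions and
// the memory alignment) on first use and enables the AHWB path if this key
// has used AHWB storage before.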
void Tensor::TrackAhwbUsage(uint64_t source_location_hash) const {
if (ahwb_tracking_key_ == 0) {
ahwb_tracking_key_ = source_location_hash;
for (int dim : shape_.dims) {
ahwb_tracking_key_ = tensor_internal::FnvHash64(ahwb_tracking_key_, dim);
}
ahwb_tracking_key_ =
tensor_internal::FnvHash64(ahwb_tracking_key_, memory_alignment_);
}
use_ahwb_ = use_ahwb_ || AhwbUsageTrack::Contains(ahwb_tracking_key_);
}
#else  // MEDIAPIPE_TENSOR_USE_AHWB

// AHardwareBuffer support is compiled out: these entry points become no-ops.
bool Tensor::AllocateAhwbMapToSsbo() const { return false; }
bool Tensor::InsertAhwbToSsboFence() const { return false; }
void Tensor::MoveAhwbStuff(Tensor* src) {}
void Tensor::ReleaseAhwbStuff() {}
void* Tensor::MapAhwbToCpuRead() const { return nullptr; }
void* Tensor::MapAhwbToCpuWrite() const { return nullptr; }
void Tensor::TrackAhwbUsage(uint64_t key) const {}

#endif  // MEDIAPIPE_TENSOR_USE_AHWB
}  // namespace mediapipe