#ifndef GEMMLOWP_PROFILING_INSTRUMENTATION_H_
#define GEMMLOWP_PROFILING_INSTRUMENTATION_H_
#include <cstdio>
#ifndef GEMMLOWP_USE_STLPORT
#include <cstdint>
#else
#include <stdint.h>
namespace std {
using ::int16_t;
using ::int32_t;
using ::int8_t;
using ::size_t;
using ::uint16_t;
using ::uint32_t;
using ::uint8_t;
using ::uintptr_t;
}
#endif
#include <algorithm>
#include <cassert>
#include <cstdlib>
#ifdef GEMMLOWP_PROFILING
#include <cstring>
#include <set>
#endif
#include "./pthread_everywhere.h"
namespace gemmlowp {
// Assertion helper taking a condition and a message describing the failure.
// NOTE(review): the body is not visible in this view. Judging by the name and
// by its use as the bounds guard in ProfilingStack::Push/Pop, it presumably
// reports `msg` and terminates when `condition` is false, regardless of
// NDEBUG -- confirm against the definition.
inline void ReleaseBuildAssertion(bool condition, const char* msg) { … }
// Mutex type used throughout this header: ProfilingStack holds one through a
// raw pointer, ThreadInfo allocates it with `new Mutex()`, and ScopedLock is
// constructed from a Mutex*.
// NOTE(review): the body is not visible in this view; presumably a thin wrapper
// over the pthread mutex from pthread_everywhere.h -- confirm.
class Mutex { … };
// Holder of process-wide mutexes. This header uses GlobalMutexes::Profiler(),
// whose result is handed to ScopedLock around every access to
// ThreadsUnderProfiling().
// NOTE(review): the body is not visible in this view; whether other mutexes
// are exposed here cannot be told from this file section.
class GlobalMutexes { … };
// Scope guard constructed from a mutex pointer (e.g. ProfilingStack::lock or
// GlobalMutexes::Profiler()).
// NOTE(review): the body is not visible in this view; presumably it locks in
// the constructor and unlocks in the destructor. How it treats a null mutex
// pointer must be confirmed, since ProfilingStack::lock starts out null.
struct ScopedLock { … };
#ifdef GEMMLOWP_PROFILING
// A fixed-capacity stack of profiling labels.
//
// `labels[0 .. size)` are the labels currently pushed, most recent last.
// Every mutator runs with a ScopedLock constructed from `lock`. `lock` is null
// after construction (the constructor zero-fills the object) and is deleted by
// the destructor, so whoever assigns it transfers ownership to this object.
//
// The object is initialised, copied and compared as raw bytes
// (memset/memcpy/memcmp), so all three data members, including the `lock`
// pointer, take part in assignment and equality.
struct ProfilingStack {
  // Maximum number of labels that can be pushed at the same time.
  static const std::size_t kMaxSize = 30;
  typedef const char* LabelsArrayType[kMaxSize];
  LabelsArrayType labels;
  std::size_t size;
  Mutex* lock;

  // Zero-fills the whole object: no labels, size 0, null lock. The write goes
  // through void* because this type has a user-provided destructor and
  // assignment operator; the byte-wise initialisation is intentional.
  ProfilingStack() {
    memset(static_cast<void*>(this), 0, sizeof(ProfilingStack));
  }

  // Releases the mutex (deleting a null pointer is a no-op).
  ~ProfilingStack() { delete lock; }

  // Pushes `label`. The pointer is stored as-is, not copied, so the string
  // must outlive its time on the stack. Capacity is checked with
  // ReleaseBuildAssertion before the slot is written.
  void Push(const char* label) {
    ScopedLock sl(lock);
    ReleaseBuildAssertion(size < kMaxSize, "ProfilingStack overflow");
    labels[size] = label;
    size++;
  }

  // Removes the most recently pushed label. Emptiness is checked with
  // ReleaseBuildAssertion before `size` is decremented.
  void Pop() {
    ScopedLock sl(lock);
    ReleaseBuildAssertion(size > 0, "ProfilingStack underflow");
    size--;
  }

  // Replaces the most recently pushed label with `new_label`.
  void UpdateTop(const char* new_label) {
    ScopedLock sl(lock);
    // Use the same guard as Push/Pop rather than assert(): assert() compiles
    // away under NDEBUG, and with size == 0 the index `size - 1` wraps around
    // and the store below would land far outside `labels`.
    ReleaseBuildAssertion(size > 0, "ProfilingStack UpdateTop on empty stack");
    labels[size - 1] = new_label;
  }

  // Byte-wise copy of `other` into this object.
  //
  // NOTE(review): the `lock` pointer is copied as well, so after assigning
  // from a stack whose lock is non-null both objects would delete the same
  // mutex; any lock previously held by this object is overwritten without
  // being freed. Only use this with stacks whose `lock` is null, or clear the
  // copied pointer afterwards.
  ProfilingStack& operator=(const ProfilingStack& other) {
    // memcpy requires non-overlapping ranges, so self-assignment must not
    // reach it.
    if (this != &other) {
      memcpy(static_cast<void*>(this), &other, sizeof(ProfilingStack));
    }
    return *this;
  }

  // Byte-wise equality: true when labels, size and the lock pointer all match.
  bool operator==(const ProfilingStack& other) const {
    return !memcmp(this, &other, sizeof(ProfilingStack));
  }
};
// Compile-time layout check: `x & (x - 1)` is zero exactly when a non-zero x
// has a single bit set, i.e. when sizeof(ProfilingStack) is a power of two.
// Changing kMaxSize or adding members to ProfilingStack can trip this.
static_assert(
!(sizeof(ProfilingStack) & (sizeof(ProfilingStack) - 1)),
"ProfilingStack should have power-of-two size to fit in cache lines");
struct ThreadInfo;

// Returns the single, lazily constructed set of ThreadInfo pointers that have
// been registered for profiling. The set itself performs no locking; the code
// in this header only touches it while a ScopedLock on
// GlobalMutexes::Profiler() is held.
inline std::set<ThreadInfo*>& ThreadsUnderProfiling() {
  static std::set<ThreadInfo*> registered_threads;
  return registered_threads;
}
// Profiling state of one thread: its label stack plus the bookkeeping needed
// to drop the thread from ThreadsUnderProfiling() when it goes away.
struct ThreadInfo {
  // Thread-specific-data key whose only job is to get ThreadExitCallback
  // invoked with `this` when the constructing thread exits.
  pthread_key_t key;
  ProfilingStack stack;
  // True when `key` was created successfully and therefore has to be released
  // by the destructor.
  bool key_valid;

  // Creates the exit-callback key, binds it to this object for the calling
  // thread, and gives the stack its mutex (owned, and later deleted, by
  // `stack`).
  ThreadInfo() : key_valid(false) {
    key_valid = (pthread_key_create(&key, ThreadExitCallback) == 0);
    if (key_valid) {
      pthread_setspecific(key, this);
    }
    stack.lock = new Mutex();
  }

  // Unregisters this object and releases its key.
  //
  // The order in which thread-specific-data destructors run at thread exit is
  // unspecified, so this object may be deleted before ThreadExitCallback has
  // run. Erasing here guarantees that ThreadsUnderProfiling() never holds a
  // pointer to a destroyed ThreadInfo. Deleting the key afterwards returns it
  // to the limited per-process pool; a deleted key's destructor is no longer
  // invoked, which is fine because the unregistration has already happened.
  //
  // Takes GlobalMutexes::Profiler(), so it must not be destroyed by code that
  // already holds that mutex.
  ~ThreadInfo() {
    {
      ScopedLock sl(GlobalMutexes::Profiler());
      ThreadsUnderProfiling().erase(this);
    }
    if (key_valid) {
      pthread_key_delete(key);
    }
  }

  // Not copyable: a copy would share `key` and `stack.lock` with the original
  // and release both twice.
  ThreadInfo(const ThreadInfo&) = delete;
  ThreadInfo& operator=(const ThreadInfo&) = delete;

  // Destructor function registered for `key`; `ptr` is the ThreadInfo bound to
  // the exiting thread. Only the pointer value is used (it is never
  // dereferenced), to remove the entry from ThreadsUnderProfiling() under the
  // profiler mutex.
  static void ThreadExitCallback(void* ptr) {
    ScopedLock sl(GlobalMutexes::Profiler());
    ThreadInfo* self = static_cast<ThreadInfo*>(ptr);
    ThreadsUnderProfiling().erase(self);
  }
};
// Returns the calling thread's ThreadInfo, allocating it on the first call
// made from that thread.
//
// The object lives in a pthread thread-specific slot. The slot's destructor
// function deletes the ThreadInfo that was stored for the thread.
inline ThreadInfo& ThreadLocalThreadInfo() {
  static pthread_key_t slot;
  static auto destroy_info = [](void* raw) {
    // `delete` on a null pointer does nothing, so no explicit check is needed.
    delete static_cast<ThreadInfo*>(raw);
  };

  // The stored result is never inspected. It is a function-local static so
  // that its initializer -- the key creation -- is executed a single time.
  static const int create_status = pthread_key_create(&slot, destroy_info);
  (void)create_status;

  if (void* existing = pthread_getspecific(slot)) {
    return *static_cast<ThreadInfo*>(existing);
  }

  // First use on this thread: allocate and remember the object.
  ThreadInfo* fresh = new ThreadInfo();
  pthread_setspecific(slot, fresh);
  return *fresh;
}
class ScopedProfilingLabel {
ProfilingStack* profiling_stack_;
public:
explicit ScopedProfilingLabel(const char* label)
: profiling_stack_(&ThreadLocalThreadInfo().stack) {
profiling_stack_->Push(label);
}
~ScopedProfilingLabel() { profiling_stack_->Pop(); }
void Update(const char* new_label) { profiling_stack_->UpdateTop(new_label); }
};
inline void RegisterCurrentThreadForProfiling() {
ScopedLock sl(GlobalMutexes::Profiler());
ThreadsUnderProfiling().insert(&ThreadLocalThreadInfo());
}
#else
// Variant of ScopedProfilingLabel compiled when GEMMLOWP_PROFILING is not
// defined.
// NOTE(review): the body is not visible in this view; presumably it offers the
// same constructor/Update interface with empty bodies -- confirm.
struct ScopedProfilingLabel { … };
// Variant of RegisterCurrentThreadForProfiling compiled when
// GEMMLOWP_PROFILING is not defined.
// NOTE(review): the body is not visible in this view; presumably a no-op --
// confirm.
inline void RegisterCurrentThreadForProfiling() { … }
#endif
}
#endif