#include "thread.h"
#include "sysinfo.h"
#include "estring.h"
#include <iostream>
#if defined(__ARM_NEON)
#include "../simd/arm/emulation.h"
#else
#include <xmmintrin.h>
#if defined(__EMSCRIPTEN__)
#include "../simd/wasm/emulation.h"
#endif
#endif
#if defined(PTHREADS_WIN32)
#pragma comment (lib, "pthreadVC.lib")
#endif
#if defined(__WIN32__)
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
namespace embree
{
/*! Pins \p thread to a single logical processor.
 *
 *  \param thread   handle of the thread to pin
 *  \param affinity zero-based processor index, counted across all active
 *                  processor groups in group order; a negative value means
 *                  "no affinity" and leaves the thread untouched
 *
 *  When Kernel32 exports the processor-group API, the index is translated
 *  into a (group, number) pair and applied with SetThreadGroupAffinity and
 *  SetThreadIdealProcessorEx. Otherwise the single-mask functions
 *  SetThreadAffinityMask / SetThreadIdealProcessor are used. Failures and
 *  out-of-range indices are reported through WARNING and are not fatal. */
void setAffinity(HANDLE thread, ssize_t affinity)
{
  /* a negative index cannot name a processor and would make the bit shifts
     below undefined, so treat it as a request not to pin */
  if (affinity < 0)
    return;

  typedef WORD (WINAPI *GetActiveProcessorGroupCountFunc)();
  typedef DWORD (WINAPI *GetActiveProcessorCountFunc)(WORD);
  typedef BOOL (WINAPI *SetThreadGroupAffinityFunc)(HANDLE, const GROUP_AFFINITY *, PGROUP_AFFINITY);
  typedef BOOL (WINAPI *SetThreadIdealProcessorExFunc)(HANDLE, PPROCESSOR_NUMBER, PPROCESSOR_NUMBER);

  /* the group API is looked up at runtime so that a missing export only
     selects the legacy path below; the module handle is intentionally not released */
  HMODULE hlib = LoadLibrary("Kernel32");
  GetActiveProcessorGroupCountFunc pGetActiveProcessorGroupCount = nullptr;
  GetActiveProcessorCountFunc pGetActiveProcessorCount = nullptr;
  SetThreadGroupAffinityFunc pSetThreadGroupAffinity = nullptr;
  SetThreadIdealProcessorExFunc pSetThreadIdealProcessorEx = nullptr;
  if (hlib != nullptr)
  {
    pGetActiveProcessorGroupCount = (GetActiveProcessorGroupCountFunc)GetProcAddress(hlib, "GetActiveProcessorGroupCount");
    pGetActiveProcessorCount = (GetActiveProcessorCountFunc)GetProcAddress(hlib, "GetActiveProcessorCount");
    pSetThreadGroupAffinity = (SetThreadGroupAffinityFunc)GetProcAddress(hlib, "SetThreadGroupAffinity");
    pSetThreadIdealProcessorEx = (SetThreadIdealProcessorExFunc)GetProcAddress(hlib, "SetThreadIdealProcessorEx");
  }

  if (pGetActiveProcessorGroupCount && pGetActiveProcessorCount && pSetThreadGroupAffinity && pSetThreadIdealProcessorEx)
  {
    /* walk the groups, accumulating their sizes, until the group that
       contains the requested global index is found */
    const int groups = pGetActiveProcessorGroupCount();
    int totalProcessors = 0, group = -1, number = 0;
    for (int i = 0; i<groups; i++) {
      const int processors = pGetActiveProcessorCount((WORD)i);
      if (totalProcessors + processors > affinity) {
        group = i;
        number = (int)affinity - totalProcessors;
        break;
      }
      totalProcessors += processors;
    }

    /* index lies beyond the last active processor: do not silently pin the
       thread to processor 0 of group 0 */
    if (group < 0) {
      WARNING("setAffinity: processor index exceeds number of active processors");
      return;
    }

    GROUP_AFFINITY groupAffinity;
    groupAffinity.Group = (WORD)group;
    groupAffinity.Mask = (KAFFINITY)(uint64_t(1) << number);
    groupAffinity.Reserved[0] = 0;
    groupAffinity.Reserved[1] = 0;
    groupAffinity.Reserved[2] = 0;
    if (!pSetThreadGroupAffinity(thread, &groupAffinity, nullptr))
      WARNING("SetThreadGroupAffinity failed");

    PROCESSOR_NUMBER processorNumber;
    processorNumber.Group = group;
    processorNumber.Number = number;
    processorNumber.Reserved = 0;
    if (!pSetThreadIdealProcessorEx(thread, &processorNumber, nullptr))
      WARNING("SetThreadIdealProcessorEx failed");
  }
  else
  {
    /* the legacy mask has one bit per processor; an index past its width
       cannot be represented and must not be used as a shift count */
    if (affinity >= (ssize_t)(8*sizeof(DWORD_PTR))) {
      WARNING("setAffinity: processor index exceeds width of affinity mask");
      return;
    }
    if (!SetThreadAffinityMask(thread, DWORD_PTR(uint64_t(1) << affinity)))
      WARNING("SetThreadAffinityMask failed");
    if (SetThreadIdealProcessor(thread, (DWORD)affinity) == (DWORD)-1)
      WARNING("SetThreadIdealProcessor failed");
  }
}
/*! Applies \p affinity to the calling thread by forwarding the handle
 *  obtained from GetCurrentThread() to the handle-based overload. */
void setAffinity(ssize_t affinity)
{
  const HANDLE self = GetCurrentThread();
  setAffinity(self, affinity);
}
struct ThreadStartupData
{
public:
ThreadStartupData (thread_func f, void* arg)
: f(f), arg(arg) {}
public:
thread_func f;
void* arg;
};
/*! Start routine handed to CreateThread.
 *
 *  \param ptr pointer to a ThreadStartupData allocated with \c new; this
 *             routine deletes it after the user function returns
 *  \return always 0 */
DWORD WINAPI threadStartup(LPVOID ptr)
{
  ThreadStartupData* const startup = (ThreadStartupData*) ptr;

  /* set MXCSR bit 15 (flush-to-zero) and bit 6 (denormals-are-zero) for this thread */
  const unsigned int flushToZero = 1<<15;
  const unsigned int denormalsAreZero = 1<<6;
  _mm_setcsr(_mm_getcsr() | flushToZero | denormalsAreZero);

  /* run the user function, then release the startup record */
  startup->f(startup->arg);
  delete startup;
  return 0;
}
#if !defined(PTHREADS_WIN32)
/*! Creates a native thread that runs \p f with argument \p arg.
 *
 *  \param f          function executed by the new thread
 *  \param arg        opaque pointer passed to \p f
 *  \param stack_size stack size forwarded to CreateThread
 *  \param threadID   processor index to pin the thread to; a negative value
 *                    skips pinning
 *  \return handle of the new thread, wrapped as thread_t
 *
 *  Reports a failed CreateThread through FATAL. */
thread_t createThread(thread_func f, void* arg, size_t stack_size, ssize_t threadID)
{
  /* ownership of this record passes to threadStartup, which deletes it */
  ThreadStartupData* const startup = new ThreadStartupData(f,arg);
  HANDLE thread = CreateThread(nullptr, stack_size, threadStartup, startup, 0, nullptr);
  if (thread == nullptr) {
    /* the thread never started, so nobody else will free the record */
    delete startup;
    FATAL("CreateThread failed");
  }
  if (threadID >= 0) setAffinity(thread, threadID);
  return thread_t(thread);
}
/*! Gives up the calling thread's time slice by calling SwitchToThread(). */
void yield()
{
  SwitchToThread();
}
/*! Blocks until the thread \p tid has finished, then closes its handle.
 *  The handle must not be used after this call. */
void join(thread_t tid)
{
  const HANDLE handle = HANDLE(tid);
  WaitForSingleObject(handle, INFINITE);
  CloseHandle(handle);
}
/*! Forcibly ends the thread \p tid via TerminateThread (exit code 0) and
 *  closes its handle. The handle must not be used after this call. */
void destroyThread(thread_t tid)
{
  const HANDLE handle = HANDLE(tid);
  TerminateThread(handle, 0);
  CloseHandle(handle);
}
/*! Allocates a thread-local storage index with TlsAlloc() and returns it
 *  packed into a tls_t. The result of TlsAlloc() is not checked here. */
tls_t createTls()
{
  const size_t slot = size_t(TlsAlloc());
  return tls_t(slot);
}
/*! Stores \p ptr in the calling thread's slot of the TLS index \p tls. */
void setTls(tls_t tls, void* const ptr)
{
  const DWORD slot = DWORD(size_t(tls));
  TlsSetValue(slot, ptr);
}
/*! Returns the pointer stored in the calling thread's slot of the TLS
 *  index \p tls, as reported by TlsGetValue(). */
void* getTls(tls_t tls)
{
  const DWORD slot = DWORD(size_t(tls));
  return TlsGetValue(slot);
}
/*! Releases the TLS index \p tls with TlsFree(). */
void destroyTls(tls_t tls)
{
  const DWORD slot = DWORD(size_t(tls));
  TlsFree(slot);
}
#endif
}
#endif
#if defined(__LINUX__) && !defined(__ANDROID__)
#include <fstream>
#include <sstream>
#include <algorithm>
namespace embree
{
static MutexSys mutex;
static std::vector<size_t> threadIDs;
size_t mapThreadID(size_t threadID)
{ … }
void setAffinity(ssize_t affinity)
{ … }
}
#endif
#if defined(__ANDROID__)
namespace embree
{
/*! Restricts the calling thread to the single CPU with index \p affinity.
 *  The result of sched_setaffinity() is ignored, so a failed request
 *  leaves the previous affinity in place without any report. */
void setAffinity(ssize_t affinity)
{
  /* build a CPU set that contains only the requested CPU */
  cpu_set_t allowed;
  CPU_ZERO(&allowed);
  CPU_SET(affinity, &allowed);

  /* id 0 addresses the caller itself */
  const pid_t caller = 0;
  sched_setaffinity(caller, sizeof(allowed), &allowed);
}
}
#endif
#if defined(__FreeBSD__)
#include <pthread_np.h>
namespace embree
{
/*! Restricts the calling thread to the single CPU with index \p affinity.
 *  The result of pthread_setaffinity_np() is ignored, so a failed request
 *  is not reported. */
void setAffinity(ssize_t affinity)
{
  /* build a CPU set that contains only the requested CPU */
  cpuset_t allowed;
  CPU_ZERO(&allowed);
  CPU_SET(affinity, &allowed);

  /* apply the set to the thread executing this call */
  pthread_setaffinity_np(pthread_self(), sizeof(allowed), &allowed);
}
}
#endif
#if defined(__EMSCRIPTEN__)
namespace embree
{
/*! No-op in the Emscripten build: the requested processor index is
 *  accepted and ignored. */
void setAffinity(ssize_t affinity)
{
  (void)affinity;
}
}
#endif
#if defined(__MACOSX__)
#include <mach/thread_act.h>
#include <mach/thread_policy.h>
#include <mach/mach_init.h>
namespace embree
{
/*! Requests a Mach thread-affinity policy for the calling thread, using
 *  \p affinity as the affinity tag. Compiled to an empty function when
 *  __ARM_NEON is defined. A failed request is reported through WARNING. */
void setAffinity(ssize_t affinity)
{
#if !defined(__ARM_NEON)
  thread_affinity_policy policy;
  policy.affinity_tag = affinity;

  // NOTE(review): check whether the port returned by mach_thread_self()
  // has to be released by the caller after use.
  const bool applied =
    thread_policy_set(mach_thread_self(),
                      THREAD_AFFINITY_POLICY,
                      (thread_policy_t)&policy,
                      THREAD_AFFINITY_POLICY_COUNT) == KERN_SUCCESS;
  if (!applied)
    WARNING("setting thread affinity failed");
#endif
}
}
#endif
#if defined(__UNIX__) || defined(PTHREADS_WIN32)
#include <pthread.h>
#include <sched.h>
#if defined(__USE_NUMA__)
#include <numa.h>
#endif
namespace embree
{
struct ThreadStartupData
{ … };
static void* threadStartup(ThreadStartupData* parg)
{ … }
thread_t createThread(thread_func f, void* arg, size_t stack_size, ssize_t threadID)
{ … }
void yield() { … }
void join(thread_t tid) { … }
void destroyThread(thread_t tid) { … }
tls_t createTls()
{ … }
void* getTls(tls_t tls)
{ … }
void setTls(tls_t tls, void* const ptr)
{ … }
void destroyTls(tls_t tls)
{ … }
}
#endif