#pragma once
#if (MANIFOLD_PAR == 1)
#include <tbb/combinable.h>
#include <tbb/parallel_for.h>
#include <tbb/parallel_invoke.h>
#include <tbb/parallel_reduce.h>
#include <tbb/parallel_scan.h>
#endif
#include <algorithm>
#include <numeric>
#include "manifold/iters.h"
namespace manifold {
enum class ExecutionPolicy { … };
constexpr size_t kSeqThreshold = …;
inline constexpr ExecutionPolicy autoPolicy(size_t size,
size_t threshold = kSeqThreshold) { … }
template <typename Iter,
typename Dummy = std::enable_if_t<!std::is_integral_v<Iter>>>
inline constexpr ExecutionPolicy autoPolicy(Iter first, Iter last,
size_t threshold = kSeqThreshold) { … }
template <typename InputIter, typename OutputIter>
void copy(ExecutionPolicy policy, InputIter first, InputIter last,
OutputIter d_first);
template <typename InputIter, typename OutputIter>
void copy(InputIter first, InputIter last, OutputIter d_first);
#if (MANIFOLD_PAR == 1)
namespace details {
using manifold::kSeqThreshold;
template <typename SrcIter, typename DestIter, typename Comp>
void mergeRec(SrcIter src, DestIter dest, size_t p1, size_t r1, size_t p2,
size_t r2, size_t p3, Comp comp) {
size_t length1 = r1 - p1;
size_t length2 = r2 - p2;
if (length1 < length2) {
std::swap(p1, p2);
std::swap(r1, r2);
std::swap(length1, length2);
}
if (length1 == 0) return;
if (length1 + length2 <= kSeqThreshold) {
std::merge(src + p1, src + r1, src + p2, src + r2, dest + p3, comp);
} else {
size_t q1 = p1 + length1 / 2;
size_t q2 =
std::distance(src, std::lower_bound(src + p2, src + r2, src[q1], comp));
size_t q3 = p3 + (q1 - p1) + (q2 - p2);
dest[q3] = src[q1];
tbb::parallel_invoke(
[=] { mergeRec(src, dest, p1, q1, p2, q2, p3, comp); },
[=] { mergeRec(src, dest, q1 + 1, r1, q2, r2, q3 + 1, comp); });
}
}
template <typename SrcIter, typename DestIter, typename Comp>
void mergeSortRec(SrcIter src, DestIter dest, size_t begin, size_t end,
Comp comp) {
size_t numElements = end - begin;
if (numElements <= kSeqThreshold) {
std::copy(src + begin, src + end, dest + begin);
std::stable_sort(dest + begin, dest + end, comp);
} else {
size_t middle = begin + numElements / 2;
tbb::parallel_invoke([=] { mergeSortRec(dest, src, begin, middle, comp); },
[=] { mergeSortRec(dest, src, middle, end, comp); });
mergeRec(src, dest, begin, middle, middle, end, begin, comp);
}
}
template <typename T, typename InputIter, typename OutputIter, typename BinOp>
struct ScanBody {
T sum;
T identity;
BinOp &f;
InputIter input;
OutputIter output;
ScanBody(T sum, T identity, BinOp &f, InputIter input, OutputIter output)
: sum(sum), identity(identity), f(f), input(input), output(output) {}
ScanBody(ScanBody &b, tbb::split)
: sum(b.identity),
identity(b.identity),
f(b.f),
input(b.input),
output(b.output) {}
template <typename Tag>
void operator()(const tbb::blocked_range<size_t> &r, Tag) {
T temp = sum;
for (size_t i = r.begin(); i < r.end(); ++i) {
T inputTmp = input[i];
if (Tag::is_final_scan()) output[i] = temp;
temp = f(temp, inputTmp);
}
sum = temp;
}
T get_sum() const { return sum; }
void reverse_join(ScanBody &a) { sum = f(a.sum, sum); }
void assign(ScanBody &b) { sum = b.sum; }
};
template <typename InputIter, typename OutputIter, typename P>
struct CopyIfScanBody {
size_t sum;
P &pred;
InputIter input;
OutputIter output;
CopyIfScanBody(P &pred, InputIter input, OutputIter output)
: sum(0), pred(pred), input(input), output(output) {}
CopyIfScanBody(CopyIfScanBody &b, tbb::split)
: sum(0), pred(b.pred), input(b.input), output(b.output) {}
template <typename Tag>
void operator()(const tbb::blocked_range<size_t> &r, Tag) {
size_t temp = sum;
for (size_t i = r.begin(); i < r.end(); ++i) {
if (pred(i)) {
temp += 1;
if (Tag::is_final_scan()) output[temp - 1] = input[i];
}
}
sum = temp;
}
size_t get_sum() const { return sum; }
void reverse_join(CopyIfScanBody &a) { sum = a.sum + sum; }
void assign(CopyIfScanBody &b) { sum = b.sum; }
};
template <typename N, const int K>
struct Hist {
using SizeType = N;
static constexpr int k = K;
N hist[k][256] = {{0}};
void merge(const Hist<N, K> &other) {
for (int i = 0; i < k; ++i)
for (int j = 0; j < 256; ++j) hist[i][j] += other.hist[i][j];
}
void prefixSum(N total, bool *canSkip) {
for (int i = 0; i < k; ++i) {
size_t count = 0;
for (int j = 0; j < 256; ++j) {
N tmp = hist[i][j];
hist[i][j] = count;
count += tmp;
if (tmp == total) canSkip[i] = true;
}
}
}
};
template <typename T, typename H>
void histogram(T *ptr, typename H::SizeType n, H &hist) {
auto worker = [](T *ptr, typename H::SizeType n, H &hist) {
for (typename H::SizeType i = 0; i < n; ++i)
for (int k = 0; k < hist.k; ++k)
++hist.hist[k][(ptr[i] >> (8 * k)) & 0xFF];
};
if (n < kSeqThreshold) {
worker(ptr, n, hist);
} else {
tbb::combinable<H> store;
tbb::parallel_for(
tbb::blocked_range<typename H::SizeType>(0, n, kSeqThreshold),
[&worker, &store, ptr](const auto &r) {
worker(ptr + r.begin(), r.end() - r.begin(), store.local());
});
store.combine_each([&hist](const H &h) { hist.merge(h); });
}
}
template <typename T, typename H>
void shuffle(T *src, T *target, typename H::SizeType n, H &hist, int k) {
for (typename H::SizeType i = 0; i < n; ++i)
target[hist.hist[k][(src[i] >> (8 * k)) & 0xFF]++] = src[i];
}
template <typename T, typename SizeType>
bool LSB_radix_sort(T *input, T *tmp, SizeType n) {
Hist<SizeType, sizeof(T) / sizeof(char)> hist;
if (std::is_sorted(input, input + n)) return false;
histogram(input, n, hist);
bool canSkip[hist.k] = {0};
hist.prefixSum(n, canSkip);
T *a = input, *b = tmp;
for (int k = 0; k < hist.k; ++k) {
if (!canSkip[k]) {
shuffle(a, b, n, hist, k);
std::swap(a, b);
}
}
return a == tmp;
}
template <typename T, typename SizeType>
struct SortedRange {
T *input, *tmp;
SizeType offset = 0, length = 0;
bool inTmp = false;
SortedRange(T *input, T *tmp, SizeType offset = 0, SizeType length = 0)
: input(input), tmp(tmp), offset(offset), length(length) {}
SortedRange(SortedRange<T, SizeType> &r, tbb::split)
: input(r.input), tmp(r.tmp) {}
#if defined(__has_feature)
#if __has_feature(thread_sanitizer)
__attribute__((no_sanitize("thread")))
#endif
#endif
void
operator()(const tbb::blocked_range<SizeType> &range) {
SortedRange<T, SizeType> rhs(input, tmp, range.begin(),
range.end() - range.begin());
rhs.inTmp =
LSB_radix_sort(input + rhs.offset, tmp + rhs.offset, rhs.length);
if (length == 0)
*this = rhs;
else
join(rhs);
}
bool swapBuffer() const {
T *src = input, *target = tmp;
if (inTmp) std::swap(src, target);
copy(src + offset, src + offset + length, target + offset);
return !inTmp;
}
void join(const SortedRange<T, SizeType> &rhs) {
if (inTmp != rhs.inTmp) {
if (length < rhs.length)
inTmp = swapBuffer();
else
rhs.swapBuffer();
}
T *src = input, *target = tmp;
if (inTmp) std::swap(src, target);
if (src[offset + length - 1] > src[rhs.offset]) {
mergeRec(src, target, offset, offset + length, rhs.offset,
rhs.offset + rhs.length, offset, std::less<T>());
inTmp = !inTmp;
}
length += rhs.length;
}
};
template <typename T, typename SizeTy>
void radix_sort(T *input, SizeTy n) {
T *aux = new T[n];
SizeTy blockSize = std::max(n / tbb::this_task_arena::max_concurrency() / 4,
static_cast<SizeTy>(kSeqThreshold / sizeof(T)));
SortedRange<T, SizeTy> result(input, aux);
tbb::parallel_reduce(tbb::blocked_range<SizeTy>(0, n, blockSize), result);
if (result.inTmp) copy(aux, aux + n, input);
delete[] aux;
}
template <typename Iterator,
typename T = typename std::iterator_traits<Iterator>::value_type,
typename Comp = decltype(std::less<T>())>
void mergeSort(ExecutionPolicy policy, Iterator first, Iterator last,
Comp comp) {
#if (MANIFOLD_PAR == 1)
if (policy == ExecutionPolicy::Par) {
tbb::this_task_arena::isolate([&] {
size_t length = std::distance(first, last);
T *tmp = new T[length];
copy(policy, first, last, tmp);
details::mergeSortRec(tmp, first, 0, length, comp);
delete[] tmp;
});
return;
}
#endif
std::stable_sort(first, last, comp);
}
template <typename Iterator,
typename T = typename std::iterator_traits<Iterator>::value_type,
typename Dummy = void>
struct SortFunctor {
void operator()(ExecutionPolicy policy, Iterator first, Iterator last) {
static_assert(
std::is_convertible_v<
typename std::iterator_traits<Iterator>::iterator_category,
std::random_access_iterator_tag>,
"You can only parallelize RandomAccessIterator.");
static_assert(std::is_trivially_destructible_v<T>,
"Our simple implementation does not support types that are "
"not trivially destructable.");
return mergeSort(policy, first, last, std::less<T>());
}
};
template <typename Iterator, typename T>
struct SortFunctor<
Iterator, T,
std::enable_if_t<
std::is_integral_v<T> &&
std::is_pointer_v<typename std::iterator_traits<Iterator>::pointer>>> {
void operator()(ExecutionPolicy policy, Iterator first, Iterator last) {
static_assert(
std::is_convertible_v<
typename std::iterator_traits<Iterator>::iterator_category,
std::random_access_iterator_tag>,
"You can only parallelize RandomAccessIterator.");
static_assert(std::is_trivially_destructible_v<T>,
"Our simple implementation does not support types that are "
"not trivially destructable.");
#if (MANIFOLD_PAR == 1)
if (policy == ExecutionPolicy::Par) {
radix_sort(&*first, static_cast<size_t>(std::distance(first, last)));
return;
}
#endif
stable_sort(policy, first, last, std::less<T>());
}
};
}
#endif
template <typename Iter, typename F>
void for_each(ExecutionPolicy policy, Iter first, Iter last, F f) { … }
template <typename Iter, typename F>
void for_each_n(ExecutionPolicy policy, Iter first, size_t n, F f) { … }
template <typename InputIter, typename BinaryOp,
typename T = typename std::iterator_traits<InputIter>::value_type>
T reduce(ExecutionPolicy policy, InputIter first, InputIter last, T init,
BinaryOp f) { … }
template <typename InputIter, typename BinaryOp,
typename T = typename std::iterator_traits<InputIter>::value_type>
T reduce(InputIter first, InputIter last, T init, BinaryOp f) { … }
template <typename InputIter, typename BinaryOp, typename UnaryOp,
typename T = std::invoke_result_t<
UnaryOp, typename std::iterator_traits<InputIter>::value_type>>
T transform_reduce(ExecutionPolicy policy, InputIter first, InputIter last,
T init, BinaryOp f, UnaryOp g) { … }
template <typename InputIter, typename BinaryOp, typename UnaryOp,
typename T = std::invoke_result_t<
UnaryOp, typename std::iterator_traits<InputIter>::value_type>>
T transform_reduce(InputIter first, InputIter last, T init, BinaryOp f,
UnaryOp g) { … }
template <typename InputIter, typename OutputIter,
typename T = typename std::iterator_traits<InputIter>::value_type>
void inclusive_scan(ExecutionPolicy policy, InputIter first, InputIter last,
OutputIter d_first) { … }
template <typename InputIter, typename OutputIter,
typename T = typename std::iterator_traits<InputIter>::value_type>
void inclusive_scan(InputIter first, InputIter last, OutputIter d_first) { … }
template <typename InputIter, typename OutputIter,
typename BinOp = decltype(std::plus<typename std::iterator_traits<
InputIter>::value_type>()),
typename T = typename std::iterator_traits<InputIter>::value_type>
void exclusive_scan(ExecutionPolicy policy, InputIter first, InputIter last,
OutputIter d_first, T init = static_cast<T>(0),
BinOp f = std::plus<T>(), T identity = static_cast<T>(0)) { … }
template <typename InputIter, typename OutputIter,
typename BinOp = decltype(std::plus<typename std::iterator_traits<
InputIter>::value_type>()),
typename T = typename std::iterator_traits<InputIter>::value_type>
void exclusive_scan(InputIter first, InputIter last, OutputIter d_first,
T init = static_cast<T>(0), BinOp f = std::plus<T>(),
T identity = static_cast<T>(0)) { … }
template <typename InputIter, typename OutputIter, typename F>
void transform(ExecutionPolicy policy, InputIter first, InputIter last,
OutputIter d_first, F f) { … }
template <typename InputIter, typename OutputIter, typename F>
void transform(InputIter first, InputIter last, OutputIter d_first, F f) { … }
template <typename InputIter, typename OutputIter>
void copy(ExecutionPolicy policy, InputIter first, InputIter last,
OutputIter d_first) { … }
template <typename InputIter, typename OutputIter>
void copy(InputIter first, InputIter last, OutputIter d_first) { … }
template <typename InputIter, typename OutputIter>
void copy_n(ExecutionPolicy policy, InputIter first, size_t n,
OutputIter d_first) { … }
template <typename InputIter, typename OutputIter>
void copy_n(InputIter first, size_t n, OutputIter d_first) { … }
template <typename OutputIter, typename T>
void fill(ExecutionPolicy policy, OutputIter first, OutputIter last, T value) { … }
template <typename OutputIter, typename T>
void fill(OutputIter first, OutputIter last, T value) { … }
template <typename InputIter, typename P>
size_t count_if(ExecutionPolicy policy, InputIter first, InputIter last,
P pred) { … }
template <typename InputIter, typename P>
size_t count_if(InputIter first, InputIter last, P pred) { … }
template <typename InputIter, typename P>
bool all_of(ExecutionPolicy policy, InputIter first, InputIter last, P pred) { … }
template <typename InputIter, typename P>
bool all_of(InputIter first, InputIter last, P pred) { … }
template <typename InputIter, typename OutputIter, typename P>
OutputIter copy_if(ExecutionPolicy policy, InputIter first, InputIter last,
OutputIter d_first, P pred) { … }
template <typename InputIter, typename OutputIter, typename P>
OutputIter copy_if(InputIter first, InputIter last, OutputIter d_first,
P pred) { … }
template <typename Iter, typename P,
typename T = typename std::iterator_traits<Iter>::value_type>
Iter remove_if(ExecutionPolicy policy, Iter first, Iter last, P pred) { … }
template <typename Iter, typename P,
typename T = typename std::iterator_traits<Iter>::value_type>
Iter remove_if(Iter first, Iter last, P pred) { … }
template <typename Iter,
typename T = typename std::iterator_traits<Iter>::value_type>
Iter remove(ExecutionPolicy policy, Iter first, Iter last, T value) { … }
template <typename Iter,
typename T = typename std::iterator_traits<Iter>::value_type>
Iter remove(Iter first, Iter last, T value) { … }
template <typename Iter,
typename T = typename std::iterator_traits<Iter>::value_type>
Iter unique(ExecutionPolicy policy, Iter first, Iter last) { … }
template <typename Iter,
typename T = typename std::iterator_traits<Iter>::value_type>
Iter unique(Iter first, Iter last) { … }
template <typename Iterator,
typename T = typename std::iterator_traits<Iterator>::value_type>
void stable_sort(ExecutionPolicy policy, Iterator first, Iterator last) { … }
template <typename Iterator,
typename T = typename std::iterator_traits<Iterator>::value_type>
void stable_sort(Iterator first, Iterator last) { … }
template <typename Iterator,
typename T = typename std::iterator_traits<Iterator>::value_type,
typename Comp = decltype(std::less<T>())>
void stable_sort(ExecutionPolicy policy, Iterator first, Iterator last,
Comp comp) { … }
template <typename Iterator,
typename T = typename std::iterator_traits<Iterator>::value_type,
typename Comp = decltype(std::less<T>())>
void stable_sort(Iterator first, Iterator last, Comp comp) { … }
template <typename InputIterator1, typename InputIterator2,
typename OutputIterator>
void scatter(ExecutionPolicy policy, InputIterator1 first, InputIterator1 last,
InputIterator2 mapFirst, OutputIterator outputFirst) { … }
template <typename InputIterator1, typename InputIterator2,
typename OutputIterator>
void scatter(InputIterator1 first, InputIterator1 last, InputIterator2 mapFirst,
OutputIterator outputFirst) { … }
template <typename InputIterator, typename RandomAccessIterator,
typename OutputIterator>
void gather(ExecutionPolicy policy, InputIterator mapFirst,
InputIterator mapLast, RandomAccessIterator inputFirst,
OutputIterator outputFirst) { … }
template <typename InputIterator, typename RandomAccessIterator,
typename OutputIterator>
void gather(InputIterator mapFirst, InputIterator mapLast,
RandomAccessIterator inputFirst, OutputIterator outputFirst) { … }
template <typename Iterator>
void sequence(ExecutionPolicy policy, Iterator first, Iterator last) { … }
template <typename Iterator>
void sequence(Iterator first, Iterator last) { … }
}