/*!
Wrapper routines for `memchr` and friends.
These routines efficiently dispatch to the best implementation based on what
the CPU supports.
*/
/// Expands to a memchr-like call whose runtime CPU feature detection is only
/// paid for on the first invocation.
///
/// The expansion declares a global `AtomicPtr` holding a type-erased function
/// pointer. The global initially points at a `detect` routine. When `detect`
/// runs, it picks an implementation: on builds where the `sse2` target feature
/// is enabled, it asks the AVX2 searcher and then the SSE2 searcher whether
/// they are available and otherwise settles on the architecture independent
/// fallback; on builds without `sse2`, it always uses the fallback. `detect`
/// then writes the chosen routine into the global and forwards the call to
/// it. Every later call loads the global and jumps straight to the stored
/// routine, so the feature detection branches are not taken again.
///
/// The macro is written to implement functions shaped like this:
///
/// ```ignore
/// fn memchr(needle1: u8, start: *const u8, end: *const u8) -> Option<*const u8>;
/// ```
///
/// `memchr2` and `memchr3` variants additionally take `needle2` and `needle3`.
/// `start` and `end` are the start and end of the haystack, respectively.
///
/// Raw pointers are used in place of the more obvious `haystack: &[u8]` so
/// that the generated function is compatible with the lower level iterator
/// logic, which operates on raw pointers. The "raw" routines built with this
/// macro are what the slice based memchr routines are defined on top of.
///
/// # Parameters
///
/// * `$memchrty` - name of the searcher type (for example `One`). It is
///   looked up in `crate::arch::x86_64::avx2::memchr`,
///   `crate::arch::x86_64::sse2::memchr` and `crate::arch::all::memchr`.
/// * `$memchrfind` - name of the searcher method to call.
/// * `$fnty` - function pointer type matching the generated routines.
/// * `$retty` - return type of the generated routines.
/// * `$hay_start`, `$hay_end` - identifiers holding the haystack pointers.
/// * `$needle` - one or more identifiers holding the needle bytes.
///
/// # Safety
///
/// Primarily, callers must ensure that `$fnty` is a correct function pointer
/// type and not something else.
///
/// Callers must also ensure that `$memchrty::$memchrfind` corresponds to a
/// routine that returns a valid pointer when a match is found. That is, a
/// pointer that is `>= start` and `< end`.
///
/// Callers must also ensure that the `$hay_start` and `$hay_end` identifiers
/// correspond to valid pointers.
macro_rules! unsafe_ifunc {
    (
        $memchrty:ident,
        $memchrfind:ident,
        $fnty:ty,
        $retty:ty,
        $hay_start:ident,
        $hay_end:ident,
        $($needle:ident),+
    ) => {{
        #![allow(unused_unsafe)]

        use core::sync::atomic::{AtomicPtr, Ordering};

        // The global stores the routine type-erased as a plain pointer.
        // `TypedFn` is the real signature it is converted back to on a call.
        type ErasedFn = *mut ();
        type TypedFn = $fnty;

        // Starts out as `detect`, which overwrites it on its first run.
        static DISPATCH: AtomicPtr<()> = AtomicPtr::new(detect as ErasedFn);

        // Picks an implementation, records it in `DISPATCH` and then runs it.
        unsafe fn detect(
            $($needle: u8),+,
            $hay_start: *const u8,
            $hay_end: *const u8,
        ) -> $retty {
            let chosen = {
                #[cfg(not(target_feature = "sse2"))]
                {
                    debug!(
                        "no sse2 feature available, using fallback for {}",
                        stringify!($memchrty),
                    );
                    find_fallback as TypedFn
                }
                #[cfg(target_feature = "sse2")]
                {
                    use crate::arch::x86_64::{avx2, sse2};

                    if avx2::memchr::$memchrty::is_available() {
                        debug!("chose AVX2 for {}", stringify!($memchrty));
                        find_avx2 as TypedFn
                    } else if sse2::memchr::$memchrty::is_available() {
                        debug!("chose SSE2 for {}", stringify!($memchrty));
                        find_sse2 as TypedFn
                    } else {
                        debug!("chose fallback for {}", stringify!($memchrty));
                        find_fallback as TypedFn
                    }
                }
            };
            DISPATCH.store(chosen as ErasedFn, Ordering::Relaxed);
            // SAFETY: What must be upheld here are the `#[target_feature]`
            // requirements of the chosen routine. A vector routine is only
            // ever chosen after its `is_available` check succeeded above, so
            // the code being called is supported by the current CPU.
            chosen($($needle),+, $hay_start, $hay_end)
        }

        // The `#[cfg(target_feature = "sse2")]` gates below shouldn't be
        // strictly necessary, but without them the compiler seems to blow up.
        // Perhaps it can't handle a function pointer being created with a sse
        // target feature? See the `build-for-x86-64-but-non-sse-target` CI
        // job if you want to experiment with this.
        #[cfg(target_feature = "sse2")]
        #[target_feature(enable = "sse2", enable = "avx2")]
        unsafe fn find_avx2(
            $($needle: u8),+,
            $hay_start: *const u8,
            $hay_end: *const u8,
        ) -> $retty {
            use crate::arch::x86_64::avx2::memchr::$memchrty;

            let searcher = $memchrty::new_unchecked($($needle),+);
            searcher.$memchrfind($hay_start, $hay_end)
        }

        #[cfg(target_feature = "sse2")]
        #[target_feature(enable = "sse2")]
        unsafe fn find_sse2(
            $($needle: u8),+,
            $hay_start: *const u8,
            $hay_end: *const u8,
        ) -> $retty {
            use crate::arch::x86_64::sse2::memchr::$memchrty;

            let searcher = $memchrty::new_unchecked($($needle),+);
            searcher.$memchrfind($hay_start, $hay_end)
        }

        unsafe fn find_fallback(
            $($needle: u8),+,
            $hay_start: *const u8,
            $hay_end: *const u8,
        ) -> $retty {
            use crate::arch::all::memchr::$memchrty;

            let searcher = $memchrty::new($($needle),+);
            searcher.$memchrfind($hay_start, $hay_end)
        }

        // SAFETY: By virtue of the caller contract, `TypedFn` is a function
        // pointer type, and `DISPATCH` only ever holds `detect` or one of the
        // `find_*` routines cast from that type, so converting the loaded
        // pointer back is sound. A vector routine is only stored after its
        // `is_available` check passed, which makes it safe to call
        // `$memchrty::$memchrfind` through it.
        unsafe {
            let erased = DISPATCH.load(Ordering::Relaxed);
            let target = core::mem::transmute::<ErasedFn, TypedFn>(erased);
            target($($needle),+, $hay_start, $hay_end)
        }
    }};
}
// The routines below dispatch to AVX2, SSE2 or a fallback routine based on
// what's available in the current environment. The secret sauce here is that
// we only check for which one to use approximately once, and then "cache" that
// choice into a global function pointer. Subsequent invocations then just call
// the appropriate function directly.
/// Raw-pointer flavor of `memchr`.
///
/// Searches the haystack delimited by `start` and `end` for the byte `n1` by
/// calling `One::find_raw` on the implementation that `unsafe_ifunc!` selects
/// at runtime.
///
/// # Safety
///
/// This function is not declared `unsafe`, yet both pointers are forwarded
/// unchecked. They must be valid in the sense documented on `One::find_raw`.
#[inline(always)]
pub(crate) fn memchr_raw(
    n1: u8,
    start: *const u8,
    end: *const u8,
) -> Option<*const u8> {
    // Signature shared by every routine the dispatcher may select.
    type Routine = unsafe fn(u8, *const u8, *const u8) -> Option<*const u8>;

    // SAFETY: `Routine` is a valid function pointer type.
    unsafe_ifunc!(One, find_raw, Routine, Option<*const u8>, start, end, n1)
}
/// Raw-pointer flavor of `memrchr`.
///
/// Searches the haystack delimited by `start` and `end` for the byte `n1` by
/// calling `One::rfind_raw` on the implementation that `unsafe_ifunc!`
/// selects at runtime.
///
/// # Safety
///
/// This function is not declared `unsafe`, yet both pointers are forwarded
/// unchecked. They must be valid in the sense documented on `One::rfind_raw`.
#[inline(always)]
pub(crate) fn memrchr_raw(
    n1: u8,
    start: *const u8,
    end: *const u8,
) -> Option<*const u8> {
    // Signature shared by every routine the dispatcher may select.
    type Routine = unsafe fn(u8, *const u8, *const u8) -> Option<*const u8>;

    // SAFETY: `Routine` is a valid function pointer type.
    unsafe_ifunc!(One, rfind_raw, Routine, Option<*const u8>, start, end, n1)
}
/// Raw-pointer flavor of `memchr2`.
///
/// Searches the haystack delimited by `start` and `end` using the needle
/// bytes `n1` and `n2`, by calling `Two::find_raw` on the implementation that
/// `unsafe_ifunc!` selects at runtime.
///
/// # Safety
///
/// This function is not declared `unsafe`, yet both pointers are forwarded
/// unchecked. They must be valid in the sense documented on `Two::find_raw`.
#[inline(always)]
pub(crate) fn memchr2_raw(
    n1: u8,
    n2: u8,
    start: *const u8,
    end: *const u8,
) -> Option<*const u8> {
    // Signature shared by every routine the dispatcher may select.
    type Routine =
        unsafe fn(u8, u8, *const u8, *const u8) -> Option<*const u8>;

    // SAFETY: `Routine` is a valid function pointer type.
    unsafe_ifunc!(
        Two,
        find_raw,
        Routine,
        Option<*const u8>,
        start,
        end,
        n1,
        n2
    )
}
/// Raw-pointer flavor of `memrchr2`.
///
/// Searches the haystack delimited by `start` and `end` using the needle
/// bytes `n1` and `n2`, by calling `Two::rfind_raw` on the implementation
/// that `unsafe_ifunc!` selects at runtime.
///
/// # Safety
///
/// This function is not declared `unsafe`, yet both pointers are forwarded
/// unchecked. They must be valid in the sense documented on `Two::rfind_raw`.
#[inline(always)]
pub(crate) fn memrchr2_raw(
    n1: u8,
    n2: u8,
    start: *const u8,
    end: *const u8,
) -> Option<*const u8> {
    // Signature shared by every routine the dispatcher may select.
    type Routine =
        unsafe fn(u8, u8, *const u8, *const u8) -> Option<*const u8>;

    // SAFETY: `Routine` is a valid function pointer type.
    unsafe_ifunc!(
        Two,
        rfind_raw,
        Routine,
        Option<*const u8>,
        start,
        end,
        n1,
        n2
    )
}
/// Raw-pointer flavor of `memchr3`.
///
/// Searches the haystack delimited by `start` and `end` using the needle
/// bytes `n1`, `n2` and `n3`, by calling `Three::find_raw` on the
/// implementation that `unsafe_ifunc!` selects at runtime.
///
/// # Safety
///
/// This function is not declared `unsafe`, yet both pointers are forwarded
/// unchecked. They must be valid in the sense documented on
/// `Three::find_raw`.
#[inline(always)]
pub(crate) fn memchr3_raw(
    n1: u8,
    n2: u8,
    n3: u8,
    start: *const u8,
    end: *const u8,
) -> Option<*const u8> {
    // Signature shared by every routine the dispatcher may select.
    type Routine =
        unsafe fn(u8, u8, u8, *const u8, *const u8) -> Option<*const u8>;

    // SAFETY: `Routine` is a valid function pointer type.
    unsafe_ifunc!(
        Three,
        find_raw,
        Routine,
        Option<*const u8>,
        start,
        end,
        n1,
        n2,
        n3
    )
}
/// Raw-pointer flavor of `memrchr3`.
///
/// Searches the haystack delimited by `start` and `end` using the needle
/// bytes `n1`, `n2` and `n3`, by calling `Three::rfind_raw` on the
/// implementation that `unsafe_ifunc!` selects at runtime.
///
/// # Safety
///
/// This function is not declared `unsafe`, yet both pointers are forwarded
/// unchecked. They must be valid in the sense documented on
/// `Three::rfind_raw`.
#[inline(always)]
pub(crate) fn memrchr3_raw(
    n1: u8,
    n2: u8,
    n3: u8,
    start: *const u8,
    end: *const u8,
) -> Option<*const u8> {
    // Signature shared by every routine the dispatcher may select.
    type Routine =
        unsafe fn(u8, u8, u8, *const u8, *const u8) -> Option<*const u8>;

    // SAFETY: `Routine` is a valid function pointer type.
    unsafe_ifunc!(
        Three,
        rfind_raw,
        Routine,
        Option<*const u8>,
        start,
        end,
        n1,
        n2,
        n3
    )
}
/// Counts all bytes matching `n1`, with the haystack given as raw pointers.
///
/// The haystack is delimited by `start` and `end`. The count is produced by
/// calling `One::count_raw` on the implementation that `unsafe_ifunc!`
/// selects at runtime.
///
/// # Safety
///
/// This function is not declared `unsafe`, yet both pointers are forwarded
/// unchecked. They must be valid in the sense documented on `One::count_raw`.
#[inline(always)]
pub(crate) fn count_raw(n1: u8, start: *const u8, end: *const u8) -> usize {
    // Signature shared by every routine the dispatcher may select.
    type Routine = unsafe fn(u8, *const u8, *const u8) -> usize;

    // SAFETY: `Routine` is a valid function pointer type.
    unsafe_ifunc!(One, count_raw, Routine, usize, start, end, n1)
}