/*
* strchr - find a character in a string
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
/* Assumptions:
*
* ARMv8-a, AArch64
* Neon Available.
*/
#include "../asmdefs.h"
/* Arguments and results. */
#define srcin x0
#define chrin w1
#define result x0
/* Locals and temporaries. */
#define src x2
#define tmp1 x3
#define wtmp2 w4
#define tmp3 x5
#define vrepchr v0
#define qdata q1 /* 128-bit alias of vdata (v1), for LDR Q-form loads. */
#define vdata v1
#define vhas_nul v2
#define vhas_chr v3
#define vrepmask_0 v4
#define vrepmask_c v5
#define vend v6
#define L(l) .L ## l
/* Core algorithm.

   For each 16-byte chunk we calculate a 64-bit syndrome value, with
   four bits per byte (the nibble for the first string byte always
   occupies the least significant bits, on both big- and little-endian
   systems). Within each nibble, bit 0 is set iff the relevant byte
   matched the requested character; bit 1 is set iff the relevant byte
   matched the NUL end of string (we trigger off bit 0 for the special
   case of looking for NUL); bits 2 and 3 are not used.
   Since the bits in the syndrome reflect exactly the order in which
   things occur in the original string, a count_trailing_zeros()
   operation will identify exactly which byte caused the termination,
   and why. */
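/* As an illustrative reference (not assembled), each 16-byte step is
   equivalent to the following C-like sketch, where chunk points at the
   current 16-byte window, c is the search character, and the helper
   ctz64 is a hypothetical stand-in for a count-trailing-zeros
   primitive such as __builtin_ctzll:

	uint64_t syndrome = 0;
	for (int i = 0; i < 16; i++)
	  {
	    if (chunk[i] == c)
	      syndrome |= 1ull << (4 * i);	// bit 0 of nibble i
	    if (chunk[i] == 0)
	      syndrome |= 2ull << (4 * i);	// bit 1 of nibble i
	  }
	if (syndrome != 0)
	  {
	    unsigned tz = ctz64 (syndrome);	// even: char; odd: NUL first
	    return (tz & 1) ? NULL : (char *) chunk + (tz >> 2);
	  }
	// otherwise advance chunk by 16 bytes and repeat
*/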
ENTRY(__strchr_aarch64_mte)
/* Magic constant 0x10011001 to allow us to identify which lane
matches the requested byte. Magic constant 0x20022002 used
similarly for NUL termination. */
mov wtmp2, #0x1001
movk wtmp2, #0x1001, lsl #16
dup vrepchr.16b, chrin
bic src, srcin, #15 /* Work with aligned 16-byte chunks. */
dup vrepmask_c.4s, wtmp2
ands tmp1, srcin, #15
add vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */
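/* Per 32-bit lane, the byte lanes of vrepmask_c hold the repeating
   pattern 01 10 01 10 and vrepmask_0 holds 02 20 02 20: even bytes
   keep their match bits in a low nibble and odd bytes in a high
   nibble, so the pairwise ADDP below can merge byte pairs into the
   64-bit syndrome without losing information. */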
b.eq L(loop)
/* Input string is not 16-byte aligned. Rather than forcing
the padding bytes to a safe value, we calculate the syndrome
for all the bytes, but then mask off those bits of the
syndrome that are related to the padding. */
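/* Loading from the rounded-down address reads up to 15 bytes before
   srcin. This cannot fault: an aligned 16-byte load never crosses a
   page boundary, and it also stays within a single 16-byte MTE tag
   granule, which is what makes this approach MTE-safe. */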
ldr qdata, [src], #16
cmeq vhas_nul.16b, vdata.16b, #0
cmeq vhas_chr.16b, vdata.16b, vrepchr.16b
and vhas_nul.16b, vhas_nul.16b, vrepmask_0.16b
and vhas_chr.16b, vhas_chr.16b, vrepmask_c.16b
lsl tmp1, tmp1, #2
orr vend.16b, vhas_nul.16b, vhas_chr.16b
mov tmp3, #~0
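/* ADDP sums adjacent byte lanes, so bytes 2i and 2i+1 (masked to a
   low and a high nibble respectively) combine into one byte,
   compressing the 128-bit comparison result into the 64-bit
   syndrome described above. */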
addp vend.16b, vend.16b, vend.16b /* 128->64 */
lsl tmp1, tmp3, tmp1
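/* Worked example: if srcin is 3 bytes past alignment, the shift
   amount is 3*4 = 12, so the mask is ~0 << 12, clearing the three
   low nibbles that correspond to the padding bytes. */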
mov tmp3, vend.d[0]
ands tmp1, tmp3, tmp1 /* Mask padding bits. */
b.ne L(tail)
L(loop):
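/* Each iteration examines 32 bytes in two 16-byte halves; this load
   fetches the first half and post-increments src past both. */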
ldr qdata, [src], #32
cmeq vhas_nul.16b, vdata.16b, #0
cmeq vhas_chr.16b, vdata.16b, vrepchr.16b
/* Use a fast check for the termination condition. */
orr vend.16b, vhas_nul.16b, vhas_chr.16b
addp vend.16b, vend.16b, vend.16b /* 128->64 */
mov tmp1, vend.d[0]
cbnz tmp1, L(end)
ldr qdata, [src, #-16]
cmeq vhas_nul.16b, vdata.16b, #0
cmeq vhas_chr.16b, vdata.16b, vrepchr.16b
/* Use a fast check for the termination condition. */
orr vend.16b, vhas_nul.16b, vhas_chr.16b
addp vend.16b, vend.16b, vend.16b /* 128->64 */
mov tmp1, vend.d[0]
cbz tmp1, L(loop)
/* The match is in the second chunk; pre-add 16 so the two 16-byte
   subtractions below (at L(end) and L(tail)) leave src pointing at it. */
add src, src, #16
L(end):
/* Termination condition found. Now need to establish exactly why
we terminated. */
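/* The fast loop check only tested for any non-zero lane; apply the
   lane masks now to rebuild the exact nibble encoding
   (bit 0 = character match, bit 1 = NUL). */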
and vhas_nul.16b, vhas_nul.16b, vrepmask_0.16b
and vhas_chr.16b, vhas_chr.16b, vrepmask_c.16b
sub src, src, #16
orr vend.16b, vhas_nul.16b, vhas_chr.16b
addp vend.16b, vend.16b, vend.16b /* 128->64 */
mov tmp1, vend.d[0]
L(tail):
/* Count the trailing zeros, by bit reversing... */
rbit tmp1, tmp1
/* Re-bias source. */
sub src, src, #16
clz tmp1, tmp1 /* And counting the leading zeros. */
/* tmp1 is even iff the target character was found first (a search
   for NUL also sets bit 0, so it too yields an even count). An odd
   count means we hit the end of the string without finding the
   character, so return NULL. */
tst tmp1, #1
add result, src, tmp1, lsr #2
csel result, result, xzr, eq
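/* E.g. a character match at byte 5 sets bit 20, so tmp1 = 20 (even)
   and the result is src + 5; a NUL first at byte 5 sets bit 21, so
   tmp1 = 21 (odd) and NULL is returned instead. */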
ret
END(__strchr_aarch64_mte)