// SPDX-License-Identifier: GPL-2.0-only
// Copyright (C) 2021-3 ARM Limited.
//
// Assembly portion of the FP ptrace test
//
// Load values from memory into registers, break on a breakpoint, then
// break on a further breakpoint
//
#include "fp-ptrace.h"
#include "sme-inst.h"
.arch_extension sve
// Load and save register values with pauses for ptrace
//
// Loads the FPSIMD, SVE and SME register state from the *_in buffers,
// stops on a breakpoint so the parent can inspect/modify the state,
// saves the resulting state to the *_out buffers, then stops on a
// second breakpoint so the parent can read the results.
//
// Inputs:
// x0 - SVE in use
// x1 - SME in use
// x2 - SME2 in use
// x3 - FA64 supported
//
// Scratch registers used internally:
// x4  - non-zero when FFR should be loaded/saved
// x6  - buffer pointer for ZA/ZT0 and for the *_out scalar stores
// x7  - buffer pointer, also holds the SVCR value while it is tested
// x11 - value returned by rdsvl, used as the ZA row stride and row count
// x12 - ZA row index
//
// x11 and x12 are pushed on entry and popped on exit; 16 bytes of stack
// are in use for the duration of the call.  All V/Z/P registers, FFR,
// SVCR, ZA, ZT0 and the condition flags may be modified.
//
// NOTE(review): rdsvl, _ldr_za, _str_za, _ldr_zt and _str_zt are macros
// expected to come from sme-inst.h and appear to take bare register
// numbers rather than register names - confirm against that header.

.globl load_and_save
load_and_save:
	stp	x11, x12, [sp, #-0x10]!

	// This should be redundant in the SVE case
	ldr	x7, =v_in
	ldp	q0, q1, [x7]
	ldp	q2, q3, [x7, #16 * 2]
	ldp	q4, q5, [x7, #16 * 4]
	ldp	q6, q7, [x7, #16 * 6]
	ldp	q8, q9, [x7, #16 * 8]
	ldp	q10, q11, [x7, #16 * 10]
	ldp	q12, q13, [x7, #16 * 12]
	ldp	q14, q15, [x7, #16 * 14]
	ldp	q16, q17, [x7, #16 * 16]
	ldp	q18, q19, [x7, #16 * 18]
	ldp	q20, q21, [x7, #16 * 20]
	ldp	q22, q23, [x7, #16 * 22]
	ldp	q24, q25, [x7, #16 * 24]
	ldp	q26, q27, [x7, #16 * 26]
	ldp	q28, q29, [x7, #16 * 28]
	ldp	q30, q31, [x7, #16 * 30]

	// SME?
	cbz	x1, check_sve_in

	adrp	x7, svcr_in
	ldr	x7, [x7, :lo12:svcr_in]
	// SVCR is 0 by default, avoid triggering SME if not in use
	cbz	x7, check_sve_in
	msr	S3_3_C4_C2_2, x7		// SVCR = svcr_in

	// ZA?
	tbz	x7, #SVCR_ZA_SHIFT, check_sm_in
	rdsvl	11, 1
	mov	w12, #0
	ldr	x6, =za_in
	// Load x11 rows, advancing the source pointer by x11 bytes per row
1:	_ldr_za 12, 6
	add	x6, x6, x11
	add	x12, x12, #1
	cmp	x11, x12
	bne	1b

	// ZT?
	cbz	x2, check_sm_in
	adrp	x6, zt_in
	add	x6, x6, :lo12:zt_in
	_ldr_zt 6

	// In streaming mode?
check_sm_in:
	tbz	x7, #SVCR_SM_SHIFT, check_sve_in
	mov	x4, x3		// Load FFR if we have FA64
	b	load_sve

	// SVE?
check_sve_in:
	cbz	x0, wait_for_writes
	mov	x4, #1		// Not streaming: always load FFR

load_sve:
	ldr	x7, =z_in
	ldr	z0, [x7, #0, MUL VL]
	ldr	z1, [x7, #1, MUL VL]
	ldr	z2, [x7, #2, MUL VL]
	ldr	z3, [x7, #3, MUL VL]
	ldr	z4, [x7, #4, MUL VL]
	ldr	z5, [x7, #5, MUL VL]
	ldr	z6, [x7, #6, MUL VL]
	ldr	z7, [x7, #7, MUL VL]
	ldr	z8, [x7, #8, MUL VL]
	ldr	z9, [x7, #9, MUL VL]
	ldr	z10, [x7, #10, MUL VL]
	ldr	z11, [x7, #11, MUL VL]
	ldr	z12, [x7, #12, MUL VL]
	ldr	z13, [x7, #13, MUL VL]
	ldr	z14, [x7, #14, MUL VL]
	ldr	z15, [x7, #15, MUL VL]
	ldr	z16, [x7, #16, MUL VL]
	ldr	z17, [x7, #17, MUL VL]
	ldr	z18, [x7, #18, MUL VL]
	ldr	z19, [x7, #19, MUL VL]
	ldr	z20, [x7, #20, MUL VL]
	ldr	z21, [x7, #21, MUL VL]
	ldr	z22, [x7, #22, MUL VL]
	ldr	z23, [x7, #23, MUL VL]
	ldr	z24, [x7, #24, MUL VL]
	ldr	z25, [x7, #25, MUL VL]
	ldr	z26, [x7, #26, MUL VL]
	ldr	z27, [x7, #27, MUL VL]
	ldr	z28, [x7, #28, MUL VL]
	ldr	z29, [x7, #29, MUL VL]
	ldr	z30, [x7, #30, MUL VL]
	ldr	z31, [x7, #31, MUL VL]

	// FFR is not present in base SME
	cbz	x4, 1f
	ldr	x7, =ffr_in
	ldr	p0, [x7]
	// Skip the FFR write when the first 64 bits of ffr_in are zero
	ldr	x7, [x7, #0]
	cbz	x7, 1f
	wrffr	p0.b
1:

	// P0 was used as a temporary for FFR above, so load P0-P15 last
	ldr	x7, =p_in
	ldr	p0, [x7, #0, MUL VL]
	ldr	p1, [x7, #1, MUL VL]
	ldr	p2, [x7, #2, MUL VL]
	ldr	p3, [x7, #3, MUL VL]
	ldr	p4, [x7, #4, MUL VL]
	ldr	p5, [x7, #5, MUL VL]
	ldr	p6, [x7, #6, MUL VL]
	ldr	p7, [x7, #7, MUL VL]
	ldr	p8, [x7, #8, MUL VL]
	ldr	p9, [x7, #9, MUL VL]
	ldr	p10, [x7, #10, MUL VL]
	ldr	p11, [x7, #11, MUL VL]
	ldr	p12, [x7, #12, MUL VL]
	ldr	p13, [x7, #13, MUL VL]
	ldr	p14, [x7, #14, MUL VL]
	ldr	p15, [x7, #15, MUL VL]

wait_for_writes:
	// Wait for the parent
	brk #0

	// Save values
	ldr	x7, =v_out
	stp	q0, q1, [x7]
	stp	q2, q3, [x7, #16 * 2]
	stp	q4, q5, [x7, #16 * 4]
	stp	q6, q7, [x7, #16 * 6]
	stp	q8, q9, [x7, #16 * 8]
	stp	q10, q11, [x7, #16 * 10]
	stp	q12, q13, [x7, #16 * 12]
	stp	q14, q15, [x7, #16 * 14]
	stp	q16, q17, [x7, #16 * 16]
	stp	q18, q19, [x7, #16 * 18]
	stp	q20, q21, [x7, #16 * 20]
	stp	q22, q23, [x7, #16 * 22]
	stp	q24, q25, [x7, #16 * 24]
	stp	q26, q27, [x7, #16 * 26]
	stp	q28, q29, [x7, #16 * 28]
	stp	q30, q31, [x7, #16 * 30]

	// SME?
	cbz	x1, check_sve_out

	// Re-read the vector length; x11 is also the ZA stride/count below
	rdsvl	11, 1
	adrp	x6, sme_vl_out
	str	x11, [x6, :lo12:sme_vl_out]

	// Re-read SVCR rather than reusing the value written before the stop
	mrs	x7, S3_3_C4_C2_2
	adrp	x6, svcr_out
	str	x7, [x6, :lo12:svcr_out]

	// ZA?
	tbz	x7, #SVCR_ZA_SHIFT, check_sm_out
	mov	w12, #0
	ldr	x6, =za_out
	// Store x11 rows, advancing the destination by x11 bytes per row
1:	_str_za 12, 6
	add	x6, x6, x11
	add	x12, x12, #1
	cmp	x11, x12
	bne	1b

	// ZT?
	cbz	x2, check_sm_out
	adrp	x6, zt_out
	add	x6, x6, :lo12:zt_out
	_str_zt 6

	// In streaming mode?
check_sm_out:
	tbz	x7, #SVCR_SM_SHIFT, check_sve_out
	mov	x4, x3				// FFR?
	b	read_sve

	// SVE?
check_sve_out:
	cbz	x0, wait_for_reads
	mov	x4, #1		// Not streaming: always save FFR

	// The SVE vector length is only recorded on the non-streaming path
	rdvl	x7, #1
	adrp	x6, sve_vl_out
	str	x7, [x6, :lo12:sve_vl_out]

read_sve:
	ldr	x7, =z_out
	str	z0, [x7, #0, MUL VL]
	str	z1, [x7, #1, MUL VL]
	str	z2, [x7, #2, MUL VL]
	str	z3, [x7, #3, MUL VL]
	str	z4, [x7, #4, MUL VL]
	str	z5, [x7, #5, MUL VL]
	str	z6, [x7, #6, MUL VL]
	str	z7, [x7, #7, MUL VL]
	str	z8, [x7, #8, MUL VL]
	str	z9, [x7, #9, MUL VL]
	str	z10, [x7, #10, MUL VL]
	str	z11, [x7, #11, MUL VL]
	str	z12, [x7, #12, MUL VL]
	str	z13, [x7, #13, MUL VL]
	str	z14, [x7, #14, MUL VL]
	str	z15, [x7, #15, MUL VL]
	str	z16, [x7, #16, MUL VL]
	str	z17, [x7, #17, MUL VL]
	str	z18, [x7, #18, MUL VL]
	str	z19, [x7, #19, MUL VL]
	str	z20, [x7, #20, MUL VL]
	str	z21, [x7, #21, MUL VL]
	str	z22, [x7, #22, MUL VL]
	str	z23, [x7, #23, MUL VL]
	str	z24, [x7, #24, MUL VL]
	str	z25, [x7, #25, MUL VL]
	str	z26, [x7, #26, MUL VL]
	str	z27, [x7, #27, MUL VL]
	str	z28, [x7, #28, MUL VL]
	str	z29, [x7, #29, MUL VL]
	str	z30, [x7, #30, MUL VL]
	str	z31, [x7, #31, MUL VL]

	ldr	x7, =p_out
	str	p0, [x7, #0, MUL VL]
	str	p1, [x7, #1, MUL VL]
	str	p2, [x7, #2, MUL VL]
	str	p3, [x7, #3, MUL VL]
	str	p4, [x7, #4, MUL VL]
	str	p5, [x7, #5, MUL VL]
	str	p6, [x7, #6, MUL VL]
	str	p7, [x7, #7, MUL VL]
	str	p8, [x7, #8, MUL VL]
	str	p9, [x7, #9, MUL VL]
	str	p10, [x7, #10, MUL VL]
	str	p11, [x7, #11, MUL VL]
	str	p12, [x7, #12, MUL VL]
	str	p13, [x7, #13, MUL VL]
	str	p14, [x7, #14, MUL VL]
	str	p15, [x7, #15, MUL VL]

	// Only save FFR if it exists
	cbz	x4, wait_for_reads
	ldr	x7, =ffr_out
	// P0 has already been written to p_out, so it is free as a temporary
	rdffr	p0.b
	str	p0, [x7]

wait_for_reads:
	// Wait for the parent
	brk #0

	// Ensure we don't leave ourselves in streaming mode
	cbz	x1, out
	msr	S3_3_C4_C2_2, xzr

out:
	// Pop the pair pushed on entry.  Post-index addressing both reloads
	// from the slot the prologue wrote and restores sp to its entry
	// value, keeping the stack balanced for the caller.
	ldp	x11, x12, [sp], #0x10
	ret