/* SPDX-License-Identifier: GPL-2.0-only */
/*
* aes-ce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
*
* Copyright (C) 2013 - 2017 Linaro Ltd.
* Copyright (C) 2024 Google LLC
*
* Author: Ard Biesheuvel <[email protected]>
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
.text
.arch armv8-a+crypto
.macro load_round_keys, rk, nr, tmp
sub w\tmp, \nr, #10
add \tmp, \rk, w\tmp, sxtw #4
ld1 {v10.4s-v13.4s}, [\rk]
ld1 {v14.4s-v17.4s}, [\tmp], #64
ld1 {v18.4s-v21.4s}, [\tmp], #64
ld1 {v3.4s-v5.4s}, [\tmp]
.endm
.macro dround, va, vb, vk
aese \va\().16b, \vk\().16b
aesmc \va\().16b, \va\().16b
aese \vb\().16b, \vk\().16b
aesmc \vb\().16b, \vb\().16b
.endm
.macro aes_encrypt, va, vb, nr
tbz \nr, #2, .L\@
dround \va, \vb, v10
dround \va, \vb, v11
tbz \nr, #1, .L\@
dround \va, \vb, v12
dround \va, \vb, v13
.L\@: .irp v, v14, v15, v16, v17, v18, v19, v20, v21, v3
dround \va, \vb, \v
.endr
aese \va\().16b, v4.16b
aese \vb\().16b, v4.16b
.endm
.macro aes_ccm_do_crypt,enc
load_round_keys x3, w4, x10
ld1 {v0.16b}, [x5] /* load mac */
cbz x2, ce_aes_ccm_final
ldr x8, [x6, #8] /* load lower ctr */
CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */
0: /* outer loop */
ld1 {v1.8b}, [x6] /* load upper ctr */
prfm pldl1strm, [x1]
add x8, x8, #1
rev x9, x8
ins v1.d[1], x9 /* no carry in lower ctr */
aes_encrypt v0, v1, w4
subs w2, w2, #16
bmi ce_aes_ccm_crypt_tail
ld1 {v2.16b}, [x1], #16 /* load next input block */
.if \enc == 1
eor v2.16b, v2.16b, v5.16b /* final round enc+mac */
eor v6.16b, v1.16b, v2.16b /* xor with crypted ctr */
.else
eor v2.16b, v2.16b, v1.16b /* xor with crypted ctr */
eor v6.16b, v2.16b, v5.16b /* final round enc */
.endif
eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */
st1 {v6.16b}, [x0], #16 /* write output block */
bne 0b
CPU_LE( rev x8, x8 )
str x8, [x6, #8] /* store lsb end of ctr (BE) */
cbnz x7, ce_aes_ccm_final
st1 {v0.16b}, [x5] /* store mac */
ret
.endm
SYM_FUNC_START_LOCAL(ce_aes_ccm_crypt_tail)
eor v0.16b, v0.16b, v5.16b /* final round mac */
eor v1.16b, v1.16b, v5.16b /* final round enc */
add x1, x1, w2, sxtw /* rewind the input pointer (w2 < 0) */
add x0, x0, w2, sxtw /* rewind the output pointer */
adr_l x8, .Lpermute /* load permute vectors */
add x9, x8, w2, sxtw
sub x8, x8, w2, sxtw
ld1 {v7.16b-v8.16b}, [x9]
ld1 {v9.16b}, [x8]
ld1 {v2.16b}, [x1] /* load a full block of input */
tbl v1.16b, {v1.16b}, v7.16b /* move keystream to end of register */
eor v7.16b, v2.16b, v1.16b /* encrypt partial input block */
bif v2.16b, v7.16b, v22.16b /* select plaintext */
tbx v7.16b, {v6.16b}, v8.16b /* insert output from previous iteration */
tbl v2.16b, {v2.16b}, v9.16b /* copy plaintext to start of v2 */
eor v0.16b, v0.16b, v2.16b /* fold plaintext into mac */
st1 {v7.16b}, [x0] /* store output block */
cbz x7, 0f
SYM_INNER_LABEL(ce_aes_ccm_final, SYM_L_LOCAL)
ld1 {v1.16b}, [x7] /* load 1st ctriv */
aes_encrypt v0, v1, w4
/* final round key cancels out */
eor v0.16b, v0.16b, v1.16b /* en-/decrypt the mac */
0: st1 {v0.16b}, [x5] /* store result */
ret
SYM_FUNC_END(ce_aes_ccm_crypt_tail)
/*
* void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
* u8 const rk[], u32 rounds, u8 mac[],
* u8 ctr[], u8 const final_iv[]);
* void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
* u8 const rk[], u32 rounds, u8 mac[],
* u8 ctr[], u8 const final_iv[]);
*/
SYM_FUNC_START(ce_aes_ccm_encrypt)
movi v22.16b, #255
aes_ccm_do_crypt 1
SYM_FUNC_END(ce_aes_ccm_encrypt)
SYM_FUNC_START(ce_aes_ccm_decrypt)
movi v22.16b, #0
aes_ccm_do_crypt 0
SYM_FUNC_END(ce_aes_ccm_decrypt)
.section ".rodata", "a"
.align 6
.fill 15, 1, 0xff
.Lpermute:
.byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
.byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
.fill 15, 1, 0xff