/* linux/arch/powerpc/crypto/aes-spe-modes.S */

/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * AES modes (ECB/CBC/CTR/XTS) for PPC AES implementation
 *
 * Copyright (c) 2015 Markus Stockhausen <[email protected]>
 */

#include <asm/ppc_asm.h>
#include "aes-spe-regs.h"

/*
 * Endian dependent access macros
 *
 * LOAD_DATA/SAVE_DATA move one 32 bit word between a register and the
 * buffer addressed by rSP (load) or rDP (save). LOAD_IV/SAVE_IV do the
 * same for the buffer addressed by rIP.
 *
 * Big endian: plain offset addressing, the pointers are not modified.
 * NEXT_BLOCK therefore advances rSP and rDP by 16 and START_IV is empty.
 *
 * Little endian: the "off" argument is ignored. Every access uses a byte
 * reversing load/store and bumps the pointer by 4 afterwards. After four
 * accesses the pointer already sits on the next block, so NEXT_BLOCK is
 * empty, and START_IV must rewind rIP by 16 before the IV buffer is
 * accessed again.
 *
 * CBC_DEC is the amount used to step rSP/rDP back to the previous block,
 * CTR_DEC the amount used to step rIP back to one byte in front of the
 * IV buffer. Both little endian values include the 16 bytes the pointer
 * was advanced by the preceding four accesses.
 */
#ifdef __BIG_ENDIAN__			/* Macros for big endian builds	*/

#define LOAD_DATA(reg, off) \
	lwz		reg,off(rSP);	/* load with offset		*/
#define SAVE_DATA(reg, off) \
	stw		reg,off(rDP);	/* save with offset		*/
#define NEXT_BLOCK \
	addi		rSP,rSP,16;	/* increment pointers per block	*/ \
	addi		rDP,rDP,16;
#define LOAD_IV(reg, off) \
	lwz		reg,off(rIP);	/* IV loading with offset	*/
#define SAVE_IV(reg, off) \
	stw		reg,off(rIP);	/* IV saving with offset	*/
#define START_IV			/* nothing to reset		*/
#define CBC_DEC 16			/* CBC decrement per block	*/
#define CTR_DEC 1			/* CTR decrement one byte	*/

#else					/* Macros for little endian	*/

#define LOAD_DATA(reg, off) \
	lwbrx		reg,0,rSP;	/* load reversed		*/ \
	addi		rSP,rSP,4;	/* and increment pointer	*/
#define SAVE_DATA(reg, off) \
	stwbrx		reg,0,rDP;	/* save reversed		*/ \
	addi		rDP,rDP,4;	/* and increment pointer	*/
#define NEXT_BLOCK			/* nothing to do		*/
#define LOAD_IV(reg, off) \
	lwbrx		reg,0,rIP;	/* load reversed		*/ \
	addi		rIP,rIP,4;	/* and increment pointer	*/
#define SAVE_IV(reg, off) \
	stwbrx		reg,0,rIP;	/* save reversed		*/ \
	addi		rIP,rIP,4;	/* and increment pointer	*/
#define START_IV \
	subi		rIP,rIP,16;	/* must reset pointer		*/
#define CBC_DEC 32			/* 2 blocks because of incs	*/
#define CTR_DEC 17			/* 1 block + 1 byte (incs)	*/

#endif

/*
 * Variant selected by INITIALIZE_CRYPT(tab, 0) / FINALIZE_CRYPT(0):
 * no additional 32 bit registers are saved or restored.
 */
#define SAVE_0_REGS
#define LOAD_0_REGS

/*
 * Variant selected by INITIALIZE_CRYPT(tab, 4) / FINALIZE_CRYPT(4):
 * rI0-rI3 are stored to / reloaded from stack offsets 96..108 of the
 * frame created by INITIALIZE_CRYPT.
 */
#define SAVE_4_REGS \
	stw		rI0,96(r1);	/* save 32 bit registers	*/ \
	stw		rI1,100(r1);					   \
	stw		rI2,104(r1);					   \
	stw		rI3,108(r1);

#define LOAD_4_REGS \
	lwz		rI0,96(r1);	/* restore 32 bit registers	*/ \
	lwz		rI1,100(r1);					   \
	lwz		rI2,104(r1);					   \
	lwz		rI3,108(r1);

/*
 * Variant selected by INITIALIZE_CRYPT(tab, 8) / FINALIZE_CRYPT(8):
 * everything SAVE_4_REGS/LOAD_4_REGS handles plus rG0-rG3 at stack
 * offsets 112..124.
 */
#define SAVE_8_REGS \
	SAVE_4_REGS							   \
	stw		rG0,112(r1);	/* save 32 bit registers	*/ \
	stw		rG1,116(r1);					   \
	stw		rG2,120(r1);					   \
	stw		rG3,124(r1);

#define LOAD_8_REGS \
	LOAD_4_REGS							   \
	lwz		rG0,112(r1);	/* restore 32 bit registers	*/ \
	lwz		rG1,116(r1);					   \
	lwz		rG2,120(r1);					   \
	lwz		rG3,124(r1);

/*
 * INITIALIZE_CRYPT(tab, nr32bitregs) - common function prologue
 *
 * tab:		symbol whose address is loaded into rT0 (lis/ori pair)
 * nr32bitregs:	0, 4 or 8; pasted into SAVE_<n>_REGS to pick how many
 *		additional 32 bit registers are saved
 *
 * Creates a 160 byte stack frame, stores the link register at 8(r1),
 * keeps a copy of the key pointer rKP in rKS and saves r14-r23 as full
 * 64 bit values (evstdw) in the 8 byte slots at 16(r1)..88(r1).
 * Clobbers r0. Must be paired with FINALIZE_CRYPT using the same
 * nr32bitregs value.
 */
#define INITIALIZE_CRYPT(tab,nr32bitregs) \
	mflr		r0;						   \
	stwu		r1,-160(r1);	/* create stack frame		*/ \
	lis		rT0,tab@h;	/* en-/decryption table pointer	*/ \
	stw		r0,8(r1);	/* save link register		*/ \
	ori		rT0,rT0,tab@l;					   \
	evstdw		r14,16(r1);					   \
	mr		rKS,rKP;					   \
	evstdw		r15,24(r1);	/* We must save non volatile	*/ \
	evstdw		r16,32(r1);	/* registers. Take the chance	*/ \
	evstdw		r17,40(r1);	/* and save the SPE part too	*/ \
	evstdw		r18,48(r1);					   \
	evstdw		r19,56(r1);					   \
	evstdw		r20,64(r1);					   \
	evstdw		r21,72(r1);					   \
	evstdw		r22,80(r1);					   \
	evstdw		r23,88(r1);					   \
	SAVE_##nr32bitregs##_REGS

/*
 * FINALIZE_CRYPT(nr32bitregs) - common function epilogue
 *
 * nr32bitregs:	0, 4 or 8; must match the value given to INITIALIZE_CRYPT
 *
 * Reloads the link register value and r14-r23 (64 bit, evldw) from the
 * frame, restores the additional 32 bit registers via LOAD_<n>_REGS,
 * then overwrites the first 32 bit word of each of the ten 8 byte save
 * slots at 16(r1)..88(r1) with zero before releasing the 160 byte
 * frame. The second word of each slot and the slots at 96(r1)..124(r1)
 * are left as they are.
 * NOTE(review): the zeroed words are presumably the SPE (upper) halves
 * written by evstdw - confirm against the SPE PEM for the target
 * endianness.
 * Leaves r0 = 0. The caller still has to issue the blr.
 */
#define FINALIZE_CRYPT(nr32bitregs) \
	lwz		r0,8(r1);					   \
	evldw		r14,16(r1);	/* restore SPE registers	*/ \
	evldw		r15,24(r1);					   \
	evldw		r16,32(r1);					   \
	evldw		r17,40(r1);					   \
	evldw		r18,48(r1);					   \
	evldw		r19,56(r1);					   \
	evldw		r20,64(r1);					   \
	evldw		r21,72(r1);					   \
	evldw		r22,80(r1);					   \
	evldw		r23,88(r1);					   \
	LOAD_##nr32bitregs##_REGS					   \
	mtlr		r0;		/* restore link register	*/ \
	xor		r0,r0,r0;					   \
	stw		r0,16(r1);	/* delete sensitive data	*/ \
	stw		r0,24(r1);	/* that we might have pushed	*/ \
	stw		r0,32(r1);	/* from other context that runs	*/ \
	stw		r0,40(r1);	/* the same code		*/ \
	stw		r0,48(r1);					   \
	stw		r0,56(r1);					   \
	stw		r0,64(r1);					   \
	stw		r0,72(r1);					   \
	stw		r0,80(r1);					   \
	stw		r0,88(r1);					   \
	addi		r1,r1,160;	/* cleanup stack frame		*/

/*
 * ENDIAN_SWAP(t0, t1, s0, s1) - byte reverse two 32 bit words
 *
 * t0 = byteswap(s0), t1 = byteswap(s1); s0/s1 are not modified.
 * For s = [A B C D] the rotate right by 8 yields [D A B C]; the two
 * rlwimi then insert bytes 1 and 3 of s rotated left by 8 ([B C D A]),
 * giving [D C B A]. The two words are processed interleaved.
 * The targets must be different registers than the sources because the
 * sources are read again after the targets have been written.
 */
#define ENDIAN_SWAP(t0, t1, s0, s1) \
	rotrwi		t0,s0,8;	/* swap endianness for 2 GPRs	*/ \
	rotrwi		t1,s1,8;					   \
	rlwimi		t0,s0,8,8,15;					   \
	rlwimi		t1,s1,8,8,15;					   \
	rlwimi		t0,s0,8,24,31;					   \
	rlwimi		t1,s1,8,24,31;

/*
 * GF128_MUL(d0, d1, d2, d3, t0) - shift a 128 bit value left by one bit
 * and conditionally XOR 0x87 into the lowest word
 *
 * d0 is the least and d3 the most significant 32 bit word, t0 is a
 * scratch register. t0 is preset to 0x87 and replaced by 0 when d3 is
 * greater than -1 as a signed value, i.e. when the top bit of d3 is
 * clear. For every word above d0 the top bit of the next lower word is
 * inserted into its own top bit position (rlwimi ...,0,0,0) and the
 * word is rotated left by one, which moves that bit into bit position
 * 31 as the carry. d0 is shifted left and finally XORed with t0.
 * Modifies cr0.
 */
#define GF128_MUL(d0, d1, d2, d3, t0) \
	li		t0,0x87;	/* multiplication in GF128	*/ \
	cmpwi		d3,-1;						   \
	iselgt		t0,0,t0;					   \
	rlwimi		d3,d2,0,0,0;	/* propagate "carry" bits	*/ \
	rotlwi		d3,d3,1;					   \
	rlwimi		d2,d1,0,0,0;					   \
	rotlwi		d2,d2,1;					   \
	rlwimi		d1,d0,0,0,0;					   \
	slwi		d0,d0,1;	/* shift left 128 bit		*/ \
	rotlwi		d1,d1,1;					   \
	xor		d0,d0,t0;

/*
 * START_KEY(d0, d1, d2, d3) - prepare a block for the round function
 *
 * Loads the four 32 bit words at rKP into rW0-rW3, copies the round
 * value rRR into the count register and sets rD0-rD3 to d0-d3 XORed
 * with rW0-rW3. d0-d3 may be rD0-rD3 themselves or any other set of
 * registers (the CTR and XTS code passes rI0-rI3).
 */
#define START_KEY(d0, d1, d2, d3) \
	lwz		rW0,0(rKP);					   \
	mtctr		rRR;						   \
	lwz		rW1,4(rKP);					   \
	lwz		rW2,8(rKP);					   \
	lwz		rW3,12(rKP);					   \
	xor		rD0,d0,rW0;					   \
	xor		rD1,d1,rW1;					   \
	xor		rD2,d2,rW2;					   \
	xor		rD3,d3,rW3;

/*
 * ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc,
 *		   u32 rounds)
 *
 * called from glue layer to encrypt a single 16 byte block
 * round values are AES128 = 4, AES192 = 5, AES256 = 6
 *
 * The block is read through rSP, combined with the first four key
 * words by START_KEY, handed to ppc_encrypt_block and written through
 * rDP after a final XOR of rD0-rD3 with rW0-rW3.
 * NOTE(review): ppc_encrypt_block is defined elsewhere; rW0-rW3 after
 * the call presumably hold the last round key - confirm.
 *
 */
_GLOBAL(ppc_encrypt_aes)
	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
	LOAD_DATA(rD0, 0)		/* fetch the input block	*/
	LOAD_DATA(rD1, 4)
	LOAD_DATA(rD2, 8)
	LOAD_DATA(rD3, 12)
	START_KEY(rD0, rD1, rD2, rD3)
	bl		ppc_encrypt_block
	xor		rD0,rD0,rW0	/* final XOR, store interleaved	*/
	SAVE_DATA(rD0, 0)
	xor		rD1,rD1,rW1
	SAVE_DATA(rD1, 4)
	xor		rD2,rD2,rW2
	SAVE_DATA(rD2, 8)
	xor		rD3,rD3,rW3
	SAVE_DATA(rD3, 12)
	FINALIZE_CRYPT(0)
	blr

/*
 * ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec,
 *		   u32 rounds)
 *
 * called from glue layer to decrypt a single 16 byte block
 * round values are AES128 = 4, AES192 = 5, AES256 = 6
 *
 * Same flow as ppc_encrypt_aes but with the decryption table in rT0,
 * ppc_decrypt_block as round function and rT1 set to rT0 + 4096.
 * NOTE(review): rT1 is presumably a second table pointer consumed by
 * ppc_decrypt_block (defined elsewhere) - confirm.
 *
 */
_GLOBAL(ppc_decrypt_aes)
	INITIALIZE_CRYPT(PPC_AES_4K_DECTAB,0)
	LOAD_DATA(rD0, 0)		/* fetch the input block	*/
	addi		rT1,rT0,4096	/* 4096 bytes behind table base	*/
	LOAD_DATA(rD1, 4)
	LOAD_DATA(rD2, 8)
	LOAD_DATA(rD3, 12)
	START_KEY(rD0, rD1, rD2, rD3)
	bl		ppc_decrypt_block
	xor		rD0,rD0,rW0	/* final XOR, store interleaved	*/
	SAVE_DATA(rD0, 0)
	xor		rD1,rD1,rW1
	SAVE_DATA(rD1, 4)
	xor		rD2,rD2,rW2
	SAVE_DATA(rD2, 8)
	xor		rD3,rD3,rW3
	SAVE_DATA(rD3, 12)
	FINALIZE_CRYPT(0)
	blr

/*
 * ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc,
 *		   u32 rounds, u32 bytes);
 *
 * called from glue layer to encrypt multiple blocks via ECB
 * Bytes must be larger or equal 16 and only whole blocks are
 * processed. round values are AES128 = 4, AES192 = 5 and
 * AES256 = 6
 *
 * The first block is processed unconditionally; the loop repeats
 * while more than 15 bytes remain in rLN. The loop condition is
 * computed before the call to ppc_encrypt_block and tested after
 * it, so this code relies on that routine leaving cr0 untouched.
 *
 */
_GLOBAL(ppc_encrypt_ecb)
	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
ppc_encrypt_ecb_loop:
	LOAD_DATA(rD0, 0)
	mr		rKP,rKS		/* restore key pointer copy	*/
	LOAD_DATA(rD1, 4)
	subi		rLN,rLN,16	/* bytes left after this block	*/
	LOAD_DATA(rD2, 8)
	cmpwi		rLN,15		/* tested by bt after the call	*/
	LOAD_DATA(rD3, 12)
	START_KEY(rD0, rD1, rD2, rD3)
	bl		ppc_encrypt_block
	xor		rD0,rD0,rW0	/* final XOR, store interleaved	*/
	SAVE_DATA(rD0, 0)
	xor		rD1,rD1,rW1
	SAVE_DATA(rD1, 4)
	xor		rD2,rD2,rW2
	SAVE_DATA(rD2, 8)
	xor		rD3,rD3,rW3
	SAVE_DATA(rD3, 12)
	NEXT_BLOCK
	bt		gt,ppc_encrypt_ecb_loop
	FINALIZE_CRYPT(0)
	blr

/*
 * ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec,
 *		   u32 rounds, u32 bytes);
 *
 * called from glue layer to decrypt multiple blocks via ECB
 * Bytes must be larger or equal 16 and only whole blocks are
 * processed. round values are AES128 = 4, AES192 = 5 and
 * AES256 = 6
 *
 * The first block is processed unconditionally; the loop repeats
 * while more than 15 bytes remain in rLN. The loop condition is
 * computed before the call to ppc_decrypt_block and tested after
 * it, so this code relies on that routine leaving cr0 untouched.
 *
 */
_GLOBAL(ppc_decrypt_ecb)
	INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 0)
	addi		rT1,rT0,4096	/* 4096 bytes behind table base	*/
ppc_decrypt_ecb_loop:
	LOAD_DATA(rD0, 0)
	mr		rKP,rKS		/* restore key pointer copy	*/
	LOAD_DATA(rD1, 4)
	subi		rLN,rLN,16	/* bytes left after this block	*/
	LOAD_DATA(rD2, 8)
	cmpwi		rLN,15		/* tested by bt after the call	*/
	LOAD_DATA(rD3, 12)
	START_KEY(rD0, rD1, rD2, rD3)
	bl		ppc_decrypt_block
	xor		rD0,rD0,rW0	/* final XOR, store interleaved	*/
	SAVE_DATA(rD0, 0)
	xor		rD1,rD1,rW1
	SAVE_DATA(rD1, 4)
	xor		rD2,rD2,rW2
	SAVE_DATA(rD2, 8)
	xor		rD3,rD3,rW3
	SAVE_DATA(rD3, 12)
	NEXT_BLOCK
	bt		gt,ppc_decrypt_ecb_loop
	FINALIZE_CRYPT(0)
	blr

/*
 * ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc,
 *		   u32 rounds, u32 bytes, u8 *iv);
 *
 * called from glue layer to encrypt multiple blocks via CBC
 * Bytes must be larger or equal 16 and only whole blocks are
 * processed. round values are AES128 = 4, AES192 = 5 and
 * AES256 = 6
 *
 * rI0-rI3 carry the chaining value: they start as the IV read
 * through rIP, are replaced by every ciphertext block produced
 * and are written back to the IV buffer when the loop is done.
 * The loop condition is computed before the call to
 * ppc_encrypt_block and tested after it, so this code relies on
 * that routine leaving cr0 untouched.
 *
 */
_GLOBAL(ppc_encrypt_cbc)
	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
	LOAD_IV(rI0, 0)			/* initial chaining value	*/
	LOAD_IV(rI1, 4)
	LOAD_IV(rI2, 8)
	LOAD_IV(rI3, 12)
ppc_encrypt_cbc_loop:
	LOAD_DATA(rD0, 0)
	mr		rKP,rKS		/* restore key pointer copy	*/
	LOAD_DATA(rD1, 4)
	subi		rLN,rLN,16	/* bytes left after this block	*/
	LOAD_DATA(rD2, 8)
	cmpwi		rLN,15		/* tested by bt after the call	*/
	LOAD_DATA(rD3, 12)
	xor		rD0,rD0,rI0	/* plaintext ^ chaining value	*/
	xor		rD1,rD1,rI1
	xor		rD2,rD2,rI2
	xor		rD3,rD3,rI3
	START_KEY(rD0, rD1, rD2, rD3)
	bl		ppc_encrypt_block
	xor		rI0,rD0,rW0	/* result is next chaining val	*/
	SAVE_DATA(rI0, 0)
	xor		rI1,rD1,rW1
	SAVE_DATA(rI1, 4)
	xor		rI2,rD2,rW2
	SAVE_DATA(rI2, 8)
	xor		rI3,rD3,rW3
	SAVE_DATA(rI3, 12)
	NEXT_BLOCK
	bt		gt,ppc_encrypt_cbc_loop
	START_IV			/* rewind rIP where necessary	*/
	SAVE_IV(rI0, 0)			/* hand chaining value back	*/
	SAVE_IV(rI1, 4)
	SAVE_IV(rI2, 8)
	SAVE_IV(rI3, 12)
	FINALIZE_CRYPT(4)
	blr

/*
 * ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec,
 *		   u32 rounds, u32 bytes, u8 *iv);
 *
 * called from glue layer to decrypt multiple blocks via CBC
 * round values are AES128 = 4, AES192 = 5, AES256 = 6
 *
 * The buffer is processed back to front. bytes is rounded down
 * to a multiple of 16 and rSP/rDP are moved to the last block.
 * That last ciphertext block is stored to the IV buffer right
 * away, while the IV passed in stays in rI0-rI3. Inside the loop
 * a block is decrypted, the preceding ciphertext block is loaded
 * and XORed onto the result, which is then stored. The first
 * block of the buffer is handled at ppc_decrypt_cbc_end, where
 * the IV from rI0-rI3 is used instead of a preceding block.
 *
 */
_GLOBAL(ppc_decrypt_cbc)
	INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 4)
	li		rT1,15
	LOAD_IV(rI0, 0)
	andc		rLN,rLN,rT1	/* round down to whole blocks	*/
	LOAD_IV(rI1, 4)
	subi		rLN,rLN,16	/* offset of the last block	*/
	LOAD_IV(rI2, 8)
	add		rSP,rSP,rLN	/* reverse processing		*/
	LOAD_IV(rI3, 12)
	add		rDP,rDP,rLN
	LOAD_DATA(rD0, 0)		/* last ciphertext block	*/
	addi		rT1,rT0,4096	/* 4096 bytes behind table base	*/
	LOAD_DATA(rD1, 4)
	LOAD_DATA(rD2, 8)
	LOAD_DATA(rD3, 12)
	START_IV			/* rewind rIP where necessary	*/
	SAVE_IV(rD0, 0)			/* last block is the new IV	*/
	SAVE_IV(rD1, 4)
	SAVE_IV(rD2, 8)
	cmpwi		rLN,16		/* only a single block?		*/
	SAVE_IV(rD3, 12)
	bt		lt,ppc_decrypt_cbc_end
ppc_decrypt_cbc_loop:
	mr		rKP,rKS		/* restore key pointer copy	*/
	START_KEY(rD0, rD1, rD2, rD3)
	bl		ppc_decrypt_block
	subi		rLN,rLN,16
	subi		rSP,rSP,CBC_DEC	/* back to preceding block	*/
	xor		rW0,rD0,rW0	/* final XOR of decryption	*/
	LOAD_DATA(rD0, 0)		/* preceding ciphertext block	*/
	xor		rW1,rD1,rW1
	LOAD_DATA(rD1, 4)
	xor		rW2,rD2,rW2
	LOAD_DATA(rD2, 8)
	xor		rW3,rD3,rW3
	LOAD_DATA(rD3, 12)
	xor		rW0,rW0,rD0	/* undo the CBC chaining	*/
	SAVE_DATA(rW0, 0)
	xor		rW1,rW1,rD1
	SAVE_DATA(rW1, 4)
	xor		rW2,rW2,rD2
	SAVE_DATA(rW2, 8)
	xor		rW3,rW3,rD3
	SAVE_DATA(rW3, 12)
	cmpwi		rLN,15
	subi		rDP,rDP,CBC_DEC	/* back to preceding block	*/
	bt		gt,ppc_decrypt_cbc_loop
ppc_decrypt_cbc_end:
	mr		rKP,rKS		/* restore key pointer copy	*/
	START_KEY(rD0, rD1, rD2, rD3)
	bl		ppc_decrypt_block
	xor		rW0,rW0,rD0	/* final XOR of decryption	*/
	xor		rW1,rW1,rD1
	xor		rW2,rW2,rD2
	xor		rW3,rW3,rD3
	xor		rW0,rW0,rI0	/* decrypt with initial IV	*/
	SAVE_DATA(rW0, 0)
	xor		rW1,rW1,rI1
	SAVE_DATA(rW1, 4)
	xor		rW2,rW2,rI2
	SAVE_DATA(rW2, 8)
	xor		rW3,rW3,rI3
	SAVE_DATA(rW3, 12)
	FINALIZE_CRYPT(4)
	blr

/*
 * ppc_crypt_ctr(u8 *out, const u8 *in, u32 *key_enc,
 *		 u32 rounds, u32 bytes, u8 *iv);
 *
 * called from glue layer to encrypt/decrypt multiple blocks
 * via CTR. Number of bytes does not need to be a multiple of
 * 16. Round values are AES128 = 4, AES192 = 5, AES256 = 6
 *
 * rI0-rI3 hold the counter block read through rIP, with rI3 as
 * the least significant word: it is incremented by one after
 * every block and the carry is propagated up to rI0. Each
 * counter value is encrypted and the result XORed onto the
 * data. A trailing partial block gets its encrypted counter
 * written to the IV buffer, from where it is XORed byte by byte
 * onto the remaining rLN input bytes. The updated counter is
 * stored back to the IV buffer before returning. Nothing is
 * encrypted when bytes is 0.
 *
 */
_GLOBAL(ppc_crypt_ctr)
	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
	LOAD_IV(rI0, 0)			/* fetch the counter block	*/
	LOAD_IV(rI1, 4)
	LOAD_IV(rI2, 8)
	cmpwi		rLN,16		/* less than one full block?	*/
	LOAD_IV(rI3, 12)
	START_IV			/* rewind rIP where necessary	*/
	bt		lt,ppc_crypt_ctr_partial
ppc_crypt_ctr_loop:
	mr		rKP,rKS		/* restore key pointer copy	*/
	START_KEY(rI0, rI1, rI2, rI3)
	bl		ppc_encrypt_block
	xor		rW0,rD0,rW0	/* rW0-rW3 = encrypted counter	*/
	xor		rW1,rD1,rW1
	xor		rW2,rD2,rW2
	xor		rW3,rD3,rW3
	LOAD_DATA(rD0, 0)
	subi		rLN,rLN,16	/* bytes left after this block	*/
	LOAD_DATA(rD1, 4)
	LOAD_DATA(rD2, 8)
	LOAD_DATA(rD3, 12)
	xor		rD0,rD0,rW0	/* data ^ encrypted counter	*/
	SAVE_DATA(rD0, 0)
	xor		rD1,rD1,rW1
	SAVE_DATA(rD1, 4)
	xor		rD2,rD2,rW2
	SAVE_DATA(rD2, 8)
	xor		rD3,rD3,rW3
	SAVE_DATA(rD3, 12)
	addic		rI3,rI3,1	/* increase counter			*/
	addze		rI2,rI2		/* and ripple the carry upwards	*/
	addze		rI1,rI1
	addze		rI0,rI0
	NEXT_BLOCK
	cmpwi		rLN,15
	bt		gt,ppc_crypt_ctr_loop
ppc_crypt_ctr_partial:
	cmpwi		rLN,0		/* no trailing bytes at all?	*/
	bt		eq,ppc_crypt_ctr_end
	mr		rKP,rKS		/* restore key pointer copy	*/
	START_KEY(rI0, rI1, rI2, rI3)
	bl		ppc_encrypt_block
	xor		rW0,rD0,rW0	/* park encrypted counter in	*/
	SAVE_IV(rW0, 0)			/* the IV buffer		*/
	xor		rW1,rD1,rW1
	SAVE_IV(rW1, 4)
	xor		rW2,rD2,rW2
	SAVE_IV(rW2, 8)
	xor		rW3,rD3,rW3
	SAVE_IV(rW3, 12)
	mtctr		rLN		/* one iteration per byte left	*/
	subi		rIP,rIP,CTR_DEC	/* one byte before IV buffer	*/
	subi		rSP,rSP,1	/* bias for the update forms	*/
	subi		rDP,rDP,1
ppc_crypt_ctr_xorbyte:
	lbzu		rW4,1(rIP)	/* bytewise xor for partial block	*/
	lbzu		rW5,1(rSP)
	xor		rW4,rW4,rW5
	stbu		rW4,1(rDP)
	bdnz		ppc_crypt_ctr_xorbyte
	subf		rIP,rLN,rIP	/* undo the rLN byte steps	*/
	addi		rIP,rIP,1	/* rIP is at the IV start again	*/
	addic		rI3,rI3,1	/* count the partial block too	*/
	addze		rI2,rI2
	addze		rI1,rI1
	addze		rI0,rI0
ppc_crypt_ctr_end:
	SAVE_IV(rI0, 0)			/* hand the counter back	*/
	SAVE_IV(rI1, 4)
	SAVE_IV(rI2, 8)
	SAVE_IV(rI3, 12)
	FINALIZE_CRYPT(4)
	blr

/*
 * ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc,
 *		   u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
 *
 * called from glue layer to encrypt multiple blocks via XTS
 * If key_twk is given, the initial IV encryption will be
 * processed too. Round values are AES128 = 4, AES192 = 5,
 * AES256 = 6
 *
 * The first block is always processed, after that only whole
 * blocks are processed: the loop repeats while more than 15
 * bytes remain, matching the ECB and CBC routines. A trailing
 * partial block is never touched.
 *
 * rI0-rI3 hold the tweak in the byte order used for the data
 * XOR, rG0-rG3 hold a byte swapped copy on which GF128_MUL
 * operates. The tweak for the block following the last one
 * processed is written back to the IV buffer.
 *
 */
_GLOBAL(ppc_encrypt_xts)
	INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 8)
	LOAD_IV(rI0, 0)
	LOAD_IV(rI1, 4)
	LOAD_IV(rI2, 8)
	cmpwi		rKT,0		/* tweak key given?		*/
	LOAD_IV(rI3, 12)
	bt		eq,ppc_encrypt_xts_notweak
	mr		rKP,rKT		/* encrypt IV with tweak key	*/
	START_KEY(rI0, rI1, rI2, rI3)
	bl		ppc_encrypt_block
	xor		rI0,rD0,rW0	/* rI0-rI3 = initial tweak	*/
	xor		rI1,rD1,rW1
	xor		rI2,rD2,rW2
	xor		rI3,rD3,rW3
ppc_encrypt_xts_notweak:
	ENDIAN_SWAP(rG0, rG1, rI0, rI1)
	ENDIAN_SWAP(rG2, rG3, rI2, rI3)
ppc_encrypt_xts_loop:
	LOAD_DATA(rD0, 0)
	mr		rKP,rKS		/* restore key pointer copy	*/
	LOAD_DATA(rD1, 4)
	subi		rLN,rLN,16	/* bytes left after this block	*/
	LOAD_DATA(rD2, 8)
	LOAD_DATA(rD3, 12)
	xor		rD0,rD0,rI0	/* data ^ tweak before cipher	*/
	xor		rD1,rD1,rI1
	xor		rD2,rD2,rI2
	xor		rD3,rD3,rI3
	START_KEY(rD0, rD1, rD2, rD3)
	bl		ppc_encrypt_block
	xor		rD0,rD0,rW0	/* final XOR of encryption	*/
	xor		rD1,rD1,rW1
	xor		rD2,rD2,rW2
	xor		rD3,rD3,rW3
	xor		rD0,rD0,rI0	/* result ^ tweak after cipher	*/
	SAVE_DATA(rD0, 0)
	xor		rD1,rD1,rI1
	SAVE_DATA(rD1, 4)
	xor		rD2,rD2,rI2
	SAVE_DATA(rD2, 8)
	xor		rD3,rD3,rI3
	SAVE_DATA(rD3, 12)
	GF128_MUL(rG0, rG1, rG2, rG3, rW0)
	ENDIAN_SWAP(rI0, rI1, rG0, rG1)
	ENDIAN_SWAP(rI2, rI3, rG2, rG3)
	cmpwi		rLN,15		/* another whole block left?	*/
	NEXT_BLOCK
	bt		gt,ppc_encrypt_xts_loop
	START_IV			/* rewind rIP where necessary	*/
	SAVE_IV(rI0, 0)			/* hand next tweak back		*/
	SAVE_IV(rI1, 4)
	SAVE_IV(rI2, 8)
	SAVE_IV(rI3, 12)
	FINALIZE_CRYPT(8)
	blr

/*
 * ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec,
 *		   u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
 *
 * called from glue layer to decrypt multiple blocks via XTS
 * If key_twk is given, the initial IV encryption will be
 * processed too. Round values are AES128 = 4, AES192 = 5,
 * AES256 = 6
 *
 * The first block is always processed, after that only whole
 * blocks are processed: the loop repeats while more than 15
 * bytes remain, matching the ECB and CBC routines. A trailing
 * partial block is never touched.
 *
 * The IV is turned into the tweak with ppc_encrypt_block, also
 * when decrypting. rT0 is moved down by 4096 for that call and
 * restored afterwards.
 * NOTE(review): this presumably relies on the encryption table
 * being located 4096 bytes in front of PPC_AES_4K_DECTAB -
 * confirm against the table definitions.
 *
 * rI0-rI3 hold the tweak in the byte order used for the data
 * XOR, rG0-rG3 hold a byte swapped copy on which GF128_MUL
 * operates. The tweak for the block following the last one
 * processed is written back to the IV buffer.
 *
 */
_GLOBAL(ppc_decrypt_xts)
	INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 8)
	LOAD_IV(rI0, 0)
	addi		rT1,rT0,4096	/* 4096 bytes behind table base	*/
	LOAD_IV(rI1, 4)
	LOAD_IV(rI2, 8)
	cmpwi		rKT,0		/* tweak key given?		*/
	LOAD_IV(rI3, 12)
	bt		eq,ppc_decrypt_xts_notweak
	subi		rT0,rT0,4096	/* table for the encrypt call	*/
	mr		rKP,rKT		/* encrypt IV with tweak key	*/
	START_KEY(rI0, rI1, rI2, rI3)
	bl		ppc_encrypt_block
	xor		rI0,rD0,rW0	/* rI0-rI3 = initial tweak	*/
	xor		rI1,rD1,rW1
	xor		rI2,rD2,rW2
	xor		rI3,rD3,rW3
	addi		rT0,rT0,4096	/* back to decryption table	*/
ppc_decrypt_xts_notweak:
	ENDIAN_SWAP(rG0, rG1, rI0, rI1)
	ENDIAN_SWAP(rG2, rG3, rI2, rI3)
ppc_decrypt_xts_loop:
	LOAD_DATA(rD0, 0)
	mr		rKP,rKS		/* restore key pointer copy	*/
	LOAD_DATA(rD1, 4)
	subi		rLN,rLN,16	/* bytes left after this block	*/
	LOAD_DATA(rD2, 8)
	LOAD_DATA(rD3, 12)
	xor		rD0,rD0,rI0	/* data ^ tweak before cipher	*/
	xor		rD1,rD1,rI1
	xor		rD2,rD2,rI2
	xor		rD3,rD3,rI3
	START_KEY(rD0, rD1, rD2, rD3)
	bl		ppc_decrypt_block
	xor		rD0,rD0,rW0	/* final XOR of decryption	*/
	xor		rD1,rD1,rW1
	xor		rD2,rD2,rW2
	xor		rD3,rD3,rW3
	xor		rD0,rD0,rI0	/* result ^ tweak after cipher	*/
	SAVE_DATA(rD0, 0)
	xor		rD1,rD1,rI1
	SAVE_DATA(rD1, 4)
	xor		rD2,rD2,rI2
	SAVE_DATA(rD2, 8)
	xor		rD3,rD3,rI3
	SAVE_DATA(rD3, 12)
	GF128_MUL(rG0, rG1, rG2, rG3, rW0)
	ENDIAN_SWAP(rI0, rI1, rG0, rG1)
	ENDIAN_SWAP(rI2, rI3, rG2, rG3)
	cmpwi		rLN,15		/* another whole block left?	*/
	NEXT_BLOCK
	bt		gt,ppc_decrypt_xts_loop
	START_IV			/* rewind rIP where necessary	*/
	SAVE_IV(rI0, 0)			/* hand next tweak back		*/
	SAVE_IV(rI1, 4)
	SAVE_IV(rI2, 8)
	SAVE_IV(rI3, 12)
	FINALIZE_CRYPT(8)
	blr