twofish_common.c | Explore in Territory

// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Common Twofish algorithm parts shared between the c and assembler
 * implementations
 *
 * Originally Twofish for GPG
 * By Matthew Skala <[email protected]>, July 26, 1998
 * 256-bit key length added March 20, 1999
 * Some modifications to reduce the text size by Werner Koch, April, 1998
 * Ported to the kerneli patch by Marc Mutz <[email protected]>
 * Ported to CryptoAPI by Colin Slater <[email protected]>
 *
 * The original author has disclaimed all copyright interest in this
 * code and thus put it in the public domain. The subsequent authors
 * have put this under the GNU General Public License.
 *
 * This code is a "clean room" implementation, written from the paper
 * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey,
 * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available
 * through http://www.counterpane.com/twofish.html
 *
 * For background information on multiplication in finite fields, used for
 * the matrix operations in the key schedule, see the book _Contemporary
 * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the
 * Third Edition.
 */

#include <crypto/algapi.h>
#include <crypto/twofish.h>
#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/types.h>


/* The large precomputed tables for the Twofish cipher (twofish.c)
 * Taken from the same source as twofish.c
 * Marc Mutz <[email protected]>
 */

/* These two tables are the q0 and q1 permutations, exactly as described in
 * the Twofish paper. */

static const u8 q0[256] = …;

static const u8 q1[256] = …;

/* These MDS tables are actually tables of MDS composed with q0 and q1,
 * because it is only ever used that way and we can save some time by
 * precomputing.  Of course the main saving comes from precomputing the
 * GF(2^8) multiplication involved in the MDS matrix multiply; by looking
 * things up in these tables we reduce the matrix multiply to four lookups
 * and three XORs.  Semi-formally, the definition of these tables is:
 * mds[0][i] = MDS (q1[i] 0 0 0)^T  mds[1][i] = MDS (0 q0[i] 0 0)^T
 * mds[2][i] = MDS (0 0 q1[i] 0)^T  mds[3][i] = MDS (0 0 0 q0[i])^T
 * where ^T means "transpose", the matrix multiply is performed in GF(2^8)
 * represented as GF(2)[x]/v(x) where v(x)=x^8+x^6+x^5+x^3+1 as described
 * by Schneier et al, and I'm casually glossing over the byte/word
 * conversion issues. */

static const u32 mds[4][256] = …;

/* The exp_to_poly and poly_to_exp tables are used to perform efficient
 * operations in GF(2^8) represented as GF(2)[x]/w(x) where
 * w(x)=x^8+x^6+x^3+x^2+1.  We care about doing that because it's part of the
 * definition of the RS matrix in the key schedule.  Elements of that field
 * are polynomials of degree not greater than 7 and all coefficients 0 or 1,
 * which can be represented naturally by bytes (just substitute x=2).  In that
 * form, GF(2^8) addition is the same as bitwise XOR, but GF(2^8)
 * multiplication is inefficient without hardware support.  To multiply
 * faster, I make use of the fact x is a generator for the nonzero elements,
 * so that every element p of GF(2)[x]/w(x) is either 0 or equal to (x)^n for
 * some n in 0..254.  Note that caret is exponentiation in GF(2^8),
 * *not* polynomial notation.  So if I want to compute pq where p and q are
 * in GF(2^8), I can just say:
 *    1. if p=0 or q=0 then pq=0
 *    2. otherwise, find m and n such that p=x^m and q=x^n
 *    3. pq=(x^m)(x^n)=x^(m+n), so add m and n and find pq
 * The translations in steps 2 and 3 are looked up in the tables
 * poly_to_exp (for step 2) and exp_to_poly (for step 3).  To see this
 * in action, look at the CALC_S macro.  As additional wrinkles, note that
 * one of my operands is always a constant, so the poly_to_exp lookup on it
 * is done in advance; I included the original values in the comments so
 * readers can have some chance of recognizing that this *is* the RS matrix
 * from the Twofish paper.  I've only included the table entries I actually
 * need; I never do a lookup on a variable input of zero and the biggest
 * exponents I'll ever see are 254 (variable) and 237 (constant), so they'll
 * never sum to more than 491.	I'm repeating part of the exp_to_poly table
 * so that I don't have to do mod-255 reduction in the exponent arithmetic.
 * Since I know my constant operands are never zero, I only have to worry
 * about zero values in the variable operand, and I do it with a simple
 * conditional branch.	I know conditionals are expensive, but I couldn't
 * see a non-horrible way of avoiding them, and I did manage to group the
 * statements so that each if covers four group multiplications. */

static const u8 poly_to_exp[255] = …;

static const u8 exp_to_poly[492] = …;


/* The table constants are indices of
 * S-box entries, preprocessed through q0 and q1. */
static const u8 calc_sb_tbl[512] = …;

/* Macro to perform one column of the RS matrix multiplication.  The
 * parameters a, b, c, and d are the four bytes of output; i is the index
 * of the key bytes, and w, x, y, and z, are the column of constants from
 * the RS matrix, preprocessed through the poly_to_exp table. */

#define CALC_S(a, b, c, d, i, w, x, y, z) …

/* Macros to calculate the key-dependent S-boxes for a 128-bit key using
 * the S vector from CALC_S.  CALC_SB_2 computes a single entry in all
 * four S-boxes, where i is the index of the entry to compute, and a and b
 * are the index numbers preprocessed through the q0 and q1 tables
 * respectively. */

#define CALC_SB_2(i, a, b) …

/* Macro exactly like CALC_SB_2, but for 192-bit keys. */

#define CALC_SB192_2(i, a, b) …

/* Macro exactly like CALC_SB_2, but for 256-bit keys. */

#define CALC_SB256_2(i, a, b) …

/* Macros to calculate the whitening and round subkeys.  CALC_K_2 computes the
 * last two stages of the h() function for a given index (either 2i or 2i+1).
 * a, b, c, and d are the four bytes going into the last two stages.  For
 * 128-bit keys, this is the entire h() function and a and c are the index
 * preprocessed through q0 and q1 respectively; for longer keys they are the
 * output of previous stages.  j is the index of the first key byte to use.
 * CALC_K computes a pair of subkeys for 128-bit Twofish, by calling CALC_K_2
 * twice, doing the Pseudo-Hadamard Transform, and doing the necessary
 * rotations.  Its parameters are: a, the array to write the results into,
 * j, the index of the first output entry, k and l, the preprocessed indices
 * for index 2i, and m and n, the preprocessed indices for index 2i+1.
 * CALC_K192_2 expands CALC_K_2 to handle 192-bit keys, by doing an
 * additional lookup-and-XOR stage.  The parameters a, b, c and d are the
 * four bytes going into the last three stages.  For 192-bit keys, c = d
 * are the index preprocessed through q0, and a = b are the index
 * preprocessed through q1; j is the index of the first key byte to use.
 * CALC_K192 is identical to CALC_K but for using the CALC_K192_2 macro
 * instead of CALC_K_2.
 * CALC_K256_2 expands CALC_K192_2 to handle 256-bit keys, by doing an
 * additional lookup-and-XOR stage.  The parameters a and b are the index
 * preprocessed through q0 and q1 respectively; j is the index of the first
 * key byte to use.  CALC_K256 is identical to CALC_K but for using the
 * CALC_K256_2 macro instead of CALC_K_2. */

#define CALC_K_2(a, b, c, d, j) …

#define CALC_K(a, j, k, l, m, n) …

#define CALC_K192_2(a, b, c, d, j) …

#define CALC_K192(a, j, k, l, m, n) …

#define CALC_K256_2(a, b, j) …

#define CALC_K256(a, j, k, l, m, n) …

/* Perform the key setup. */
int __twofish_setkey(struct twofish_ctx *ctx, const u8 *key,
		     unsigned int key_len)
{ … }
EXPORT_SYMBOL_GPL(…);

int twofish_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int key_len)
{ … }
EXPORT_SYMBOL_GPL(…);

MODULE_LICENSE(…) …;
MODULE_DESCRIPTION(…) …;
linux/crypto/twofish_common.c