/* Copyright (c) 2023, Google Inc. * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #define OPENSSL_UNSTABLE_EXPERIMENTAL_KYBER #include <openssl/experimental/kyber.h> #include <assert.h> #include <stdlib.h> #include <openssl/bytestring.h> #include <openssl/rand.h> #include "../internal.h" #include "../keccak/internal.h" #include "./internal.h" // See // https://pq-crystals.org/kyber/data/kyber-specification-round3-20210804.pdf static void prf(uint8_t *out, size_t out_len, const uint8_t in[33]) { … } static void hash_h(uint8_t out[32], const uint8_t *in, size_t len) { … } static void hash_g(uint8_t out[64], const uint8_t *in, size_t len) { … } static void kdf(uint8_t *out, size_t out_len, const uint8_t *in, size_t len) { … } #define DEGREE … #define RANK … static const size_t kBarrettMultiplier = …; static const unsigned kBarrettShift = …; static const uint16_t kPrime = …; static const int kLog2Prime = …; static const uint16_t kHalfPrime = …; static const int kDU = …; static const int kDV = …; // kInverseDegree is 128^-1 mod 3329; 128 because kPrime does not have a 512th // root of unity. 
static const uint16_t kInverseDegree = …; static const size_t kEncodedVectorSize = …; static const size_t kCompressedVectorSize = …/*kDU=*/10 * RANK * DEGREE / 8; scalar; vector; matrix; // This bit of Python will be referenced in some of the following comments: // // p = 3329 // // def bitreverse(i): // ret = 0 // for n in range(7): // bit = i & 1 // ret <<= 1 // ret |= bit // i >>= 1 // return ret // kNTTRoots = [pow(17, bitreverse(i), p) for i in range(128)] static const uint16_t kNTTRoots[128] = …; // kInverseNTTRoots = [pow(17, -bitreverse(i), p) for i in range(128)] static const uint16_t kInverseNTTRoots[128] = …; // kModRoots = [pow(17, 2*bitreverse(i) + 1, p) for i in range(128)] static const uint16_t kModRoots[128] = …; // reduce_once reduces 0 <= x < 2*kPrime, mod kPrime. static uint16_t reduce_once(uint16_t x) { … } // constant time reduce x mod kPrime using Barrett reduction. x must be less // than kPrime + 2×kPrime². static uint16_t reduce(uint32_t x) { … } static void scalar_zero(scalar *out) { … } static void vector_zero(vector *out) { … } // In place number theoretic transform of a given scalar. // Note that Kyber's kPrime 3329 does not have a 512th root of unity, so this // transform leaves off the last iteration of the usual FFT code, with the 128 // relevant roots of unity being stored in |kNTTRoots|. This means the output // should be seen as 128 elements in GF(3329^2), with the coefficients of the // elements being consecutive entries in |s->c|. static void scalar_ntt(scalar *s) { … } static void vector_ntt(vector *a) { … } // In place inverse number theoretic transform of a given scalar, with pairs of // entries of s->v being interpreted as elements of GF(3329^2). Just as with the // number theoretic transform, this leaves off the first step of the normal iFFT // to account for the fact that 3329 does not have a 512th root of unity, using // the precomputed 128 roots of unity stored in |kInverseNTTRoots|. 
static void scalar_inverse_ntt(scalar *s) { … } static void vector_inverse_ntt(vector *a) { … } static void scalar_add(scalar *lhs, const scalar *rhs) { … } static void scalar_sub(scalar *lhs, const scalar *rhs) { … } // Multiplying two scalars in the number theoretically transformed state. Since // 3329 does not have a 512th root of unity, this means we have to interpret // the 2*ith and (2*i+1)th entries of the scalar as elements of GF(3329)[X]/(X^2 // - 17^(2*bitreverse(i)+1)). The value of 17^(2*bitreverse(i)+1) mod 3329 is // stored in the precomputed |kModRoots| table. Note that our Barrett transform // only allows us to multiply two reduced numbers together, so we need some // intermediate reduction steps, even if a uint64_t could hold 3 multiplied // numbers. static void scalar_mult(scalar *out, const scalar *lhs, const scalar *rhs) { … } static void vector_add(vector *lhs, const vector *rhs) { … } static void matrix_mult(vector *out, const matrix *m, const vector *a) { … } static void matrix_mult_transpose(vector *out, const matrix *m, const vector *a) { … } static void scalar_inner_product(scalar *out, const vector *lhs, const vector *rhs) { … } // Algorithm 1 of the Kyber spec. Rejection samples a Keccak stream to get // uniformly distributed elements. This is used for matrix expansion and only // operates on public inputs. static void scalar_from_keccak_vartime(scalar *out, struct BORINGSSL_keccak_st *keccak_ctx) { … } // Algorithm 2 of the Kyber spec, with eta fixed to two and the PRF call // included. Creates binomially distributed elements by sampling 2*|eta| bits, // and setting the coefficient to the count of the first bits minus the count of // the second bits, resulting in a centered binomial distribution. Since eta is // two this gives -2/2 with a probability of 1/16 each, -1/1 with probability // 1/4 each, and 0 with probability 3/8. 
static void scalar_centered_binomial_distribution_eta_2_with_prf( scalar *out, const uint8_t input[33]) { … } // Generates a secret vector by using // |scalar_centered_binomial_distribution_eta_2_with_prf|, using the given seed, // appending and incrementing |counter| for each entry of the vector. static void vector_generate_secret_eta_2(vector *out, uint8_t *counter, const uint8_t seed[32]) { … } // Expands the matrix of a seed for key generation and for encaps-CPA. static void matrix_expand(matrix *out, const uint8_t rho[32]) { … } static const uint8_t kMasks[8] = …; static void scalar_encode(uint8_t *out, const scalar *s, int bits) { … } // scalar_encode_1 is |scalar_encode| specialised for |bits| == 1. static void scalar_encode_1(uint8_t out[32], const scalar *s) { … } // Encodes an entire vector into 32*|RANK|*|bits| bytes. Note that since 256 // (DEGREE) is divisible by 8, the individual vector entries will always fill a // whole number of bytes, so we do not need to worry about bit packing here. static void vector_encode(uint8_t *out, const vector *a, int bits) { … } // scalar_decode parses |DEGREE * bits| bits from |in| into |DEGREE| values in // |out|. It returns one on success and zero if any parsed value is >= // |kPrime|. static int scalar_decode(scalar *out, const uint8_t *in, int bits) { … } // scalar_decode_1 is |scalar_decode| specialised for |bits| == 1. static void scalar_decode_1(scalar *out, const uint8_t in[32]) { … } // Decodes 32*|RANK|*|bits| bytes from |in| into |out|. It returns one on // success or zero if any parsed value is >= |kPrime|. static int vector_decode(vector *out, const uint8_t *in, int bits) { … } // Compresses (lossily) an input |x| mod 3329 into |bits| many bits by grouping // numbers close to each other together. The formula used is // round(2^|bits|/kPrime*x) mod 2^|bits|. // Uses Barrett reduction to achieve constant time. 
Since we need both the // remainder (for rounding) and the quotient (as the result), we cannot use // |reduce| here, but need to do the Barrett reduction directly. static uint16_t compress(uint16_t x, int bits) { … } // Decompresses |x| by using an equi-distant representative. The formula is // round(kPrime/2^|bits|*x). Note that 2^|bits| being the divisor allows us to // implement this logic using only bit operations. static uint16_t decompress(uint16_t x, int bits) { … } static void scalar_compress(scalar *s, int bits) { … } static void scalar_decompress(scalar *s, int bits) { … } static void vector_compress(vector *a, int bits) { … } static void vector_decompress(vector *a, int bits) { … } struct public_key { … }; static struct public_key *public_key_from_external( const struct KYBER_public_key *external) { … } struct private_key { … }; static struct private_key *private_key_from_external( const struct KYBER_private_key *external) { … } // Calls |KYBER_generate_key_external_entropy| with random bytes from // |RAND_bytes|. void KYBER_generate_key(uint8_t out_encoded_public_key[KYBER_PUBLIC_KEY_BYTES], struct KYBER_private_key *out_private_key) { … } static int kyber_marshal_public_key(CBB *out, const struct public_key *pub) { … } // Algorithms 4 and 7 of the Kyber spec. Algorithms are combined since key // generation is not part of the FO transform, and the spec uses Algorithm 7 to // specify the actual key format. void KYBER_generate_key_external_entropy( uint8_t out_encoded_public_key[KYBER_PUBLIC_KEY_BYTES], struct KYBER_private_key *out_private_key, const uint8_t entropy[KYBER_GENERATE_KEY_ENTROPY]) { … } void KYBER_public_from_private(struct KYBER_public_key *out_public_key, const struct KYBER_private_key *private_key) { … } // Algorithm 5 of the Kyber spec. Encrypts a message with given randomness to // the ciphertext in |out|. 
Without applying the Fujisaki-Okamoto transform this // would not result in a CCA secure scheme, since lattice schemes are vulnerable // to decryption failure oracles. static void encrypt_cpa(uint8_t out[KYBER_CIPHERTEXT_BYTES], const struct public_key *pub, const uint8_t message[32], const uint8_t randomness[32]) { … } // Calls |KYBER_encap_external_entropy| with random bytes from |RAND_bytes|. void KYBER_encap(uint8_t out_ciphertext[KYBER_CIPHERTEXT_BYTES], uint8_t out_shared_secret[KYBER_SHARED_SECRET_BYTES], const struct KYBER_public_key *public_key) { … } // Algorithm 8 of the Kyber spec, save for line 2 of the spec. The spec there // hashes the output of the system's random number generator, since the FO // transform will reveal it to the decrypting party. There is no reason to do // this when a secure random number generator is used. When an insecure random // number generator is used, the caller should switch to a secure one before // calling this method. void KYBER_encap_external_entropy( uint8_t out_ciphertext[KYBER_CIPHERTEXT_BYTES], uint8_t out_shared_secret[KYBER_SHARED_SECRET_BYTES], const struct KYBER_public_key *public_key, const uint8_t entropy[KYBER_ENCAP_ENTROPY]) { … } // Algorithm 6 of the Kyber spec. static void decrypt_cpa(uint8_t out[32], const struct private_key *priv, const uint8_t ciphertext[KYBER_CIPHERTEXT_BYTES]) { … } // Algorithm 9 of the Kyber spec, performing the FO transform by running // encrypt_cpa on the decrypted message. The spec does not allow the decryption // failure to be passed on to the caller, and instead returns a result that is // deterministic but unpredictable to anyone without knowledge of the private // key. 
void KYBER_decap(uint8_t out_shared_secret[KYBER_SHARED_SECRET_BYTES], const uint8_t ciphertext[KYBER_CIPHERTEXT_BYTES], const struct KYBER_private_key *private_key) { … } int KYBER_marshal_public_key(CBB *out, const struct KYBER_public_key *public_key) { … } // kyber_parse_public_key_no_hash parses |in| into |pub| but doesn't calculate // the value of |pub->public_key_hash|. static int kyber_parse_public_key_no_hash(struct public_key *pub, CBS *in) { … } int KYBER_parse_public_key(struct KYBER_public_key *public_key, CBS *in) { … } int KYBER_marshal_private_key(CBB *out, const struct KYBER_private_key *private_key) { … } int KYBER_parse_private_key(struct KYBER_private_key *out_private_key, CBS *in) { … }