#ifndef LIBMPDEC_TYPEARITH_H_
#define LIBMPDEC_TYPEARITH_H_
#include "mpdecimal.h"
#include <assert.h>
#if defined(CONFIG_64)
#if defined(ANSI)
#if defined(HAVE_UINT128_T)
static inline void
_mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
{ … }
static inline void
_mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo,
mpd_uint_t d)
{ … }
#else
static inline void
_mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
{
uint32_t w[4], carry;
uint32_t ah, al, bh, bl;
uint64_t hl;
ah = (uint32_t)(a>>32); al = (uint32_t)a;
bh = (uint32_t)(b>>32); bl = (uint32_t)b;
hl = (uint64_t)al * bl;
w[0] = (uint32_t)hl;
carry = (uint32_t)(hl>>32);
hl = (uint64_t)ah * bl + carry;
w[1] = (uint32_t)hl;
w[2] = (uint32_t)(hl>>32);
hl = (uint64_t)al * bh + w[1];
w[1] = (uint32_t)hl;
carry = (uint32_t)(hl>>32);
hl = ((uint64_t)ah * bh + w[2]) + carry;
w[2] = (uint32_t)hl;
w[3] = (uint32_t)(hl>>32);
*hi = ((uint64_t)w[3]<<32) + w[2];
*lo = ((uint64_t)w[1]<<32) + w[0];
}
static inline int
nlz(uint64_t x)
{
int n;
if (x == 0) return(64);
n = 0;
if (x <= 0x00000000FFFFFFFF) {n = n +32; x = x <<32;}
if (x <= 0x0000FFFFFFFFFFFF) {n = n +16; x = x <<16;}
if (x <= 0x00FFFFFFFFFFFFFF) {n = n + 8; x = x << 8;}
if (x <= 0x0FFFFFFFFFFFFFFF) {n = n + 4; x = x << 4;}
if (x <= 0x3FFFFFFFFFFFFFFF) {n = n + 2; x = x << 2;}
if (x <= 0x7FFFFFFFFFFFFFFF) {n = n + 1;}
return n;
}
static inline void
_mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t u1, mpd_uint_t u0,
mpd_uint_t v)
{
const mpd_uint_t b = 4294967296;
mpd_uint_t un1, un0,
vn1, vn0,
q1, q0,
un32, un21, un10,
rhat, t;
int s;
assert(u1 < v);
s = nlz(v);
v = v << s;
vn1 = v >> 32;
vn0 = v & 0xFFFFFFFF;
t = (s == 0) ? 0 : u0 >> (64 - s);
un32 = (u1 << s) | t;
un10 = u0 << s;
un1 = un10 >> 32;
un0 = un10 & 0xFFFFFFFF;
q1 = un32 / vn1;
rhat = un32 - q1*vn1;
again1:
if (q1 >= b || q1*vn0 > b*rhat + un1) {
q1 = q1 - 1;
rhat = rhat + vn1;
if (rhat < b) goto again1;
}
un21 = un32*b + un1 - q1*v;
q0 = un21 / vn1;
rhat = un21 - q0*vn1;
again2:
if (q0 >= b || q0*vn0 > b*rhat + un0) {
q0 = q0 - 1;
rhat = rhat + vn1;
if (rhat < b) goto again2;
}
*q = q1*b + q0;
*r = (un21*b + un0 - q0*v) >> s;
}
#endif
#elif defined(ASM)
static inline void
_mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
{
mpd_uint_t h, l;
__asm__ ( "mulq %3\n\t"
: "=d" (h), "=a" (l)
: "%a" (a), "rm" (b)
: "cc"
);
*hi = h;
*lo = l;
}
static inline void
_mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo,
mpd_uint_t d)
{
mpd_uint_t qq, rr;
__asm__ ( "divq %4\n\t"
: "=a" (qq), "=d" (rr)
: "a" (lo), "d" (hi), "rm" (d)
: "cc"
);
*q = qq;
*r = rr;
}
#elif defined(MASM)
#include <intrin.h>
#pragma intrinsic(_umul128)
static inline void
_mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
{
*lo = _umul128(a, b, hi);
}
void _mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo,
mpd_uint_t d);
#else
#error "need platform specific 128 bit multiplication and division"
#endif
#define DIVMOD(q, r, v, d) …
static inline void
_mpd_divmod_pow10(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t v, mpd_uint_t exp)
{ … }
#elif defined(CONFIG_32)
#if defined(ANSI)
#if !defined(LEGACY_COMPILER)
static inline void
_mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
{
mpd_uuint_t hl;
hl = (mpd_uuint_t)a * b;
*hi = hl >> 32;
*lo = (mpd_uint_t)hl;
}
static inline void
_mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo,
mpd_uint_t d)
{
mpd_uuint_t hl;
hl = ((mpd_uuint_t)hi<<32) + lo;
*q = (mpd_uint_t)(hl / d);
*r = (mpd_uint_t)(hl - (mpd_uuint_t)(*q) * d);
}
#else
static inline void
_mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
{
uint16_t w[4], carry;
uint16_t ah, al, bh, bl;
uint32_t hl;
ah = (uint16_t)(a>>16); al = (uint16_t)a;
bh = (uint16_t)(b>>16); bl = (uint16_t)b;
hl = (uint32_t)al * bl;
w[0] = (uint16_t)hl;
carry = (uint16_t)(hl>>16);
hl = (uint32_t)ah * bl + carry;
w[1] = (uint16_t)hl;
w[2] = (uint16_t)(hl>>16);
hl = (uint32_t)al * bh + w[1];
w[1] = (uint16_t)hl;
carry = (uint16_t)(hl>>16);
hl = ((uint32_t)ah * bh + w[2]) + carry;
w[2] = (uint16_t)hl;
w[3] = (uint16_t)(hl>>16);
*hi = ((uint32_t)w[3]<<16) + w[2];
*lo = ((uint32_t)w[1]<<16) + w[0];
}
static inline int
nlz(uint32_t x)
{
int n;
if (x == 0) return(32);
n = 0;
if (x <= 0x0000FFFF) {n = n +16; x = x <<16;}
if (x <= 0x00FFFFFF) {n = n + 8; x = x << 8;}
if (x <= 0x0FFFFFFF) {n = n + 4; x = x << 4;}
if (x <= 0x3FFFFFFF) {n = n + 2; x = x << 2;}
if (x <= 0x7FFFFFFF) {n = n + 1;}
return n;
}
static inline void
_mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t u1, mpd_uint_t u0,
mpd_uint_t v)
{
const mpd_uint_t b = 65536;
mpd_uint_t un1, un0,
vn1, vn0,
q1, q0,
un32, un21, un10,
rhat, t;
int s;
assert(u1 < v);
s = nlz(v);
v = v << s;
vn1 = v >> 16;
vn0 = v & 0xFFFF;
t = (s == 0) ? 0 : u0 >> (32 - s);
un32 = (u1 << s) | t;
un10 = u0 << s;
un1 = un10 >> 16;
un0 = un10 & 0xFFFF;
q1 = un32 / vn1;
rhat = un32 - q1*vn1;
again1:
if (q1 >= b || q1*vn0 > b*rhat + un1) {
q1 = q1 - 1;
rhat = rhat + vn1;
if (rhat < b) goto again1;
}
un21 = un32*b + un1 - q1*v;
q0 = un21 / vn1;
rhat = un21 - q0*vn1;
again2:
if (q0 >= b || q0*vn0 > b*rhat + un0) {
q0 = q0 - 1;
rhat = rhat + vn1;
if (rhat < b) goto again2;
}
*q = q1*b + q0;
*r = (un21*b + un0 - q0*v) >> s;
}
#endif
#elif defined(ASM)
static inline void
_mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
{
mpd_uint_t h, l;
__asm__ ( "mull %3\n\t"
: "=d" (h), "=a" (l)
: "%a" (a), "rm" (b)
: "cc"
);
*hi = h;
*lo = l;
}
static inline void
_mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo,
mpd_uint_t d)
{
mpd_uint_t qq, rr;
__asm__ ( "divl %4\n\t"
: "=a" (qq), "=d" (rr)
: "a" (lo), "d" (hi), "rm" (d)
: "cc"
);
*q = qq;
*r = rr;
}
#elif defined(MASM)
static inline void __cdecl
_mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
{
mpd_uint_t h, l;
__asm {
mov eax, a
mul b
mov h, edx
mov l, eax
}
*hi = h;
*lo = l;
}
static inline void __cdecl
_mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo,
mpd_uint_t d)
{
mpd_uint_t qq, rr;
__asm {
mov eax, lo
mov edx, hi
div d
mov qq, eax
mov rr, edx
}
*q = qq;
*r = rr;
}
#else
#error "need platform specific 64 bit multiplication and division"
#endif
#define DIVMOD …
static inline void
_mpd_divmod_pow10(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t v, mpd_uint_t exp)
{
assert(exp <= 9);
if (exp <= 4) {
switch (exp) {
case 0: *q = v; *r = 0; break;
case 1: DIVMOD(q, r, v, 10UL); break;
case 2: DIVMOD(q, r, v, 100UL); break;
case 3: DIVMOD(q, r, v, 1000UL); break;
case 4: DIVMOD(q, r, v, 10000UL); break;
}
}
else {
switch (exp) {
case 5: DIVMOD(q, r, v, 100000UL); break;
case 6: DIVMOD(q, r, v, 1000000UL); break;
case 7: DIVMOD(q, r, v, 10000000UL); break;
case 8: DIVMOD(q, r, v, 100000000UL); break;
case 9: DIVMOD(q, r, v, 1000000000UL); break;
}
}
}
#else
#error "define CONFIG_64 or CONFIG_32"
#endif
static inline void
_mpd_div_word(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t v, mpd_uint_t d)
{ … }
static inline void
_mpd_idiv_word(mpd_ssize_t *q, mpd_ssize_t *r, mpd_ssize_t v, mpd_ssize_t d)
{ … }
static inline mpd_size_t
add_size_t(mpd_size_t a, mpd_size_t b)
{ … }
static inline mpd_size_t
sub_size_t(mpd_size_t a, mpd_size_t b)
{ … }
#if MPD_SIZE_MAX != MPD_UINT_MAX
#error "adapt mul_size_t() and mulmod_size_t()"
#endif
static inline mpd_size_t
mul_size_t(mpd_size_t a, mpd_size_t b)
{ … }
static inline mpd_size_t
add_size_t_overflow(mpd_size_t a, mpd_size_t b, mpd_size_t *overflow)
{ … }
static inline mpd_size_t
mul_size_t_overflow(mpd_size_t a, mpd_size_t b, mpd_size_t *overflow)
{ … }
static inline mpd_ssize_t
mod_mpd_ssize_t(mpd_ssize_t a, mpd_ssize_t m)
{ … }
static inline mpd_size_t
mulmod_size_t(mpd_size_t a, mpd_size_t b, mpd_size_t m)
{ … }
#endif