#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <math.h>
#include <assert.h>
#include "config/av1_rtcd.h"
#include "av1/common/av1_common_int.h"
#include "av1/common/warped_motion.h"
#include "av1/common/scale.h"
const int16_t av1_warped_filter[WARPEDPIXEL_PREC_SHIFTS * 3 + 1][8] = …;
#define DIV_LUT_PREC_BITS …
#define DIV_LUT_BITS …
#define DIV_LUT_NUM …
static const uint16_t div_lut[DIV_LUT_NUM + 1] = …;
static int16_t resolve_divisor_64(uint64_t D, int16_t *shift) { … }
static int16_t resolve_divisor_32(uint32_t D, int16_t *shift) { … }
static int is_affine_valid(const WarpedMotionParams *const wm) { … }
static int is_affine_shear_allowed(int16_t alpha, int16_t beta, int16_t gamma,
int16_t delta) { … }
#ifndef NDEBUG
static void check_model_consistency(WarpedMotionParams *wm) { … }
#endif
int av1_get_shear_params(WarpedMotionParams *wm) { … }
#if CONFIG_AV1_HIGHBITDEPTH
// Plain-C affine warp filter for 16-bit ("high bitdepth") samples.
//
// The p_width x p_height output region whose top-left corner is
// (p_col, p_row) is processed in 8x8 blocks. For every block the function
//   1. maps the block centre through the affine model in `mat`,
//   2. runs an 8-tap horizontal filter over 15 reference rows into `tmp`,
//   3. runs an 8-tap vertical filter over `tmp` and writes the result either
//      to `pred` or to conv_params->dst (compound prediction).
//
// Parameters:
//   mat            affine model; entries mat[0]..mat[5] are read.
//   ref            reference samples, addressed as ref[y * stride + x].
//   width, height  reference dimensions; sample coordinates are clamped to
//                  [0, width - 1] x [0, height - 1].
//   stride         row stride of `ref`, in samples.
//   pred           output samples, addressed relative to (p_col, p_row).
//   p_col, p_row   position of the output region's top-left corner.
//   p_width, p_height  size of the output region.
//   p_stride       row stride of `pred`, in samples.
//   subsampling_x, subsampling_y  shifts applied to the block centre before
//                  the model is evaluated, and undone afterwards.
//   bd             bit depth; sets the rounding offsets and the final clip.
//   conv_params    rounding shifts (round_0, round_1) and compound state
//                  (is_compound, do_average, use_dist_wtd_comp_avg,
//                  fwd_offset, bck_offset, dst, dst_stride).
//   alpha, beta    horizontal filter phase step per column / per row.
//   gamma, delta   vertical filter phase step per column / per row.
void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref,
int width, int height, int stride, uint16_t *pred,
int p_col, int p_row, int p_width, int p_height,
int p_stride, int subsampling_x,
int subsampling_y, int bd,
ConvolveParams *conv_params, int16_t alpha,
int16_t beta, int16_t gamma, int16_t delta) {
// Horizontally filtered samples for one block: 15 rows of 8 columns.
int32_t tmp[15 * 8];
// Rounding shift applied after the horizontal pass.
const int reduce_bits_horiz = conv_params->round_0;
// Rounding shift applied after the vertical pass.
const int reduce_bits_vert = conv_params->is_compound
? conv_params->round_1
: 2 * FILTER_BITS - reduce_bits_horiz;
// Upper bound (in bits) of the horizontal result; only used by an assert.
const int max_bits_horiz = bd + FILTER_BITS + 1 - reduce_bits_horiz;
// The horizontal and vertical sums start from 1 << offset_bits_* rather
// than from zero; the offset is subtracted again before the final clip.
const int offset_bits_horiz = bd + FILTER_BITS - 1;
const int offset_bits_vert = bd + 2 * FILTER_BITS - reduce_bits_horiz;
// Final rounding shift and offset width used on the compound-average path.
const int round_bits =
2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
// Keeps max_bits_horiz referenced when assert() is compiled out.
(void)max_bits_horiz;
// Compound prediction needs the intermediate destination buffer.
assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));
// NOTE(review): presumably guarantees that the horizontal intermediates fit
// in 16 bits for implementations that store them that narrowly -- confirm.
assert(bd + FILTER_BITS + 2 - conv_params->round_0 <= 16);
// Walk the output region in 8x8 blocks.
for (int i = p_row; i < p_row + p_height; i += 8) {
for (int j = p_col; j < p_col + p_width; j += 8) {
// Centre of this block, scaled up by the subsampling shifts.
const int32_t src_x = (j + 4) << subsampling_x;
const int32_t src_y = (i + 4) << subsampling_y;
// Apply the affine model in 64-bit arithmetic.
const int64_t dst_x =
(int64_t)mat[2] * src_x + (int64_t)mat[3] * src_y + (int64_t)mat[0];
const int64_t dst_y =
(int64_t)mat[4] * src_x + (int64_t)mat[5] * src_y + (int64_t)mat[1];
// Undo the subsampling scale.
const int64_t x4 = dst_x >> subsampling_x;
const int64_t y4 = dst_y >> subsampling_y;
// Split into an integer position and a WARPEDMODEL_PREC_BITS-bit fraction.
const int32_t ix4 = (int32_t)(x4 >> WARPEDMODEL_PREC_BITS);
int32_t sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
const int32_t iy4 = (int32_t)(y4 >> WARPEDMODEL_PREC_BITS);
int32_t sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
// Move the phase from the block centre to 4 columns left and 4 rows up,
// which is where the per-row/per-column stepping below starts.
sx4 += alpha * (-4) + beta * (-4);
sy4 += gamma * (-4) + delta * (-4);
// Clear the low WARP_PARAM_REDUCE_BITS bits of both phases.
sx4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);
sy4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);
// Horizontal filter: 15 rows (k = -7..7) of 8 columns (l = -4..3).
for (int k = -7; k < 8; ++k) {
// Source row, clamped to the reference frame.
const int iy = clamp(iy4 + k, 0, height - 1);
// Phase at the first column of this row.
int sx = sx4 + beta * (k + 4);
for (int l = -4; l < 4; ++l) {
// Column of the first of the 8 taps.
int ix = ix4 + l - 3;
// Row of av1_warped_filter selected by the rounded phase.
const int offs = ROUND_POWER_OF_TWO(sx, WARPEDDIFF_PREC_BITS) +
WARPEDPIXEL_PREC_SHIFTS;
assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
const int16_t *coeffs = av1_warped_filter[offs];
int32_t sum = 1 << offset_bits_horiz;
for (int m = 0; m < 8; ++m) {
// Source column, clamped to the reference frame.
const int sample_x = clamp(ix + m, 0, width - 1);
sum += ref[iy * stride + sample_x] * coeffs[m];
}
sum = ROUND_POWER_OF_TWO(sum, reduce_bits_horiz);
assert(0 <= sum && sum < (1 << max_bits_horiz));
tmp[(k + 7) * 8 + (l + 4)] = sum;
// Advance the phase by one column.
sx += alpha;
}
}
// Vertical filter. The AOMMIN bounds stop at the edge of the output
// region when fewer than 8 rows or columns remain in this block.
for (int k = -4; k < AOMMIN(4, p_row + p_height - i - 4); ++k) {
// Phase at the first column of this row.
int sy = sy4 + delta * (k + 4);
for (int l = -4; l < AOMMIN(4, p_col + p_width - j - 4); ++l) {
const int offs = ROUND_POWER_OF_TWO(sy, WARPEDDIFF_PREC_BITS) +
WARPEDPIXEL_PREC_SHIFTS;
assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
const int16_t *coeffs = av1_warped_filter[offs];
int32_t sum = 1 << offset_bits_vert;
// The 8 taps read tmp rows k - 3 .. k + 4 of the current column.
for (int m = 0; m < 8; ++m) {
sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m];
}
if (conv_params->is_compound) {
// Compound: the intermediate lives in conv_params->dst.
CONV_BUF_TYPE *p =
&conv_params
->dst[(i - p_row + k + 4) * conv_params->dst_stride +
(j - p_col + l + 4)];
sum = ROUND_POWER_OF_TWO(sum, reduce_bits_vert);
if (conv_params->do_average) {
// Combine with the value already stored in dst and write the
// final pixel to pred.
uint16_t *dst16 =
&pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];
int32_t tmp32 = *p;
if (conv_params->use_dist_wtd_comp_avg) {
// Weighted combination using fwd_offset / bck_offset.
tmp32 = tmp32 * conv_params->fwd_offset +
sum * conv_params->bck_offset;
tmp32 = tmp32 >> DIST_PRECISION_BITS;
} else {
// Plain average of the two values.
tmp32 += sum;
tmp32 = tmp32 >> 1;
}
// Remove the offset terms before the final rounding and clip.
tmp32 = tmp32 - (1 << (offset_bits - conv_params->round_1)) -
(1 << (offset_bits - conv_params->round_1 - 1));
*dst16 =
clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp32, round_bits), bd);
} else {
// No averaging: store the rounded intermediate in dst.
*p = sum;
}
} else {
// Non-compound: round, remove the offsets, clip and store in pred.
uint16_t *p =
&pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];
sum = ROUND_POWER_OF_TWO(sum, reduce_bits_vert);
assert(0 <= sum && sum < (1 << (bd + 2)));
*p = clip_pixel_highbd(sum - (1 << (bd - 1)) - (1 << bd), bd);
}
// Advance the phase by one column.
sy += gamma;
}
}
}
}
}
// High-bitdepth warp entry point taking a WarpedMotionParams model.
//
// Reads the wmmat matrix and the alpha, beta, gamma and delta fields of `wm`
// and forwards them, together with every other argument unchanged, to
// av1_highbd_warp_affine(). `wm` itself is not modified here.
void highbd_warp_plane(WarpedMotionParams *wm, const uint16_t *const ref,
                       int width, int height, int stride, uint16_t *const pred,
                       int p_col, int p_row, int p_width, int p_height,
                       int p_stride, int subsampling_x, int subsampling_y,
                       int bd, ConvolveParams *conv_params) {
  av1_highbd_warp_affine(wm->wmmat, ref, width, height, stride, pred, p_col,
                         p_row, p_width, p_height, p_stride, subsampling_x,
                         subsampling_y, bd, conv_params, wm->alpha, wm->beta,
                         wm->gamma, wm->delta);
}
#endif
void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width,
int height, int stride, uint8_t *pred, int p_col,
int p_row, int p_width, int p_height, int p_stride,
int subsampling_x, int subsampling_y,
ConvolveParams *conv_params, int16_t alpha, int16_t beta,
int16_t gamma, int16_t delta) { … }
void warp_plane(WarpedMotionParams *wm, const uint8_t *const ref, int width,
int height, int stride, uint8_t *pred, int p_col, int p_row,
int p_width, int p_height, int p_stride, int subsampling_x,
int subsampling_y, ConvolveParams *conv_params) { … }
void av1_warp_plane(WarpedMotionParams *wm, int use_hbd, int bd,
const uint8_t *ref, int width, int height, int stride,
uint8_t *pred, int p_col, int p_row, int p_width,
int p_height, int p_stride, int subsampling_x,
int subsampling_y, ConvolveParams *conv_params) { … }
#define LS_MV_MAX …
#define LS_STEP …
#define LS_MAT_RANGE_BITS …
#define LS_MAT_DOWN_BITS …
#define LS_MAT_BITS …
#define LS_MAT_MIN …
#define LS_MAT_MAX …
#define LS_SQUARE(a) …
#define LS_PRODUCT1(a, b) …
#define LS_PRODUCT2(a, b) …
#define USE_LIMITED_PREC_MULT …
#if USE_LIMITED_PREC_MULT
#define MUL_PREC_BITS …
// Reduces a 64-bit magnitude to a short mantissa plus a shift count, so that
// D is approximately (return value) << *shift.
//
//   D      value to reduce.
//   shift  out: number of low-order bits removed from D. Set to 0 when D is
//          zero or when its most significant set bit is below MUL_PREC_BITS.
//
// Returns the mantissa, rounded to nearest when bits are dropped. Returns 0
// (with *shift == 0) for D == 0.
static uint16_t resolve_multiplier_64(uint64_t D, int16_t *shift) {
  *shift = 0;
  if (D == 0) return 0;

  // Position of the most significant set bit of D (0..63).
  const int msb = (D >> 32) ? get_msb((unsigned int)(D >> 32)) + 32
                            : get_msb((unsigned int)D);

  // Small values are returned as-is, with no shift.
  if (msb < MUL_PREC_BITS) return (uint16_t)D;

  int drop = msb + 1 - MUL_PREC_BITS;
  uint64_t mantissa = ROUND_POWER_OF_TWO_64(D, drop);

  // Rounding to nearest can carry out of the mantissa, yielding exactly
  // 1 << MUL_PREC_BITS. Narrowing that to uint16_t could wrap to 0 and make
  // a large D look like zero, so renormalise: halve the mantissa (its low
  // bit is 0, so nothing is lost) and drop one more bit instead.
  if (mantissa >> MUL_PREC_BITS) {
    mantissa >>= 1;
    ++drop;
  }

  *shift = (int16_t)drop;
  return (uint16_t)mantissa;
}
// Computes approximately (Px * iDet) / 2^shift with a limited-precision
// multiplier and clamps the result to
// [-WARPEDMODEL_NONDIAGAFFINE_CLAMP + 1, WARPEDMODEL_NONDIAGAFFINE_CLAMP - 1].
//
//   Px     signed 64-bit factor; its magnitude is first reduced to a short
//          mantissa and a shift by resolve_multiplier_64().
//   iDet   signed 16-bit factor.
//   shift  right shift to apply to the full product.
//
// Returns the clamped value.
static int32_t get_mult_shift_ndiag(int64_t Px, int16_t iDet, int shift) {
  int16_t mshift;
  const uint16_t Mul = resolve_multiplier_64(llabs(Px), &mshift);
  // Product of the reduced magnitude and iDet, with the sign of Px restored.
  const int32_t v = (int32_t)Mul * (int32_t)iDet * (Px < 0 ? -1 : 1);

  // mshift bits were already removed from |Px|; only the remainder of the
  // requested shift is still owed.
  shift -= mshift;

  // A positive remainder is a rounding right shift; otherwise the mantissa
  // reduction removed at least as many bits as requested, so scale back up.
  const int32_t scaled = (shift > 0) ? ROUND_POWER_OF_TWO_SIGNED(v, shift)
                                     : v * (1 << (-shift));

  return (int32_t)clamp(scaled, -WARPEDMODEL_NONDIAGAFFINE_CLAMP + 1,
                        WARPEDMODEL_NONDIAGAFFINE_CLAMP - 1);
}
// Computes approximately (Px * iDet) / 2^shift with a limited-precision
// multiplier and clamps the result to the range centred on
// 1 << WARPEDMODEL_PREC_BITS:
//   [(1 << WARPEDMODEL_PREC_BITS) - WARPEDMODEL_NONDIAGAFFINE_CLAMP + 1,
//    (1 << WARPEDMODEL_PREC_BITS) + WARPEDMODEL_NONDIAGAFFINE_CLAMP - 1].
//
//   Px     signed 64-bit factor; its magnitude is reduced to a short mantissa
//          and a shift by resolve_multiplier_64().
//   iDet   signed 16-bit factor.
//   shift  right shift to apply to the full product.
//
// Returns the clamped value.
static int32_t get_mult_shift_diag(int64_t Px, int16_t iDet, int shift) {
  const int lower_bound =
      (1 << WARPEDMODEL_PREC_BITS) - WARPEDMODEL_NONDIAGAFFINE_CLAMP + 1;
  const int upper_bound =
      (1 << WARPEDMODEL_PREC_BITS) + WARPEDMODEL_NONDIAGAFFINE_CLAMP - 1;

  int16_t mantissa_shift;
  const uint16_t mantissa = resolve_multiplier_64(llabs(Px), &mantissa_shift);
  const int32_t product =
      (int32_t)mantissa * (int32_t)iDet * (Px < 0 ? -1 : 1);

  // Part of the requested shift not already absorbed by the mantissa
  // reduction.
  const int remaining = shift - mantissa_shift;

  int32_t scaled;
  if (remaining > 0) {
    scaled = ROUND_POWER_OF_TWO_SIGNED(product, remaining);
  } else {
    scaled = product * (1 << (-remaining));
  }
  return (int32_t)clamp(scaled, lower_bound, upper_bound);
}
#else
static int32_t get_mult_shift_ndiag(int64_t Px, int16_t iDet, int shift) { … }
static int32_t get_mult_shift_diag(int64_t Px, int16_t iDet, int shift) { … }
#endif
static int find_affine_int(int np, const int *pts1, const int *pts2,
BLOCK_SIZE bsize, int mvy, int mvx,
WarpedMotionParams *wm, int mi_row, int mi_col) { … }
int av1_find_projection(int np, const int *pts1, const int *pts2,
BLOCK_SIZE bsize, int mvy, int mvx,
WarpedMotionParams *wm_params, int mi_row, int mi_col) { … }