Commit 2bcf280e authored by David Barker's avatar David Barker Committed by Debargha Mukherjee
Browse files

Prepare for vectorizing highbd warp filter

This applies the same refactorings to highbd_warp_plane
which were applied to warp_plane a while ago, and lays the
groundwork for the relevant tests.

Change-Id: Ic4c00bce1accc5a3624bba0c3b4b325e69a42c1a
parent 417f58a6
......@@ -670,6 +670,10 @@ if ((aom_config("CONFIG_WARPED_MOTION") eq "yes") ||
(aom_config("CONFIG_GLOBAL_MOTION") eq "yes")) {
add_proto qw/void av1_warp_affine/, "int32_t *mat, uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int ref_frm, int32_t alpha, int32_t beta, int32_t gamma, int32_t delta";
specialize qw/av1_warp_affine sse2/;
if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
add_proto qw/void av1_highbd_warp_affine/, "int32_t *mat, uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, int ref_frm, int32_t alpha, int32_t beta, int32_t gamma, int32_t delta";
}
}
# LOOP_RESTORATION functions
......
......@@ -618,6 +618,14 @@ static const uint16_t div_lut[DIV_LUT_NUM + 1] = {
8240, 8224, 8208, 8192,
};
static inline int16_t saturate_int16(int32_t v) {
if (v > 32767)
return 32767;
else if (v < -32768)
return -32768;
return v;
}
#if CONFIG_WARPED_MOTION
// Decomposes a divisor D such that 1/D = y/2^shift, where y is returned
// at precision of DIV_LUT_PREC_BITS along with the shift.
......@@ -848,101 +856,162 @@ static void highbd_warp_plane_old(WarpedMotionParams *wm, uint8_t *ref8,
// Note: For an explanation of the warp algorithm, see the comment
// above warp_plane()
static void highbd_warp_plane(WarpedMotionParams *wm, uint8_t *ref8, int width,
int height, int stride, uint8_t *pred8, int p_col,
//
// Note also: The "worst case" in terms of modulus of the data stored into 'tmp'
// (ie, the result of 'sum' in the horizontal filter) occurs when:
// coeffs = { -2, 8, -22, 87, 72, -21, 8, -2}, and
// ref = { 0, 255, 0, 255, 255, 0, 255, 0}
// Before rounding, this gives sum = 716625. After rounding,
// HORSHEAR_REDUCE_PREC_BITS = 4 => sum = 44789 > 2^15
// HORSHEAR_REDUCE_PREC_BITS = 5 => sum = 22395 < 2^15
//
// So, as long as HORSHEAR_REDUCE_PREC_BITS >= 5, we can safely use a 16-bit
// intermediate array.
void av1_highbd_warp_affine_c(int32_t *mat, uint16_t *ref, int width,
int height, int stride, uint16_t *pred, int p_col,
int p_row, int p_width, int p_height,
int p_stride, int subsampling_x,
int subsampling_y, int x_scale, int y_scale,
int bd, int ref_frm) {
if (wm->wmtype == ROTZOOM) {
wm->wmmat[5] = wm->wmmat[2];
wm->wmmat[4] = -wm->wmmat[3];
}
if (wm->wmtype == ROTZOOM || wm->wmtype == AFFINE) {
int32_t tmp[15 * 8];
int i, j, k, l, m;
int32_t *mat = wm->wmmat;
uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
int subsampling_y, int bd, int ref_frm,
int32_t alpha, int32_t beta, int32_t gamma,
int32_t delta) {
#if HORSHEAR_REDUCE_PREC_BITS >= 5
int16_t tmp[15 * 8];
#else
int32_t tmp[15 * 8];
#endif
int i, j, k, l, m;
const int32_t alpha = wm->alpha;
const int32_t beta = wm->beta;
const int32_t gamma = wm->gamma;
const int32_t delta = wm->delta;
/* Note: For this code to work, the left/right frame borders need to be
extended by at least 13 pixels each. By the time we get here, other
code will have set up this border, but we allow an explicit check
for debugging purposes.
*/
/*for (i = 0; i < height; ++i) {
for (j = 0; j < 13; ++j) {
assert(ref[i * stride - 13 + j] == ref[i * stride]);
assert(ref[i * stride + width + j] == ref[i * stride + (width - 1)]);
}
}*/
for (i = p_row; i < p_row + p_height; i += 8) {
for (j = p_col; j < p_col + p_width; j += 8) {
int32_t x4, y4, ix4, sx4, iy4, sy4;
if (subsampling_x)
x4 = ROUND_POWER_OF_TWO_SIGNED(
mat[2] * 2 * (j + 4) + mat[3] * 2 * (i + 4) + mat[0] +
(mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
1);
else
x4 = mat[2] * (j + 4) + mat[3] * (i + 4) + mat[0];
if (subsampling_y)
y4 = ROUND_POWER_OF_TWO_SIGNED(
mat[4] * 2 * (j + 4) + mat[5] * 2 * (i + 4) + mat[1] +
(mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
1);
else
y4 = mat[4] * (j + 4) + mat[5] * (i + 4) + mat[1];
ix4 = x4 >> WARPEDMODEL_PREC_BITS;
sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
iy4 = y4 >> WARPEDMODEL_PREC_BITS;
sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
// Horizontal filter
for (k = -7; k < 8; ++k) {
int iy = iy4 + k;
if (iy < 0)
iy = 0;
else if (iy > height - 1)
iy = height - 1;
for (i = p_row; i < p_row + p_height; i += 8) {
for (j = p_col; j < p_col + p_width; j += 8) {
int32_t x4, y4, ix4, sx4, iy4, sy4;
if (subsampling_x)
x4 = ROUND_POWER_OF_TWO_SIGNED(
mat[2] * 2 * (j + 4) + mat[3] * 2 * (i + 4) + mat[0] +
(mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
1);
else
x4 = mat[2] * (j + 4) + mat[3] * (i + 4) + mat[0];
if (subsampling_y)
y4 = ROUND_POWER_OF_TWO_SIGNED(
mat[4] * 2 * (j + 4) + mat[5] * 2 * (i + 4) + mat[1] +
(mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
1);
else
y4 = mat[4] * (j + 4) + mat[5] * (i + 4) + mat[1];
ix4 = x4 >> WARPEDMODEL_PREC_BITS;
sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
iy4 = y4 >> WARPEDMODEL_PREC_BITS;
sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
// Horizontal filter
for (k = -7; k < 8; ++k) {
int iy = iy4 + k;
if (iy < 0)
iy = 0;
else if (iy > height - 1)
iy = height - 1;
if (ix4 <= -7) {
for (l = 0; l < 8; ++l) {
tmp[(k + 7) * 8 + l] =
ref[iy * stride] *
(1 << (WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS));
}
} else if (ix4 >= width + 6) {
for (l = 0; l < 8; ++l) {
tmp[(k + 7) * 8 + l] =
ref[iy * stride + (width - 1)] *
(1 << (WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS));
}
} else {
int sx = sx4 + alpha * (-4) + beta * k;
for (l = -4; l < 4; ++l) {
int ix = ix4 + l;
int sx = ROUND_POWER_OF_TWO_SIGNED(sx4 + alpha * l + beta * k,
WARPEDDIFF_PREC_BITS);
const int16_t *coeffs = warped_filter[sx + WARPEDPIXEL_PREC_SHIFTS];
int ix = ix4 + l - 3;
const int offs = ROUND_POWER_OF_TWO(sx, WARPEDDIFF_PREC_BITS) +
WARPEDPIXEL_PREC_SHIFTS;
const int16_t *coeffs = warped_filter[offs];
int32_t sum = 0;
// assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
for (m = 0; m < 8; ++m) {
if (ix + m - 3 < 0)
sum += ref[iy * stride] * coeffs[m];
else if (ix + m - 3 > width - 1)
sum += ref[iy * stride + width - 1] * coeffs[m];
else
sum += ref[iy * stride + ix + m - 3] * coeffs[m];
sum += ref[iy * stride + ix + m] * coeffs[m];
}
sum = ROUND_POWER_OF_TWO(sum, HORSHEAR_REDUCE_PREC_BITS);
#if HORSHEAR_REDUCE_PREC_BITS >= 5
tmp[(k + 7) * 8 + (l + 4)] = saturate_int16(sum);
#else
tmp[(k + 7) * 8 + (l + 4)] = sum;
#endif
sx += alpha;
}
}
}
// Vertical filter
for (k = -4; k < AOMMIN(4, p_row + p_height - i - 4); ++k) {
for (l = -4; l < AOMMIN(4, p_col + p_width - j - 4); ++l) {
uint16_t *p =
&pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];
int sy = ROUND_POWER_OF_TWO_SIGNED(sy4 + gamma * l + delta * k,
WARPEDDIFF_PREC_BITS);
const int16_t *coeffs = warped_filter[sy + WARPEDPIXEL_PREC_SHIFTS];
int32_t sum = 0;
for (m = 0; m < 8; ++m) {
sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m];
}
sum = clip_pixel_highbd(
ROUND_POWER_OF_TWO_SIGNED(sum, VERSHEAR_REDUCE_PREC_BITS), bd);
if (ref_frm)
*p = ROUND_POWER_OF_TWO_SIGNED(*p + sum, 1);
else
*p = sum;
// Vertical filter
for (k = -4; k < AOMMIN(4, p_row + p_height - i - 4); ++k) {
int sy = sy4 + gamma * (-4) + delta * k;
for (l = -4; l < 4; ++l) {
uint16_t *p =
&pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];
const int offs = ROUND_POWER_OF_TWO(sy, WARPEDDIFF_PREC_BITS) +
WARPEDPIXEL_PREC_SHIFTS;
const int16_t *coeffs = warped_filter[offs];
int32_t sum = 0;
// assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
for (m = 0; m < 8; ++m) {
sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m];
}
sum = clip_pixel_highbd(
ROUND_POWER_OF_TWO(sum, VERSHEAR_REDUCE_PREC_BITS), bd);
if (ref_frm)
*p = ROUND_POWER_OF_TWO(*p + sum, 1);
else
*p = sum;
sy += gamma;
}
}
}
}
}
static void highbd_warp_plane(WarpedMotionParams *wm, uint8_t *ref8, int width,
int height, int stride, uint8_t *pred8, int p_col,
int p_row, int p_width, int p_height,
int p_stride, int subsampling_x,
int subsampling_y, int x_scale, int y_scale,
int bd, int ref_frm) {
if (wm->wmtype == ROTZOOM) {
wm->wmmat[5] = wm->wmmat[2];
wm->wmmat[4] = -wm->wmmat[3];
}
if ((wm->wmtype == ROTZOOM || wm->wmtype == AFFINE) && x_scale == 16 &&
y_scale == 16) {
int32_t *mat = wm->wmmat;
const int32_t alpha = wm->alpha;
const int32_t beta = wm->beta;
const int32_t gamma = wm->gamma;
const int32_t delta = wm->delta;
uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
av1_highbd_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row,
p_width, p_height, p_stride, subsampling_x,
subsampling_y, bd, ref_frm, alpha, beta, gamma,
delta);
} else {
highbd_warp_plane_old(wm, ref8, width, height, stride, pred8, p_col, p_row,
p_width, p_height, p_stride, subsampling_x,
......@@ -1048,14 +1117,6 @@ static void warp_plane_old(WarpedMotionParams *wm, uint8_t *ref, int width,
TODO(david.barker): Maybe support scaled references?
*/
static inline int16_t saturate_int16(int32_t v) {
if (v > 32767)
return 32767;
else if (v < -32768)
return -32768;
return v;
}
void av1_warp_affine_c(int32_t *mat, uint8_t *ref, int width, int height,
int stride, uint8_t *pred, int p_col, int p_row,
int p_width, int p_height, int p_stride,
......
......@@ -16,6 +16,9 @@ using std::tr1::tuple;
using std::tr1::make_tuple;
using libaom_test::ACMRandom;
using libaom_test::AV1WarpFilter::AV1WarpFilterTest;
#if CONFIG_AOM_HIGHBITDEPTH
using libaom_test::AV1HighbdWarpFilter::AV1HighbdWarpFilterTest;
#endif
namespace {
......
......@@ -17,6 +17,10 @@ using std::vector;
using libaom_test::ACMRandom;
using libaom_test::AV1WarpFilter::AV1WarpFilterTest;
using libaom_test::AV1WarpFilter::WarpTestParam;
#if CONFIG_AOM_HIGHBITDEPTH
using libaom_test::AV1HighbdWarpFilter::AV1HighbdWarpFilterTest;
using libaom_test::AV1HighbdWarpFilter::HighbdWarpTestParam;
#endif
::testing::internal::ParamGenerator<WarpTestParam>
libaom_test::AV1WarpFilter::GetDefaultParams() {
......@@ -42,6 +46,7 @@ int32_t AV1WarpFilterTest::random_param(int bits) {
if ((rnd_.Rand8()) & 1) return -v;
return v;
}
void AV1WarpFilterTest::generate_model(int32_t *mat, int32_t *alpha,
int32_t *beta, int32_t *gamma,
int32_t *delta) {
......@@ -73,7 +78,7 @@ void AV1WarpFilterTest::generate_model(int32_t *mat, int32_t *alpha,
(1 << WARPEDMODEL_PREC_BITS);
if ((4 * abs(*alpha) + 7 * abs(*beta) > (1 << WARPEDMODEL_PREC_BITS)) ||
(4 * abs(*gamma) + 7 * abs(*delta) > (1 << WARPEDMODEL_PREC_BITS)))
(4 * abs(*gamma) + 4 * abs(*delta) > (1 << WARPEDMODEL_PREC_BITS)))
continue;
// We have a valid model, so finish
......@@ -103,7 +108,6 @@ void AV1WarpFilterTest::RunCheckOutput(warp_affine_func test_impl) {
memset(input + i * stride + w, input[i * stride + (w - 1)], border);
}
/* Try different sizes of prediction block */
for (i = 0; i < num_iters; ++i) {
for (sub_x = 0; sub_x < 2; ++sub_x)
for (sub_y = 0; sub_y < 2; ++sub_y) {
......@@ -121,3 +125,122 @@ void AV1WarpFilterTest::RunCheckOutput(warp_affine_func test_impl) {
}
}
}
#if CONFIG_AOM_HIGHBITDEPTH
::testing::internal::ParamGenerator<HighbdWarpTestParam>
libaom_test::AV1HighbdWarpFilter::GetDefaultParams() {
const HighbdWarpTestParam defaultParams[] = {
make_tuple(4, 4, 50000, 8), make_tuple(8, 8, 50000, 8),
make_tuple(64, 64, 1000, 8), make_tuple(4, 16, 20000, 8),
make_tuple(32, 8, 10000, 8), make_tuple(4, 4, 50000, 10),
make_tuple(8, 8, 50000, 10), make_tuple(64, 64, 1000, 10),
make_tuple(4, 16, 20000, 10), make_tuple(32, 8, 10000, 10),
make_tuple(4, 4, 50000, 12), make_tuple(8, 8, 50000, 12),
make_tuple(64, 64, 1000, 12), make_tuple(4, 16, 20000, 12),
make_tuple(32, 8, 10000, 12),
};
return ::testing::ValuesIn(defaultParams);
}
AV1HighbdWarpFilterTest::~AV1HighbdWarpFilterTest() {}
void AV1HighbdWarpFilterTest::SetUp() {
rnd_.Reset(ACMRandom::DeterministicSeed());
}
void AV1HighbdWarpFilterTest::TearDown() { libaom_test::ClearSystemState(); }
int32_t AV1HighbdWarpFilterTest::random_param(int bits) {
// 1 in 8 chance of generating zero (arbitrarily chosen)
if (((rnd_.Rand8()) & 7) == 0) return 0;
// Otherwise, enerate uniform values in the range
// [-(1 << bits), 1] U [1, 1<<bits]
int32_t v = 1 + (rnd_.Rand16() & ((1 << bits) - 1));
if ((rnd_.Rand8()) & 1) return -v;
return v;
}
void AV1HighbdWarpFilterTest::generate_model(int32_t *mat, int32_t *alpha,
int32_t *beta, int32_t *gamma,
int32_t *delta) {
while (1) {
mat[0] = random_param(WARPEDMODEL_PREC_BITS + 6);
mat[1] = random_param(WARPEDMODEL_PREC_BITS + 6);
mat[2] = (random_param(WARPEDMODEL_PREC_BITS - 3)) +
(1 << WARPEDMODEL_PREC_BITS);
mat[3] = random_param(WARPEDMODEL_PREC_BITS - 3);
// 50/50 chance of generating ROTZOOM vs. AFFINE models
if (rnd_.Rand8() & 1) {
// AFFINE
mat[4] = random_param(WARPEDMODEL_PREC_BITS - 3);
mat[5] = (random_param(WARPEDMODEL_PREC_BITS - 3)) +
(1 << WARPEDMODEL_PREC_BITS);
} else {
mat[4] = -mat[3];
mat[5] = mat[2];
}
// Calculate the derived parameters and check that they are suitable
// for the warp filter.
assert(mat[2] != 0);
*alpha = mat[2] - (1 << WARPEDMODEL_PREC_BITS);
*beta = mat[3];
*gamma = ((int64_t)mat[4] << WARPEDMODEL_PREC_BITS) / mat[2];
*delta = mat[5] - (((int64_t)mat[3] * mat[4] + (mat[2] / 2)) / mat[2]) -
(1 << WARPEDMODEL_PREC_BITS);
if ((4 * abs(*alpha) + 7 * abs(*beta) > (1 << WARPEDMODEL_PREC_BITS)) ||
(4 * abs(*gamma) + 4 * abs(*delta) > (1 << WARPEDMODEL_PREC_BITS)))
continue;
// We have a valid model, so finish
return;
}
}
void AV1HighbdWarpFilterTest::RunCheckOutput(
highbd_warp_affine_func test_impl) {
const int w = 128, h = 128;
const int border = 16;
const int stride = w + 2 * border;
const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
const int num_iters = GET_PARAM(2);
const int bd = GET_PARAM(3);
const int mask = (1 << bd) - 1;
int i, j, sub_x, sub_y;
uint16_t *input_ = new uint16_t[h * stride];
uint16_t *input = input_ + border;
uint16_t *output = new uint16_t[out_w * out_h];
uint16_t *output2 = new uint16_t[out_w * out_h];
int32_t mat[8], alpha, beta, gamma, delta;
// Generate an input block and extend its borders horizontally
for (i = 0; i < h; ++i)
for (j = 0; j < w; ++j) input[i * stride + j] = rnd_.Rand16() & mask;
for (i = 0; i < h; ++i) {
for (j = 0; j < border; ++j) {
input[i * stride - border + j] = input[i * stride];
input[i * stride + w + j] = input[i * stride + (w - 1)];
}
}
for (i = 0; i < num_iters; ++i) {
for (sub_x = 0; sub_x < 2; ++sub_x)
for (sub_y = 0; sub_y < 2; ++sub_y) {
generate_model(mat, &alpha, &beta, &gamma, &delta);
av1_highbd_warp_affine_c(mat, input, w, h, stride, output, 32, 32,
out_w, out_h, out_w, sub_x, sub_y, bd, 0,
alpha, beta, gamma, delta);
test_impl(mat, input, w, h, stride, output2, 32, 32, out_w, out_h,
out_w, sub_x, sub_y, bd, 0, alpha, beta, gamma, delta);
for (j = 0; j < out_w * out_h; ++j)
ASSERT_EQ(output[j], output2[j])
<< "Pixel mismatch at index " << j << " = (" << (j % out_w)
<< ", " << (j / out_w) << ") on iteration " << i;
}
}
}
#endif // CONFIG_AOM_HIGHBITDEPTH
......@@ -56,6 +56,39 @@ class AV1WarpFilterTest : public ::testing::TestWithParam<WarpTestParam> {
} // namespace AV1WarpFilter
#if CONFIG_AOM_HIGHBITDEPTH
namespace AV1HighbdWarpFilter {
typedef void (*highbd_warp_affine_func)(
int32_t *mat, uint16_t *ref, int width, int height, int stride,
uint16_t *pred, int p_col, int p_row, int p_width, int p_height,
int p_stride, int subsampling_x, int subsampling_y, int bd, int ref_frm,
int32_t alpha, int32_t beta, int32_t gamma, int32_t delta);
typedef std::tr1::tuple<int, int, int, int> HighbdWarpTestParam;
::testing::internal::ParamGenerator<HighbdWarpTestParam> GetDefaultParams();
class AV1HighbdWarpFilterTest
: public ::testing::TestWithParam<HighbdWarpTestParam> {
public:
virtual ~AV1HighbdWarpFilterTest();
virtual void SetUp();
virtual void TearDown();
protected:
int32_t random_param(int bits);
void generate_model(int32_t *mat, int32_t *alpha, int32_t *beta,
int32_t *gamma, int32_t *delta);
void RunCheckOutput(highbd_warp_affine_func test_impl);
libaom_test::ACMRandom rnd_;
};
} // namespace AV1HighbdWarpFilter
#endif // CONFIG_AOM_HIGHBITDEPTH
} // namespace libaom_test
#endif // TEST_WARP_FILTER_TEST_UTIL_H_
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment