Commit 266db85d authored by Sean Purser-Haskell's avatar Sean Purser-Haskell Committed by Sean Purser-haskell
Browse files

Limit the warp filter table to 192 entries and clamp the lookup index, since
in some cases index 192 (one past the last entry) is accessed.

Change-Id: I3d65123893663cc7d303056e46934aec153bc35b
parent 239f06b3
......@@ -498,7 +498,7 @@ static uint8_t warp_interpolate(uint8_t *ref, int x, int y, int width,
// [-1, 2) * WARPEDPIXEL_PREC_SHIFTS.
// We need an extra 2 taps to fit this in, for a total of 8 taps.
/* clang-format off */
const int16_t warped_filter[WARPEDPIXEL_PREC_SHIFTS * 3 + 1][8] = {
const int16_t warped_filter_taps[WARPEDPIXEL_PREC_SHIFTS * 3][8] = {
#if WARPEDPIXEL_PREC_BITS == 6
// [-1, 0)
{ 0, 0, 127, 1, 0, 0, 0, 0 }, { 0, - 1, 127, 2, 0, 0, 0, 0 },
......@@ -656,11 +656,12 @@ const int16_t warped_filter[WARPEDPIXEL_PREC_SHIFTS * 3 + 1][8] = {
{0, 0, 1, -3, 8, 126, -5, 1}, {0, 0, 0, -1, 4, 127, -3, 1},
#endif // WARPEDPIXEL_PREC_BITS == 6
// dummy
{ 0, 0, 0, 0, 1, 127, 0, 0 },
};
// Returns a pointer to the 8 filter taps stored at row `offs` of
// warped_filter_taps.
//
// `offs` is clamped to the valid row range of the table, so an index that
// lands one past the last row (or below the first row) selects the nearest
// valid row instead of reading outside the array.
const int16_t *av1_get_warped_filter(int offs) {
  // Derive the row count from the table itself rather than hard-coding 192,
  // so the clamp stays correct if the table is built with a different
  // number of rows.
  const int num_filters =
      (int)(sizeof(warped_filter_taps) / sizeof(warped_filter_taps[0]));
  if (offs < 0) return warped_filter_taps[0];
  if (offs >= num_filters) return warped_filter_taps[num_filters - 1];
  return warped_filter_taps[offs];
}
/* clang-format on */
#define DIV_LUT_PREC_BITS 14
......@@ -1023,7 +1024,7 @@ void av1_highbd_warp_affine_c(int32_t *mat, uint16_t *ref, int width,
int ix = ix4 + l - 3;
const int offs = ROUND_POWER_OF_TWO(sx, WARPEDDIFF_PREC_BITS) +
WARPEDPIXEL_PREC_SHIFTS;
const int16_t *coeffs = warped_filter[offs];
const int16_t *coeffs = av1_get_warped_filter(offs);
int32_t sum = 0;
// assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
for (m = 0; m < 8; ++m) {
......@@ -1048,7 +1049,7 @@ void av1_highbd_warp_affine_c(int32_t *mat, uint16_t *ref, int width,
&pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];
const int offs = ROUND_POWER_OF_TWO(sy, WARPEDDIFF_PREC_BITS) +
WARPEDPIXEL_PREC_SHIFTS;
const int16_t *coeffs = warped_filter[offs];
const int16_t *coeffs = av1_get_warped_filter(offs);
int32_t sum = 0;
// assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
for (m = 0; m < 8; ++m) {
......@@ -1284,7 +1285,7 @@ void av1_warp_affine_c(int32_t *mat, uint8_t *ref, int width, int height,
// At this point, sx = sx4 + alpha * l + beta * k
const int offs = ROUND_POWER_OF_TWO(sx, WARPEDDIFF_PREC_BITS) +
WARPEDPIXEL_PREC_SHIFTS;
const int16_t *coeffs = warped_filter[offs];
const int16_t *coeffs = av1_get_warped_filter(offs);
int32_t sum = 0;
// assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
for (m = 0; m < 8; ++m) {
......@@ -1306,7 +1307,7 @@ void av1_warp_affine_c(int32_t *mat, uint8_t *ref, int width, int height,
// At this point, sy = sy4 + gamma * l + delta * k
const int offs = ROUND_POWER_OF_TWO(sy, WARPEDDIFF_PREC_BITS) +
WARPEDPIXEL_PREC_SHIFTS;
const int16_t *coeffs = warped_filter[offs];
const int16_t *coeffs = av1_get_warped_filter(offs);
int32_t sum = 0;
// assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
for (m = 0; m < 8; ++m) {
......
......@@ -33,7 +33,7 @@
#define DEFAULT_WMTYPE AFFINE
#endif // CONFIG_WARPED_MOTION
const int16_t warped_filter[WARPEDPIXEL_PREC_SHIFTS * 3 + 1][8];
const int16_t *av1_get_warped_filter(int offs);
typedef void (*ProjectPointsFunc)(int32_t *mat, int *points, int *proj,
const int n, const int stride_points,
......
......@@ -14,8 +14,6 @@
#include "./av1_rtcd.h"
#include "av1/common/warped_motion.h"
static const __m128i *const filter = (const __m128i *const)warped_filter;
/* SSE2 version of the rotzoom/affine warp filter */
void av1_warp_affine_sse2(int32_t *mat, uint8_t *ref, int width, int height,
int stride, uint8_t *pred, int p_col, int p_row,
......@@ -98,10 +96,14 @@ void av1_warp_affine_sse2(int32_t *mat, uint8_t *ref, int width, int height,
_mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 - 7));
// Filter even-index pixels
__m128i tmp_0 = filter[(sx + 0 * alpha) >> WARPEDDIFF_PREC_BITS];
__m128i tmp_2 = filter[(sx + 2 * alpha) >> WARPEDDIFF_PREC_BITS];
__m128i tmp_4 = filter[(sx + 4 * alpha) >> WARPEDDIFF_PREC_BITS];
__m128i tmp_6 = filter[(sx + 6 * alpha) >> WARPEDDIFF_PREC_BITS];
__m128i tmp_0 = *(__m128i const *)av1_get_warped_filter(
(sx + 0 * alpha) >> WARPEDDIFF_PREC_BITS);
__m128i tmp_2 = *(__m128i const *)av1_get_warped_filter(
(sx + 2 * alpha) >> WARPEDDIFF_PREC_BITS);
__m128i tmp_4 = *(__m128i const *)av1_get_warped_filter(
(sx + 4 * alpha) >> WARPEDDIFF_PREC_BITS);
__m128i tmp_6 = *(__m128i const *)av1_get_warped_filter(
(sx + 6 * alpha) >> WARPEDDIFF_PREC_BITS);
// coeffs 0 1 0 1 2 3 2 3 for pixels 0, 2
__m128i tmp_8 = _mm_unpacklo_epi32(tmp_0, tmp_2);
......@@ -140,10 +142,14 @@ void av1_warp_affine_sse2(int32_t *mat, uint8_t *ref, int width, int height,
HORSHEAR_REDUCE_PREC_BITS);
// Filter odd-index pixels
__m128i tmp_1 = filter[(sx + 1 * alpha) >> WARPEDDIFF_PREC_BITS];
__m128i tmp_3 = filter[(sx + 3 * alpha) >> WARPEDDIFF_PREC_BITS];
__m128i tmp_5 = filter[(sx + 5 * alpha) >> WARPEDDIFF_PREC_BITS];
__m128i tmp_7 = filter[(sx + 7 * alpha) >> WARPEDDIFF_PREC_BITS];
__m128i tmp_1 = *(__m128i const *)av1_get_warped_filter(
(sx + 1 * alpha) >> WARPEDDIFF_PREC_BITS);
__m128i tmp_3 = *(__m128i const *)av1_get_warped_filter(
(sx + 3 * alpha) >> WARPEDDIFF_PREC_BITS);
__m128i tmp_5 = *(__m128i const *)av1_get_warped_filter(
(sx + 5 * alpha) >> WARPEDDIFF_PREC_BITS);
__m128i tmp_7 = *(__m128i const *)av1_get_warped_filter(
(sx + 7 * alpha) >> WARPEDDIFF_PREC_BITS);
__m128i tmp_9 = _mm_unpacklo_epi32(tmp_1, tmp_3);
__m128i tmp_11 = _mm_unpacklo_epi32(tmp_5, tmp_7);
......@@ -191,10 +197,14 @@ void av1_warp_affine_sse2(int32_t *mat, uint8_t *ref, int width, int height,
__m128i src_6 = _mm_unpacklo_epi16(src[6], src[7]);
// Filter even-index pixels
__m128i tmp_0 = filter[(sy + 0 * gamma) >> WARPEDDIFF_PREC_BITS];
__m128i tmp_2 = filter[(sy + 2 * gamma) >> WARPEDDIFF_PREC_BITS];
__m128i tmp_4 = filter[(sy + 4 * gamma) >> WARPEDDIFF_PREC_BITS];
__m128i tmp_6 = filter[(sy + 6 * gamma) >> WARPEDDIFF_PREC_BITS];
__m128i tmp_0 = *(__m128i const *)av1_get_warped_filter(
(sy + 0 * gamma) >> WARPEDDIFF_PREC_BITS);
__m128i tmp_2 = *(__m128i const *)av1_get_warped_filter(
(sy + 2 * gamma) >> WARPEDDIFF_PREC_BITS);
__m128i tmp_4 = *(__m128i const *)av1_get_warped_filter(
(sy + 4 * gamma) >> WARPEDDIFF_PREC_BITS);
__m128i tmp_6 = *(__m128i const *)av1_get_warped_filter(
(sy + 6 * gamma) >> WARPEDDIFF_PREC_BITS);
__m128i tmp_8 = _mm_unpacklo_epi32(tmp_0, tmp_2);
__m128i tmp_10 = _mm_unpacklo_epi32(tmp_4, tmp_6);
......@@ -220,10 +230,14 @@ void av1_warp_affine_sse2(int32_t *mat, uint8_t *ref, int width, int height,
__m128i src_5 = _mm_unpackhi_epi16(src[4], src[5]);
__m128i src_7 = _mm_unpackhi_epi16(src[6], src[7]);
__m128i tmp_1 = filter[(sy + 1 * gamma) >> WARPEDDIFF_PREC_BITS];
__m128i tmp_3 = filter[(sy + 3 * gamma) >> WARPEDDIFF_PREC_BITS];
__m128i tmp_5 = filter[(sy + 5 * gamma) >> WARPEDDIFF_PREC_BITS];
__m128i tmp_7 = filter[(sy + 7 * gamma) >> WARPEDDIFF_PREC_BITS];
__m128i tmp_1 = *(__m128i const *)av1_get_warped_filter(
(sy + 1 * gamma) >> WARPEDDIFF_PREC_BITS);
__m128i tmp_3 = *(__m128i const *)av1_get_warped_filter(
(sy + 3 * gamma) >> WARPEDDIFF_PREC_BITS);
__m128i tmp_5 = *(__m128i const *)av1_get_warped_filter(
(sy + 5 * gamma) >> WARPEDDIFF_PREC_BITS);
__m128i tmp_7 = *(__m128i const *)av1_get_warped_filter(
(sy + 7 * gamma) >> WARPEDDIFF_PREC_BITS);
__m128i tmp_9 = _mm_unpacklo_epi32(tmp_1, tmp_3);
__m128i tmp_11 = _mm_unpacklo_epi32(tmp_5, tmp_7);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment