Commit 8d8638a1 authored by Yue Chen

Use 3-tap spatial filter in FILTER_INTRA experiment

3-tap recursive intra prediction filters are added.
Macro USE_3TAP_INTRA_FILTER is set to 1 to use 3-tap by default.
Coding gain of FILTER_INTRA experiment in AWCY, high delay 150f
3-tap: 0.51%
4-tap: 0.68%

Change-Id: I44192dd08bfd8155f58a9b0b5cf1de88fceb762e
parent ae7c458a
......@@ -274,47 +274,47 @@ if (aom_config("CONFIG_NEW_QUANT") eq "yes") {
# FILTER_INTRA predictor functions
if (aom_config("CONFIG_FILTER_INTRA") eq "yes") {
add_proto qw/void av1_dc_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left";
specialize qw/av1_dc_filter_predictor sse4_1/;
specialize qw/av1_dc_filter_predictor/;
add_proto qw/void av1_v_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left";
specialize qw/av1_v_filter_predictor sse4_1/;
specialize qw/av1_v_filter_predictor/;
add_proto qw/void av1_h_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left";
specialize qw/av1_h_filter_predictor sse4_1/;
specialize qw/av1_h_filter_predictor/;
add_proto qw/void av1_d45_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left";
specialize qw/av1_d45_filter_predictor sse4_1/;
specialize qw/av1_d45_filter_predictor/;
add_proto qw/void av1_d135_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left";
specialize qw/av1_d135_filter_predictor sse4_1/;
specialize qw/av1_d135_filter_predictor/;
add_proto qw/void av1_d117_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left";
specialize qw/av1_d117_filter_predictor sse4_1/;
specialize qw/av1_d117_filter_predictor/;
add_proto qw/void av1_d153_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left";
specialize qw/av1_d153_filter_predictor sse4_1/;
specialize qw/av1_d153_filter_predictor/;
add_proto qw/void av1_d207_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left";
specialize qw/av1_d207_filter_predictor sse4_1/;
specialize qw/av1_d207_filter_predictor/;
add_proto qw/void av1_d63_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left";
specialize qw/av1_d63_filter_predictor sse4_1/;
specialize qw/av1_d63_filter_predictor/;
add_proto qw/void av1_tm_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left";
specialize qw/av1_tm_filter_predictor sse4_1/;
specialize qw/av1_tm_filter_predictor/;
# High bitdepth functions
if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
add_proto qw/void av1_highbd_dc_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/av1_highbd_dc_filter_predictor sse4_1/;
specialize qw/av1_highbd_dc_filter_predictor/;
add_proto qw/void av1_highbd_v_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/av1_highbd_v_filter_predictor sse4_1/;
specialize qw/av1_highbd_v_filter_predictor/;
add_proto qw/void av1_highbd_h_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/av1_highbd_h_filter_predictor sse4_1/;
specialize qw/av1_highbd_h_filter_predictor/;
add_proto qw/void av1_highbd_d45_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/av1_highbd_d45_filter_predictor sse4_1/;
specialize qw/av1_highbd_d45_filter_predictor/;
add_proto qw/void av1_highbd_d135_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/av1_highbd_d135_filter_predictor sse4_1/;
specialize qw/av1_highbd_d135_filter_predictor/;
add_proto qw/void av1_highbd_d117_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/av1_highbd_d117_filter_predictor sse4_1/;
specialize qw/av1_highbd_d117_filter_predictor/;
add_proto qw/void av1_highbd_d153_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/av1_highbd_d153_filter_predictor sse4_1/;
specialize qw/av1_highbd_d153_filter_predictor/;
add_proto qw/void av1_highbd_d207_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/av1_highbd_d207_filter_predictor sse4_1/;
specialize qw/av1_highbd_d207_filter_predictor/;
add_proto qw/void av1_highbd_d63_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/av1_highbd_d63_filter_predictor sse4_1/;
specialize qw/av1_highbd_d63_filter_predictor/;
add_proto qw/void av1_highbd_tm_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/av1_highbd_tm_filter_predictor sse4_1/;
specialize qw/av1_highbd_tm_filter_predictor/;
}
}
......
......@@ -256,6 +256,7 @@ typedef struct {
#endif // CONFIG_PALETTE
#if CONFIG_FILTER_INTRA
#define USE_3TAP_INTRA_FILTER 1 // 0: 4-tap; 1: 3-tap
typedef struct {
// 1: an ext intra mode is used; 0: otherwise.
uint8_t use_filter_intra_mode[PLANE_TYPES];
......
......@@ -1162,6 +1162,86 @@ static void highbd_dr_predictor(uint16_t *dst, ptrdiff_t stride, int bs,
#endif // CONFIG_EXT_INTRA
#if CONFIG_FILTER_INTRA
#if USE_3TAP_INTRA_FILTER
// Fixed-point coefficients for the 3-tap recursive filter-intra predictors.
//
// Indexed as [tx_size][mode][tap]. The predictors in this file read the taps
// as:
//   [0] weight of the sample directly above,
//   [1] weight of the sample directly to the left,
//   [2] weight of the above-left sample.
// Products are scaled back with FILTER_INTRA_PREC_BITS by the predictors.
// Almost every row sums to 1024 (presumably 1 << FILTER_INTRA_PREC_BITS, i.e.
// unity gain -- TODO confirm against the macro's definition).
//
// The last row of the two smallest sizes previously read { 883, 902, 761 }.
// Its taps summed to 2546, which makes the recursion diverge, and every other
// size has a negative third tap in that position; the sign is restored here.
//
// NOTE(review): three rows still do not sum to 1024 -- { 591, 613, 180 },
// { 945, 668, -579 } and { 177, 760, -87 }. They are left unchanged because the
// intended values cannot be determined from this file; please verify them
// against the trained coefficients.
int av1_filter_intra_taps_3[TX_SIZES][INTRA_MODES][3] = {
#if CONFIG_CB4X4
  {
      { 697, 836, -509 },
      { 993, 513, -482 },
      { 381, 984, -341 },
      { 642, 1169, -787 },
      { 590, 553, -119 },
      { 762, 385, -123 },
      { 358, 687, -21 },
      { 411, 1083, -470 },
      { 912, 814, -702 },
      { 883, 902, -761 },
  },
#endif
  {
      { 697, 836, -509 },
      { 993, 513, -482 },
      { 381, 984, -341 },
      { 642, 1169, -787 },
      { 590, 553, -119 },
      { 762, 385, -123 },
      { 358, 687, -21 },
      { 411, 1083, -470 },
      { 912, 814, -702 },
      { 883, 902, -761 },
  },
  {
      { 659, 816, -451 },
      { 980, 625, -581 },
      { 558, 962, -496 },
      { 681, 888, -545 },
      { 591, 613, 180 },
      { 778, 399, -153 },
      { 495, 641, -112 },
      { 671, 937, -584 },
      { 745, 940, -661 },
      { 839, 911, -726 },
  },
  {
      { 539, 927, -442 },
      { 1003, 714, -693 },
      { 349, 1271, -596 },
      { 820, 764, -560 },
      { 524, 816, -316 },
      { 780, 681, -437 },
      { 586, 795, -357 },
      { 551, 1135, -663 },
      { 593, 1061, -630 },
      { 974, 970, -920 },
  },
  {
      { 595, 919, -490 },
      { 945, 668, -579 },
      { 495, 962, -433 },
      { 385, 1551, -912 },
      { 455, 554, 15 },
      { 852, 478, -306 },
      { 177, 760, -87 },
      { -65, 1611, -522 },
      { 815, 894, -685 },
      { 846, 1010, -832 },
  },
#if CONFIG_TX64X64
  {
      { 595, 919, -490 },
      { 945, 668, -579 },
      { 495, 962, -433 },
      { 385, 1551, -912 },
      { 455, 554, 15 },
      { 852, 478, -306 },
      { 177, 760, -87 },
      { -65, 1611, -522 },
      { 815, 894, -685 },
      { 846, 1010, -832 },
  },
#endif  // CONFIG_TX64X64
};
#else
int av1_filter_intra_taps_4[TX_SIZES][INTRA_MODES][4] = {
#if CONFIG_CB4X4
{
......@@ -1240,6 +1320,7 @@ int av1_filter_intra_taps_4[TX_SIZES][INTRA_MODES][4] = {
},
#endif // CONFIG_TX64X64
};
#endif
static INLINE TX_SIZE get_txsize_from_blocklen(int bs) {
switch (bs) {
......@@ -1254,6 +1335,51 @@ static INLINE TX_SIZE get_txsize_from_blocklen(int bs) {
}
}
#if USE_3TAP_INTRA_FILTER
// Recursive 3-tap filter-intra prediction for 8-bit pixels.
//
//   dst    - output block, written row by row
//   stride - distance between consecutive rows of dst
//   bs     - block width and height; bs + 1 must fit the scratch buffer
//   above  - row above the block; above[-1] .. above[bs - 1] are read
//   left   - column left of the block; left[0] .. left[bs - 1] are read
//   mode   - second index into av1_filter_intra_taps_3
//
// The rounded mean of the bs above and bs left neighbours is removed from the
// border samples, the interior is filled by recursive filtering, and the mean
// is added back before clipping to the pixel range.
static void filter_intra_predictors_3tap(uint8_t *dst, ptrdiff_t stride, int bs,
                                         const uint8_t *above,
                                         const uint8_t *left, int mode) {
#if CONFIG_TX64X64
  int buffer[65][65];
#else
  int buffer[33][33];
#endif  // CONFIG_TX64X64
  const TX_SIZE tx_size = get_txsize_from_blocklen(bs);
  const int tap_above = av1_filter_intra_taps_3[tx_size][mode][0];
  const int tap_left = av1_filter_intra_taps_3[tx_size][mode][1];
  const int tap_corner = av1_filter_intra_taps_3[tx_size][mode][2];
  int row, col;
  int sum = 0;
  int mean;
  int acc;

  // Rounded average over the 2 * bs neighbouring samples.
  for (col = 0; col < bs; ++col) {
    sum += (int)left[col];
    sum += (int)above[col];
  }
  mean = (sum + bs) / (2 * bs);

  // Row 0 holds the mean-removed above row; column 0 of that row is the
  // above-left sample, above[-1].
  for (col = 0; col <= bs; ++col) {
    buffer[0][col] = (int)above[col - 1] - mean;
  }

  // Column 0 of rows 1..bs holds the mean-removed left column.
  for (row = 1; row <= bs; ++row) {
    buffer[row][0] = (int)left[row - 1] - mean;
  }

  // Each interior sample is a weighted sum of its above, left and above-left
  // neighbours, scaled back by FILTER_INTRA_PREC_BITS.
  for (row = 1; row <= bs; ++row) {
    for (col = 1; col <= bs; ++col) {
      acc = tap_above * buffer[row - 1][col] + tap_left * buffer[row][col - 1] +
            tap_corner * buffer[row - 1][col - 1];
      buffer[row][col] = ROUND_POWER_OF_TWO_SIGNED(acc, FILTER_INTRA_PREC_BITS);
    }
  }

  // Restore the mean and clip into the destination block.
  for (row = 1; row <= bs; ++row, dst += stride) {
    for (col = 1; col <= bs; ++col) {
      acc = buffer[row][col] + mean;
      dst[col - 1] = clip_pixel(acc);
    }
  }
}
#else
static void filter_intra_predictors_4tap(uint8_t *dst, ptrdiff_t stride, int bs,
const uint8_t *above,
const uint8_t *left, int mode) {
......@@ -1298,55 +1424,96 @@ static void filter_intra_predictors_4tap(uint8_t *dst, ptrdiff_t stride, int bs,
dst += stride;
}
}
#endif
// 8-bit filter-intra predictor for mode DC_PRED. Forwards to the 3-tap or
// 4-tap kernel, chosen at compile time by USE_3TAP_INTRA_FILTER.
void av1_dc_filter_predictor_c(uint8_t *dst, ptrdiff_t stride, int bs,
                               const uint8_t *above, const uint8_t *left) {
#if !USE_3TAP_INTRA_FILTER
  filter_intra_predictors_4tap(dst, stride, bs, above, left, DC_PRED);
#else
  filter_intra_predictors_3tap(dst, stride, bs, above, left, DC_PRED);
#endif  // !USE_3TAP_INTRA_FILTER
}
// 8-bit filter-intra predictor for mode V_PRED. Forwards to the 3-tap or
// 4-tap kernel, chosen at compile time by USE_3TAP_INTRA_FILTER.
void av1_v_filter_predictor_c(uint8_t *dst, ptrdiff_t stride, int bs,
                              const uint8_t *above, const uint8_t *left) {
#if !USE_3TAP_INTRA_FILTER
  filter_intra_predictors_4tap(dst, stride, bs, above, left, V_PRED);
#else
  filter_intra_predictors_3tap(dst, stride, bs, above, left, V_PRED);
#endif  // !USE_3TAP_INTRA_FILTER
}
// 8-bit filter-intra predictor for mode H_PRED. Forwards to the 3-tap or
// 4-tap kernel, chosen at compile time by USE_3TAP_INTRA_FILTER.
void av1_h_filter_predictor_c(uint8_t *dst, ptrdiff_t stride, int bs,
                              const uint8_t *above, const uint8_t *left) {
#if !USE_3TAP_INTRA_FILTER
  filter_intra_predictors_4tap(dst, stride, bs, above, left, H_PRED);
#else
  filter_intra_predictors_3tap(dst, stride, bs, above, left, H_PRED);
#endif  // !USE_3TAP_INTRA_FILTER
}
// 8-bit filter-intra predictor for mode D45_PRED. Forwards to the 3-tap or
// 4-tap kernel, chosen at compile time by USE_3TAP_INTRA_FILTER.
void av1_d45_filter_predictor_c(uint8_t *dst, ptrdiff_t stride, int bs,
                                const uint8_t *above, const uint8_t *left) {
#if !USE_3TAP_INTRA_FILTER
  filter_intra_predictors_4tap(dst, stride, bs, above, left, D45_PRED);
#else
  filter_intra_predictors_3tap(dst, stride, bs, above, left, D45_PRED);
#endif  // !USE_3TAP_INTRA_FILTER
}
// 8-bit filter-intra predictor for mode D135_PRED. Forwards to the 3-tap or
// 4-tap kernel, chosen at compile time by USE_3TAP_INTRA_FILTER.
void av1_d135_filter_predictor_c(uint8_t *dst, ptrdiff_t stride, int bs,
                                 const uint8_t *above, const uint8_t *left) {
#if !USE_3TAP_INTRA_FILTER
  filter_intra_predictors_4tap(dst, stride, bs, above, left, D135_PRED);
#else
  filter_intra_predictors_3tap(dst, stride, bs, above, left, D135_PRED);
#endif  // !USE_3TAP_INTRA_FILTER
}
// 8-bit filter-intra predictor for mode D117_PRED. Forwards to the 3-tap or
// 4-tap kernel, chosen at compile time by USE_3TAP_INTRA_FILTER.
void av1_d117_filter_predictor_c(uint8_t *dst, ptrdiff_t stride, int bs,
                                 const uint8_t *above, const uint8_t *left) {
#if !USE_3TAP_INTRA_FILTER
  filter_intra_predictors_4tap(dst, stride, bs, above, left, D117_PRED);
#else
  filter_intra_predictors_3tap(dst, stride, bs, above, left, D117_PRED);
#endif  // !USE_3TAP_INTRA_FILTER
}
// 8-bit filter-intra predictor for mode D153_PRED. Forwards to the 3-tap or
// 4-tap kernel, chosen at compile time by USE_3TAP_INTRA_FILTER.
void av1_d153_filter_predictor_c(uint8_t *dst, ptrdiff_t stride, int bs,
                                 const uint8_t *above, const uint8_t *left) {
#if !USE_3TAP_INTRA_FILTER
  filter_intra_predictors_4tap(dst, stride, bs, above, left, D153_PRED);
#else
  filter_intra_predictors_3tap(dst, stride, bs, above, left, D153_PRED);
#endif  // !USE_3TAP_INTRA_FILTER
}
// 8-bit filter-intra predictor for mode D207_PRED. Forwards to the 3-tap or
// 4-tap kernel, chosen at compile time by USE_3TAP_INTRA_FILTER.
void av1_d207_filter_predictor_c(uint8_t *dst, ptrdiff_t stride, int bs,
                                 const uint8_t *above, const uint8_t *left) {
#if !USE_3TAP_INTRA_FILTER
  filter_intra_predictors_4tap(dst, stride, bs, above, left, D207_PRED);
#else
  filter_intra_predictors_3tap(dst, stride, bs, above, left, D207_PRED);
#endif  // !USE_3TAP_INTRA_FILTER
}
// 8-bit filter-intra predictor for mode D63_PRED. Forwards to the 3-tap or
// 4-tap kernel, chosen at compile time by USE_3TAP_INTRA_FILTER.
void av1_d63_filter_predictor_c(uint8_t *dst, ptrdiff_t stride, int bs,
                                const uint8_t *above, const uint8_t *left) {
#if !USE_3TAP_INTRA_FILTER
  filter_intra_predictors_4tap(dst, stride, bs, above, left, D63_PRED);
#else
  filter_intra_predictors_3tap(dst, stride, bs, above, left, D63_PRED);
#endif  // !USE_3TAP_INTRA_FILTER
}
// 8-bit filter-intra predictor for mode TM_PRED. Forwards to the 3-tap or
// 4-tap kernel, chosen at compile time by USE_3TAP_INTRA_FILTER.
void av1_tm_filter_predictor_c(uint8_t *dst, ptrdiff_t stride, int bs,
                               const uint8_t *above, const uint8_t *left) {
#if !USE_3TAP_INTRA_FILTER
  filter_intra_predictors_4tap(dst, stride, bs, above, left, TM_PRED);
#else
  filter_intra_predictors_3tap(dst, stride, bs, above, left, TM_PRED);
#endif  // !USE_3TAP_INTRA_FILTER
}
static void filter_intra_predictors(FILTER_INTRA_MODE mode, uint8_t *dst,
......@@ -1387,6 +1554,52 @@ static void filter_intra_predictors(FILTER_INTRA_MODE mode, uint8_t *dst,
}
}
#if CONFIG_AOM_HIGHBITDEPTH
#if USE_3TAP_INTRA_FILTER
// Recursive 3-tap filter-intra prediction for high-bitdepth (uint16_t) pixels.
//
//   dst    - output block, written row by row
//   stride - distance between consecutive rows of dst
//   bs     - block width and height; bs + 1 must fit the scratch buffer
//   above  - row above the block; above[-1] .. above[bs - 1] are read
//   left   - column left of the block; left[0] .. left[bs - 1] are read
//   mode   - second index into av1_filter_intra_taps_3
//   bd     - bit depth handed to clip_pixel_highbd() for the final clip
//
// The rounded mean of the bs above and bs left neighbours is removed from the
// border samples, the interior is filled by recursive filtering, and the mean
// is added back before clipping.
static void highbd_filter_intra_predictors_3tap(uint16_t *dst, ptrdiff_t stride,
                                                int bs, const uint16_t *above,
                                                const uint16_t *left, int mode,
                                                int bd) {
#if CONFIG_TX64X64
  int preds[65][65];
#else
  int preds[33][33];
#endif  // CONFIG_TX64X64
  const TX_SIZE tx_size = get_txsize_from_blocklen(bs);
  const int tap_above = av1_filter_intra_taps_3[tx_size][mode][0];
  const int tap_left = av1_filter_intra_taps_3[tx_size][mode][1];
  const int tap_corner = av1_filter_intra_taps_3[tx_size][mode][2];
  int row, col;
  int sum = 0;
  int mean;
  int acc;

  // Rounded average over the 2 * bs neighbouring samples.
  for (col = 0; col < bs; ++col) {
    sum += (int)left[col];
    sum += (int)above[col];
  }
  mean = (sum + bs) / (2 * bs);

  // Row 0 holds the mean-removed above row; column 0 of that row is the
  // above-left sample, above[-1].
  for (col = 0; col <= bs; ++col) {
    preds[0][col] = (int)above[col - 1] - mean;
  }

  // Column 0 of rows 1..bs holds the mean-removed left column.
  for (row = 1; row <= bs; ++row) {
    preds[row][0] = (int)left[row - 1] - mean;
  }

  // Each interior sample is a weighted sum of its above, left and above-left
  // neighbours, scaled back by FILTER_INTRA_PREC_BITS.
  for (row = 1; row <= bs; ++row) {
    for (col = 1; col <= bs; ++col) {
      acc = tap_above * preds[row - 1][col] + tap_left * preds[row][col - 1] +
            tap_corner * preds[row - 1][col - 1];
      preds[row][col] = ROUND_POWER_OF_TWO_SIGNED(acc, FILTER_INTRA_PREC_BITS);
    }
  }

  // Restore the mean and clip into the destination block.
  for (row = 1; row <= bs; ++row, dst += stride) {
    for (col = 1; col <= bs; ++col) {
      acc = preds[row][col] + mean;
      dst[col - 1] = clip_pixel_highbd(acc, bd);
    }
  }
}
#else
static void highbd_filter_intra_predictors_4tap(uint16_t *dst, ptrdiff_t stride,
int bs, const uint16_t *above,
const uint16_t *left, int mode,
......@@ -1432,73 +1645,122 @@ static void highbd_filter_intra_predictors_4tap(uint16_t *dst, ptrdiff_t stride,
dst += stride;
}
}
#endif
// High-bitdepth filter-intra predictor for mode DC_PRED. Forwards to the
// 3-tap or 4-tap kernel, chosen at compile time by USE_3TAP_INTRA_FILTER.
void av1_highbd_dc_filter_predictor_c(uint16_t *dst, ptrdiff_t stride, int bs,
                                      const uint16_t *above,
                                      const uint16_t *left, int bd) {
#if !USE_3TAP_INTRA_FILTER
  highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, DC_PRED,
                                      bd);
#else
  highbd_filter_intra_predictors_3tap(dst, stride, bs, above, left, DC_PRED,
                                      bd);
#endif  // !USE_3TAP_INTRA_FILTER
}
// High-bitdepth filter-intra predictor for mode V_PRED. Forwards to the
// 3-tap or 4-tap kernel, chosen at compile time by USE_3TAP_INTRA_FILTER.
void av1_highbd_v_filter_predictor_c(uint16_t *dst, ptrdiff_t stride, int bs,
                                     const uint16_t *above,
                                     const uint16_t *left, int bd) {
#if !USE_3TAP_INTRA_FILTER
  highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, V_PRED, bd);
#else
  highbd_filter_intra_predictors_3tap(dst, stride, bs, above, left, V_PRED, bd);
#endif  // !USE_3TAP_INTRA_FILTER
}
// High-bitdepth filter-intra predictor for mode H_PRED. Forwards to the
// 3-tap or 4-tap kernel, chosen at compile time by USE_3TAP_INTRA_FILTER.
void av1_highbd_h_filter_predictor_c(uint16_t *dst, ptrdiff_t stride, int bs,
                                     const uint16_t *above,
                                     const uint16_t *left, int bd) {
#if !USE_3TAP_INTRA_FILTER
  highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, H_PRED, bd);
#else
  highbd_filter_intra_predictors_3tap(dst, stride, bs, above, left, H_PRED, bd);
#endif  // !USE_3TAP_INTRA_FILTER
}
// High-bitdepth filter-intra predictor for mode D45_PRED. Forwards to the
// 3-tap or 4-tap kernel, chosen at compile time by USE_3TAP_INTRA_FILTER.
void av1_highbd_d45_filter_predictor_c(uint16_t *dst, ptrdiff_t stride, int bs,
                                       const uint16_t *above,
                                       const uint16_t *left, int bd) {
#if !USE_3TAP_INTRA_FILTER
  highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D45_PRED,
                                      bd);
#else
  highbd_filter_intra_predictors_3tap(dst, stride, bs, above, left, D45_PRED,
                                      bd);
#endif  // !USE_3TAP_INTRA_FILTER
}
// High-bitdepth filter-intra predictor for mode D135_PRED. Forwards to the
// 3-tap or 4-tap kernel, chosen at compile time by USE_3TAP_INTRA_FILTER.
void av1_highbd_d135_filter_predictor_c(uint16_t *dst, ptrdiff_t stride, int bs,
                                        const uint16_t *above,
                                        const uint16_t *left, int bd) {
#if !USE_3TAP_INTRA_FILTER
  highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D135_PRED,
                                      bd);
#else
  highbd_filter_intra_predictors_3tap(dst, stride, bs, above, left, D135_PRED,
                                      bd);
#endif  // !USE_3TAP_INTRA_FILTER
}
// High-bitdepth filter-intra predictor for mode D117_PRED. Forwards to the
// 3-tap or 4-tap kernel, chosen at compile time by USE_3TAP_INTRA_FILTER.
void av1_highbd_d117_filter_predictor_c(uint16_t *dst, ptrdiff_t stride, int bs,
                                        const uint16_t *above,
                                        const uint16_t *left, int bd) {
#if !USE_3TAP_INTRA_FILTER
  highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D117_PRED,
                                      bd);
#else
  highbd_filter_intra_predictors_3tap(dst, stride, bs, above, left, D117_PRED,
                                      bd);
#endif  // !USE_3TAP_INTRA_FILTER
}
// High-bitdepth filter-intra predictor for mode D153_PRED. Forwards to the
// 3-tap or 4-tap kernel, chosen at compile time by USE_3TAP_INTRA_FILTER.
void av1_highbd_d153_filter_predictor_c(uint16_t *dst, ptrdiff_t stride, int bs,
                                        const uint16_t *above,
                                        const uint16_t *left, int bd) {
#if !USE_3TAP_INTRA_FILTER
  highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D153_PRED,
                                      bd);
#else
  highbd_filter_intra_predictors_3tap(dst, stride, bs, above, left, D153_PRED,
                                      bd);
#endif  // !USE_3TAP_INTRA_FILTER
}
// High-bitdepth filter-intra predictor for mode D207_PRED. Forwards to the
// 3-tap or 4-tap kernel, chosen at compile time by USE_3TAP_INTRA_FILTER.
void av1_highbd_d207_filter_predictor_c(uint16_t *dst, ptrdiff_t stride, int bs,
                                        const uint16_t *above,
                                        const uint16_t *left, int bd) {
#if !USE_3TAP_INTRA_FILTER
  highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D207_PRED,
                                      bd);
#else
  highbd_filter_intra_predictors_3tap(dst, stride, bs, above, left, D207_PRED,
                                      bd);
#endif  // !USE_3TAP_INTRA_FILTER
}
// High-bitdepth filter-intra predictor for mode D63_PRED. Forwards to the
// 3-tap or 4-tap kernel, chosen at compile time by USE_3TAP_INTRA_FILTER.
void av1_highbd_d63_filter_predictor_c(uint16_t *dst, ptrdiff_t stride, int bs,
                                       const uint16_t *above,
                                       const uint16_t *left, int bd) {
#if !USE_3TAP_INTRA_FILTER
  highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D63_PRED,
                                      bd);
#else
  highbd_filter_intra_predictors_3tap(dst, stride, bs, above, left, D63_PRED,
                                      bd);
#endif  // !USE_3TAP_INTRA_FILTER
}
// High-bitdepth filter-intra predictor for mode TM_PRED. Forwards to the
// 3-tap or 4-tap kernel, chosen at compile time by USE_3TAP_INTRA_FILTER.
void av1_highbd_tm_filter_predictor_c(uint16_t *dst, ptrdiff_t stride, int bs,
                                      const uint16_t *above,
                                      const uint16_t *left, int bd) {
#if !USE_3TAP_INTRA_FILTER
  highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, TM_PRED,
                                      bd);
#else
  highbd_filter_intra_predictors_3tap(dst, stride, bs, above, left, TM_PRED,
                                      bd);
#endif  // !USE_3TAP_INTRA_FILTER
}
static void highbd_filter_intra_predictors(FILTER_INTRA_MODE mode,
......
......@@ -172,9 +172,11 @@ LIBAOM_TEST_SRCS-$(HAVE_SSSE3) += masked_sad_test.cc
LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_wedge_utils_test.cc
endif
ifeq ($(CONFIG_FILTER_INTRA),yes)
LIBAOM_TEST_SRCS-$(HAVE_SSE4_1) += filterintra_predictors_test.cc
endif
## Skip the unit test written for 4-tap filter intra predictor, because we
## revert to 3-tap filter.
## ifeq ($(CONFIG_FILTER_INTRA),yes)
## LIBAOM_TEST_SRCS-$(HAVE_SSE4_1) += filterintra_predictors_test.cc
## endif
ifeq ($(CONFIG_MOTION_VAR),yes)
LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += obmc_sad_test.cc
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment