Commit 11bac017 authored by Yue Chen
Browse files

Improve filter_intra throughput

The prediction can be done in 2x2 or 4x4 processing units; within each
unit there are no dependencies, so the computation can be fully
parallelized.
Also turn filter_intra on for blocks smaller than 8x8, and disable it for
txbs larger than 32x32.

Change-Id: I4f8a3104019cbb35e88f342d97516f81b19152b0
parent 9c1f92ba
......@@ -198,7 +198,7 @@ static const PREDICTION_MODE fimode_to_intradir[FILTER_INTRA_MODES] = {
DC_PRED, V_PRED, H_PRED, D117_PRED, D153_PRED, DC_PRED
};
// When nonzero, filter-intra is refused for transform blocks with a side
// shorter than 8 pixels (see av1_filter_intra_allowed_txsize). It is 0 here,
// so sub-8x8 blocks may use filter-intra. The macro must be defined exactly
// once: redefining it with a different value is an incompatible macro
// redefinition.
#define DISABLE_SUB8X8_FILTER_INTRA 0
static INLINE int av1_filter_intra_allowed_bsize(BLOCK_SIZE bs) {
(void)bs;
......@@ -212,9 +212,10 @@ static INLINE int av1_filter_intra_allowed_bsize(BLOCK_SIZE bs) {
// Returns nonzero when filter-intra prediction may be used with transform
// size |tx|.
//
// Both dimensions must be at most 32: the filter-intra predictors assert
// bw <= 32 && bh <= 32. When DISABLE_SUB8X8_FILTER_INTRA is nonzero, both
// dimensions must additionally be at least 8.
//
// Each preprocessor branch has exactly one return statement; an earlier
// unconditional return would make the 32x32 cap unreachable.
static INLINE int av1_filter_intra_allowed_txsize(TX_SIZE tx) {
#if DISABLE_SUB8X8_FILTER_INTRA
  return tx_size_wide[tx] >= 8 && tx_size_high[tx] >= 8 &&
         tx_size_wide[tx] <= 32 && tx_size_high[tx] <= 32;
#else
  return tx_size_wide[tx] <= 32 && tx_size_high[tx] <= 32;
#endif
}
#endif // CONFIG_FILTER_INTRA
......
......@@ -1110,6 +1110,268 @@ static void highbd_dr_predictor(uint16_t *dst, ptrdiff_t stride,
#endif // CONFIG_EXT_INTRA
#if CONFIG_FILTER_INTRA
#if FILTER_INTRA_PROC_UNIT_SIZE == 2
// Filter taps used when filter-intra prediction runs on 2x2 processing units.
//
// Indexing: [mode][k][tap]
//   mode - filter-intra mode, 0 .. FILTER_INTRA_MODES - 1.
//   k    - output pixel inside the 2x2 unit in raster order
//          (row offset = k >> 1, column offset = k & 1).
//   tap  - weight applied to one of the five neighbours of the unit whose
//          top-left output pixel is at (r, c):
//            tap 0: (r - 1, c - 1)  above-left
//            tap 1: (r - 1, c)      above, column 0
//            tap 2: (r - 1, c + 1)  above, column 1
//            tap 3: (r,     c - 1)  left, row 0
//            tap 4: (r + 1, c - 1)  left, row 1
//
// Every row sums to 8, i.e. 1 << 3, matching the 3-bit rounding shift used
// with the 2x2 processing unit.
//
// The table is only ever read, so it is declared const.
static const int filter_intra_taps_2x2procunit[FILTER_INTRA_MODES][4][5] = {
  {
      { -3, 5, 0, 6, 0 },
      { -2, 1, 5, 4, 0 },
      { -2, 3, 0, 1, 6 },
      { -2, 1, 3, 1, 5 },
  },
  {
      { -5, 8, 0, 5, 0 },
      { -3, 0, 8, 3, 0 },
      { -5, 8, 0, 0, 5 },
      { -3, 0, 8, 0, 3 },
  },
  {
      { -4, 4, 0, 8, 0 },
      { -4, 0, 4, 8, 0 },
      { -2, 2, 0, 0, 8 },
      { -2, 0, 2, 0, 8 },
  },
  {
      { -1, 6, 0, 3, 0 },
      { 0, 1, 6, 1, 0 },
      { -1, 5, 0, 1, 3 },
      { 0, 2, 4, 1, 1 },
  },
  {
      { -1, 4, 0, 5, 0 },
      { -1, 2, 4, 3, 0 },
      { -1, 2, 0, 2, 5 },
      { -1, 2, 2, 2, 3 },
  },
  {
      { -6, 7, 0, 7, 0 },
      { -5, 0, 7, 6, 0 },
      { -5, 6, 0, 0, 7 },
      { -4, 0, 6, 0, 6 },
  },
};
#elif FILTER_INTRA_PROC_UNIT_SIZE == 4
// Filter taps used when filter-intra prediction runs on 4x4 processing units.
//
// Indexing: [mode][k][tap]
//   mode - filter-intra mode, 0 .. FILTER_INTRA_MODES - 1.
//   k    - output pixel inside the 4x4 unit in raster order
//          (row offset = k >> 2, column offset = k & 3).
//   tap  - weight applied to one of the nine neighbours of the unit whose
//          top-left output pixel is at (r, c):
//            tap 0:     (r - 1, c - 1)            above-left
//            taps 1..4: (r - 1, c) .. (r - 1, c + 3)  the four pixels above
//            taps 5..8: (r, c - 1) .. (r + 3, c - 1)  the four pixels left
//
// Two variants of the table are provided, selected by
// FILTER_INTRA_SCALE_BITS: a 4-bit precision set and a lower-precision
// fallback. NOTE(review): rows are expected to sum to
// 1 << FILTER_INTRA_SCALE_BITS so the rounding shift normalizes the result;
// this was spot-checked, not verified for every row.
//
// The table is only ever read, so it is declared const.
static const int filter_intra_taps_4x4procunit[FILTER_INTRA_MODES][16][9] = {
#if FILTER_INTRA_SCALE_BITS == 4
  {
      { -6, 10, 0, 0, 0, 12, 0, 0, 0 },
      { -5, 2, 10, 0, 0, 9, 0, 0, 0 },
      { -3, 1, 1, 10, 0, 7, 0, 0, 0 },
      { -3, 1, 1, 2, 10, 5, 0, 0, 0 },
      { -4, 6, 0, 0, 0, 2, 12, 0, 0 },
      { -3, 2, 6, 0, 0, 2, 9, 0, 0 },
      { -3, 2, 2, 6, 0, 2, 7, 0, 0 },
      { -2, 0, 2, 2, 6, 3, 5, 0, 0 },
      { -2, 4, 0, 0, 0, 1, 1, 12, 0 },
      { -3, 2, 4, 0, 0, 2, 2, 9, 0 },
      { -2, 0, 2, 4, 0, 2, 3, 7, 0 },
      { -2, 0, 0, 2, 4, 3, 3, 6, 0 },
      { -1, 2, 0, 0, 0, 1, 1, 1, 12 },
      { -2, 2, 3, 0, 0, 0, 2, 2, 9 },
      { -1, 0, 2, 3, 0, 0, 2, 3, 7 },
      { -1, 0, 0, 2, 3, 0, 3, 3, 6 },
  },
  {
      { -10, 16, 0, 0, 0, 10, 0, 0, 0 },
      { -6, 0, 16, 0, 0, 6, 0, 0, 0 },
      { -4, 0, 0, 16, 0, 4, 0, 0, 0 },
      { -2, 0, 0, 0, 16, 2, 0, 0, 0 },
      { -10, 16, 0, 0, 0, 0, 10, 0, 0 },
      { -6, 0, 16, 0, 0, 0, 6, 0, 0 },
      { -4, 0, 0, 16, 0, 0, 4, 0, 0 },
      { -2, 0, 0, 0, 16, 0, 2, 0, 0 },
      { -10, 16, 0, 0, 0, 0, 0, 10, 0 },
      { -6, 0, 16, 0, 0, 0, 0, 6, 0 },
      { -4, 0, 0, 16, 0, 0, 0, 4, 0 },
      { -2, 0, 0, 0, 16, 0, 0, 2, 0 },
      { -10, 16, 0, 0, 0, 0, 0, 0, 10 },
      { -6, 0, 16, 0, 0, 0, 0, 0, 6 },
      { -4, 0, 0, 16, 0, 0, 0, 0, 4 },
      { -2, 0, 0, 0, 16, 0, 0, 0, 2 },
  },
  {
      { -8, 8, 0, 0, 0, 16, 0, 0, 0 },
      { -8, 0, 8, 0, 0, 16, 0, 0, 0 },
      { -8, 0, 0, 8, 0, 16, 0, 0, 0 },
      { -8, 0, 0, 0, 8, 16, 0, 0, 0 },
      { -4, 4, 0, 0, 0, 0, 16, 0, 0 },
      { -4, 0, 4, 0, 0, 0, 16, 0, 0 },
      { -4, 0, 0, 4, 0, 0, 16, 0, 0 },
      { -4, 0, 0, 0, 4, 0, 16, 0, 0 },
      { -2, 2, 0, 0, 0, 0, 0, 16, 0 },
      { -2, 0, 2, 0, 0, 0, 0, 16, 0 },
      { -2, 0, 0, 2, 0, 0, 0, 16, 0 },
      { -2, 0, 0, 0, 2, 0, 0, 16, 0 },
      { -1, 1, 0, 0, 0, 0, 0, 0, 16 },
      { -1, 0, 1, 0, 0, 0, 0, 0, 16 },
      { -1, 0, 0, 1, 0, 0, 0, 0, 16 },
      { -1, 0, 0, 0, 1, 0, 0, 0, 16 },
  },
  {
      { -2, 12, 0, 0, 0, 6, 0, 0, 0 },
      { -1, 3, 12, 0, 0, 2, 0, 0, 0 },
      { 0, 1, 2, 12, 0, 1, 0, 0, 0 },
      { 0, 0, 1, 3, 12, 0, 0, 0, 0 },
      { -2, 9, 0, 0, 0, 3, 6, 0, 0 },
      { -1, 4, 9, 0, 0, 2, 2, 0, 0 },
      { -1, 2, 4, 9, 0, 1, 1, 0, 0 },
      { 0, 1, 2, 4, 9, 0, 0, 0, 0 },
      { -1, 7, 0, 0, 0, 2, 2, 6, 0 },
      { -1, 4, 7, 0, 0, 2, 2, 2, 0 },
      { 0, 2, 4, 7, 0, 1, 1, 1, 0 },
      { 0, 1, 2, 4, 7, 1, 1, 0, 0 },
      { -1, 5, 0, 0, 0, 1, 2, 3, 6 },
      { 0, 4, 5, 0, 0, 1, 2, 2, 2 },
      { 0, 3, 4, 5, 0, 2, 1, 1, 0 },
      { 0, 2, 3, 4, 5, 1, 1, 0, 0 },
  },
  {
      { -2, 8, 0, 0, 0, 10, 0, 0, 0 },
      { -1, 3, 8, 0, 0, 6, 0, 0, 0 },
      { -1, 2, 3, 8, 0, 4, 0, 0, 0 },
      { 0, 1, 2, 3, 8, 2, 0, 0, 0 },
      { -1, 4, 0, 0, 0, 3, 10, 0, 0 },
      { -1, 3, 4, 0, 0, 4, 6, 0, 0 },
      { -1, 2, 3, 4, 0, 4, 4, 0, 0 },
      { 0, 2, 2, 3, 4, 3, 2, 0, 0 },
      { -1, 2, 0, 0, 0, 2, 3, 10, 0 },
      { -1, 2, 2, 0, 0, 3, 4, 6, 0 },
      { 0, 2, 2, 2, 0, 3, 3, 4, 0 },
      { 0, 2, 3, 2, 0, 3, 3, 3, 0 },
      { 0, 1, 0, 0, 0, 1, 1, 3, 10 },
      { 0, 1, 1, 0, 0, 2, 2, 4, 6 },
      { 0, 2, 1, 0, 0, 2, 3, 4, 4 },
      { 0, 2, 2, 0, 0, 3, 3, 3, 3 },
  },
  {
      { -12, 14, 0, 0, 0, 14, 0, 0, 0 },
      { -10, 0, 14, 0, 0, 12, 0, 0, 0 },
      { -9, 0, 0, 14, 0, 11, 0, 0, 0 },
      { -8, 0, 0, 0, 14, 10, 0, 0, 0 },
      { -10, 12, 0, 0, 0, 0, 14, 0, 0 },
      { -9, 1, 12, 0, 0, 0, 12, 0, 0 },
      { -8, 0, 0, 12, 0, 1, 11, 0, 0 },
      { -7, 0, 0, 1, 12, 1, 9, 0, 0 },
      { -9, 11, 0, 0, 0, 0, 0, 14, 0 },
      { -8, 1, 11, 0, 0, 0, 0, 12, 0 },
      { -8, 0, 1, 11, 0, 0, 1, 11, 0 },
      { -7, 0, 0, 1, 11, 1, 1, 9, 0 },
      { -8, 10, 0, 0, 0, 0, 0, 0, 14 },
      { -7, 1, 9, 0, 0, 0, 0, 1, 12 },
      { -7, 1, 1, 9, 0, 0, 0, 1, 11 },
      { -6, 0, 1, 1, 10, 0, 0, 1, 9 },
  },
#else
  {
      { -3, 5, 0, 0, 0, 6, 0, 0, 0 },
      { -2, 1, 5, 0, 0, 4, 0, 0, 0 },
      { -2, 1, 1, 5, 0, 3, 0, 0, 0 },
      { -1, 0, 1, 1, 5, 2, 0, 0, 0 },
      { -2, 3, 0, 0, 0, 1, 6, 0, 0 },
      { -2, 1, 3, 0, 0, 1, 5, 0, 0 },
      { -1, 0, 1, 3, 0, 1, 4, 0, 0 },
      { -1, 0, 0, 1, 3, 2, 3, 0, 0 },
      { -1, 2, 0, 0, 0, 0, 1, 6, 0 },
      { -1, 1, 2, 0, 0, 0, 1, 5, 0 },
      { -1, 0, 0, 2, 0, 1, 2, 4, 0 },
      { -1, 0, 0, 1, 3, 0, 2, 3, 0 },
      { -1, 1, 0, 0, 0, 1, 0, 1, 6 },
      { -1, 1, 2, 0, 0, 0, 0, 1, 5 },
      { -1, 0, 1, 2, 0, 0, 0, 2, 4 },
      { 0, 0, 0, 1, 2, 0, 0, 2, 3 },
  },
  {
      { -5, 8, 0, 0, 0, 5, 0, 0, 0 },
      { -3, 0, 8, 0, 0, 3, 0, 0, 0 },
      { -2, 0, 0, 8, 0, 2, 0, 0, 0 },
      { -1, 0, 0, 0, 8, 1, 0, 0, 0 },
      { -5, 8, 0, 0, 0, 0, 5, 0, 0 },
      { -3, 0, 8, 0, 0, 0, 3, 0, 0 },
      { -2, 0, 0, 8, 0, 0, 2, 0, 0 },
      { -1, 0, 0, 0, 8, 0, 1, 0, 0 },
      { -5, 8, 0, 0, 0, 0, 0, 5, 0 },
      { -3, 0, 8, 0, 0, 0, 0, 3, 0 },
      { -2, 0, 0, 8, 0, 0, 0, 2, 0 },
      { -1, 0, 0, 0, 8, 0, 0, 1, 0 },
      { -5, 8, 0, 0, 0, 0, 0, 0, 5 },
      { -3, 0, 8, 0, 0, 0, 0, 0, 3 },
      { -2, 0, 0, 8, 0, 0, 0, 0, 2 },
      { -1, 0, 0, 0, 8, 0, 0, 0, 1 },
  },
  {
      { -4, 4, 0, 0, 0, 8, 0, 0, 0 },
      { -4, 0, 4, 0, 0, 8, 0, 0, 0 },
      { -4, 0, 0, 4, 0, 8, 0, 0, 0 },
      { -4, 0, 0, 0, 4, 8, 0, 0, 0 },
      { -2, 2, 0, 0, 0, 0, 8, 0, 0 },
      { -2, 0, 2, 0, 0, 0, 8, 0, 0 },
      { -2, 0, 0, 2, 0, 0, 8, 0, 0 },
      { -2, 0, 0, 0, 2, 0, 8, 0, 0 },
      { -1, 1, 0, 0, 0, 0, 0, 8, 0 },
      { -1, 0, 1, 0, 0, 0, 0, 8, 0 },
      { -1, 0, 0, 1, 0, 0, 0, 8, 0 },
      { -1, 0, 0, 0, 1, 0, 0, 8, 0 },
      { -1, 1, 0, 0, 0, 0, 0, 0, 8 },
      { -1, 0, 1, 0, 0, 0, 0, 0, 8 },
      { -1, 0, 0, 1, 0, 0, 0, 0, 8 },
      { -1, 0, 0, 0, 1, 0, 0, 0, 8 },
  },
  {
      { -1, 6, 0, 0, 0, 3, 0, 0, 0 },
      { 0, 1, 6, 0, 0, 1, 0, 0, 0 },
      { 0, 1, 1, 6, 0, 0, 0, 0, 0 },
      { 0, 0, 1, 1, 6, 0, 0, 0, 0 },
      { -1, 5, 0, 0, 0, 1, 3, 0, 0 },
      { 0, 2, 4, 0, 0, 1, 1, 0, 0 },
      { 0, 1, 2, 4, 0, 1, 0, 0, 0 },
      { 0, 0, 1, 2, 5, 0, 0, 0, 0 },
      { 0, 3, 0, 0, 0, 1, 1, 3, 0 },
      { 0, 2, 3, 0, 0, 1, 1, 1, 0 },
      { 0, 1, 2, 3, 0, 1, 1, 0, 0 },
      { 0, 1, 1, 2, 4, 0, 0, 0, 0 },
      { 0, 3, 0, 0, 0, 1, 0, 1, 3 },
      { 0, 2, 3, 0, 0, 1, 0, 1, 1 },
      { 0, 1, 2, 3, 0, 1, 1, 0, 0 },
      { 0, 1, 2, 2, 3, 0, 0, 0, 0 },
  },
  {
      { -1, 4, 0, 0, 0, 5, 0, 0, 0 },
      { -1, 2, 4, 0, 0, 3, 0, 0, 0 },
      { 0, 1, 1, 4, 0, 2, 0, 0, 0 },
      { 0, 1, 1, 1, 4, 1, 0, 0, 0 },
      { -1, 2, 0, 0, 0, 2, 5, 0, 0 },
      { -1, 2, 2, 0, 0, 2, 3, 0, 0 },
      { 0, 1, 1, 2, 0, 2, 2, 0, 0 },
      { 0, 1, 0, 2, 2, 2, 1, 0, 0 },
      { 0, 1, 0, 0, 0, 1, 1, 5, 0 },
      { 0, 1, 1, 0, 0, 1, 2, 3, 0 },
      { 0, 1, 1, 0, 0, 2, 2, 2, 0 },
      { 0, 1, 1, 0, 0, 2, 2, 2, 0 },
      { 0, 1, 0, 0, 0, 0, 0, 2, 5 },
      { 0, 1, 1, 0, 0, 1, 0, 2, 3 },
      { 0, 1, 1, 0, 0, 2, 0, 2, 2 },
      { 0, 1, 1, 0, 0, 2, 2, 2, 0 },
  },
  {
      { -6, 7, 0, 0, 0, 7, 0, 0, 0 },
      { -5, 0, 7, 0, 0, 6, 0, 0, 0 },
      { -4, 0, 0, 7, 0, 5, 0, 0, 0 },
      { -4, 0, 0, 0, 7, 5, 0, 0, 0 },
      { -5, 6, 0, 0, 0, 0, 7, 0, 0 },
      { -4, 0, 6, 0, 0, 0, 6, 0, 0 },
      { -4, 0, 0, 6, 0, 0, 6, 0, 0 },
      { -4, 0, 0, 0, 6, 1, 5, 0, 0 },
      { -4, 5, 0, 0, 0, 0, 0, 7, 0 },
      { -4, 0, 6, 0, 0, 0, 0, 6, 0 },
      { -4, 0, 0, 6, 0, 0, 0, 6, 0 },
      { -3, 0, 0, 0, 6, 0, 0, 5, 0 },
      { -4, 5, 0, 0, 0, 0, 0, 0, 7 },
      { -4, 1, 5, 0, 0, 0, 0, 0, 6 },
      { -3, 0, 0, 5, 0, 0, 0, 0, 6 },
      { -3, 0, 0, 1, 5, 0, 0, 0, 5 },
  },
#endif
};
#else
static int filter_intra_taps_3[TX_SIZES_ALL][FILTER_INTRA_MODES][3] = {
{
{ 5, 7, -4 },
......@@ -1252,27 +1514,79 @@ static int filter_intra_taps_3[TX_SIZES_ALL][FILTER_INTRA_MODES][3] = {
{ 7, 7, -6 },
},
};
#endif
static void filter_intra_predictors_3tap(uint8_t *dst, ptrdiff_t stride,
TX_SIZE tx_size, const uint8_t *above,
const uint8_t *left, int mode) {
int r, c;
int ipred;
#if CONFIG_TX64X64
int buffer[65][65];
#else
int buffer[33][33];
#endif // CONFIG_TX64X64
const int c0 = filter_intra_taps_3[tx_size][mode][0];
const int c1 = filter_intra_taps_3[tx_size][mode][1];
const int c2 = filter_intra_taps_3[tx_size][mode][2];
const int bw = tx_size_wide[tx_size];
const int bh = tx_size_high[tx_size];
assert(bw <= 32 && bh <= 32);
for (r = 0; r < bh; ++r) buffer[r + 1][0] = (int)left[r];
for (c = 0; c < bw + 1; ++c) buffer[0][c] = (int)above[c - 1];
#if FILTER_INTRA_PROC_UNIT_SIZE == 2
for (r = 1; r < bh + 1; r += 2)
for (c = 1; c < bw + 1; c += 2) {
const int p0 = buffer[r - 1][c - 1];
const int p1 = buffer[r - 1][c];
const int p2 = buffer[r - 1][c + 1];
const int p3 = buffer[r][c - 1];
const int p4 = buffer[r + 1][c - 1];
for (int k = 0; k < 4; ++k) {
int r_offset = k >> 1;
int c_offset = k & 0x01;
buffer[r + r_offset][c + c_offset] =
filter_intra_taps_2x2procunit[mode][k][0] * p0 +
filter_intra_taps_2x2procunit[mode][k][1] * p1 +
filter_intra_taps_2x2procunit[mode][k][2] * p2 +
filter_intra_taps_2x2procunit[mode][k][3] * p3 +
filter_intra_taps_2x2procunit[mode][k][4] * p4;
buffer[r + r_offset][c + c_offset] =
clip_pixel(ROUND_POWER_OF_TWO_SIGNED(
buffer[r + r_offset][c + c_offset], FILTER_INTRA_SCALE_BITS));
}
}
#elif FILTER_INTRA_PROC_UNIT_SIZE == 4
for (r = 1; r < bh + 1; r += 4)
for (c = 1; c < bw + 1; c += 4) {
const int p0 = buffer[r - 1][c - 1];
const int p1 = buffer[r - 1][c];
const int p2 = buffer[r - 1][c + 1];
const int p3 = buffer[r - 1][c + 2];
const int p4 = buffer[r - 1][c + 3];
const int p5 = buffer[r][c - 1];
const int p6 = buffer[r + 1][c - 1];
const int p7 = buffer[r + 2][c - 1];
const int p8 = buffer[r + 3][c - 1];
for (int k = 0; k < 16; ++k) {
int r_offset = k >> 2;
int c_offset = k & 0x03;
buffer[r + r_offset][c + c_offset] =
filter_intra_taps_4x4procunit[mode][k][0] * p0 +
filter_intra_taps_4x4procunit[mode][k][1] * p1 +
filter_intra_taps_4x4procunit[mode][k][2] * p2 +
filter_intra_taps_4x4procunit[mode][k][3] * p3 +
filter_intra_taps_4x4procunit[mode][k][4] * p4 +
filter_intra_taps_4x4procunit[mode][k][5] * p5 +
filter_intra_taps_4x4procunit[mode][k][6] * p6 +
filter_intra_taps_4x4procunit[mode][k][7] * p7 +
filter_intra_taps_4x4procunit[mode][k][8] * p8;
buffer[r + r_offset][c + c_offset] =
clip_pixel(ROUND_POWER_OF_TWO_SIGNED(
buffer[r + r_offset][c + c_offset], FILTER_INTRA_SCALE_BITS));
}
}
#else
int ipred;
const int c0 = filter_intra_taps_3[tx_size][mode][0];
const int c1 = filter_intra_taps_3[tx_size][mode][1];
const int c2 = filter_intra_taps_3[tx_size][mode][2];
for (r = 1; r < bh + 1; ++r)
for (c = 1; c < bw + 1; ++c) {
ipred = c0 * buffer[r - 1][c] + c1 * buffer[r][c - 1] +
......@@ -1280,10 +1594,10 @@ static void filter_intra_predictors_3tap(uint8_t *dst, ptrdiff_t stride,
buffer[r][c] = ROUND_POWER_OF_TWO_SIGNED(ipred, FILTER_INTRA_SCALE_BITS);
buffer[r][c] = clip_pixel(buffer[r][c]);
}
#endif
for (r = 0; r < bh; ++r) {
for (c = 0; c < bw; ++c) {
dst[c] = clip_pixel(buffer[r + 1][c + 1]);
dst[c] = buffer[r + 1][c + 1];
}
dst += stride;
}
......@@ -1360,33 +1674,87 @@ static void highbd_filter_intra_predictors_3tap(uint16_t *dst, ptrdiff_t stride,
const uint16_t *left, int mode,
int bd) {
int r, c;
int ipred;
#if CONFIG_TX64X64
int preds[65][65];
#else
int preds[33][33];
#endif // CONFIG_TX64X64
const int c0 = filter_intra_taps_3[tx_size][mode][0];
const int c1 = filter_intra_taps_3[tx_size][mode][1];
const int c2 = filter_intra_taps_3[tx_size][mode][2];
int buffer[33][33];
const int bw = tx_size_wide[tx_size];
const int bh = tx_size_high[tx_size];
for (r = 0; r < bh; ++r) preds[r + 1][0] = (int)left[r];
assert(bw <= 32 && bh <= 32);
for (c = 0; c < bw + 1; ++c) preds[0][c] = (int)above[c - 1];
for (r = 0; r < bh; ++r) buffer[r + 1][0] = (int)left[r];
for (c = 0; c < bw + 1; ++c) buffer[0][c] = (int)above[c - 1];
#if FILTER_INTRA_PROC_UNIT_SIZE == 2
for (r = 1; r < bh + 1; r += 2)
for (c = 1; c < bw + 1; c += 2) {
const int p0 = buffer[r - 1][c - 1];
const int p1 = buffer[r - 1][c];
const int p2 = buffer[r - 1][c + 1];
const int p3 = buffer[r][c - 1];
const int p4 = buffer[r + 1][c - 1];
for (int k = 0; k < 4; ++k) {
int r_offset = k >> 1;
int c_offset = k & 0x01;
buffer[r + r_offset][c + c_offset] =
filter_intra_taps_2x2procunit[mode][k][0] * p0 +
filter_intra_taps_2x2procunit[mode][k][1] * p1 +
filter_intra_taps_2x2procunit[mode][k][2] * p2 +
filter_intra_taps_2x2procunit[mode][k][3] * p3 +
filter_intra_taps_2x2procunit[mode][k][4] * p4;
buffer[r + r_offset][c + c_offset] = clip_pixel_highbd(
ROUND_POWER_OF_TWO_SIGNED(buffer[r + r_offset][c + c_offset],
FILTER_INTRA_SCALE_BITS),
bd);
}
}
#elif FILTER_INTRA_PROC_UNIT_SIZE == 4
for (r = 1; r < bh + 1; r += 4)
for (c = 1; c < bw + 1; c += 4) {
const int p0 = buffer[r - 1][c - 1];
const int p1 = buffer[r - 1][c];
const int p2 = buffer[r - 1][c + 1];
const int p3 = buffer[r - 1][c + 2];
const int p4 = buffer[r - 1][c + 3];
const int p5 = buffer[r][c - 1];
const int p6 = buffer[r + 1][c - 1];
const int p7 = buffer[r + 2][c - 1];
const int p8 = buffer[r + 3][c - 1];
for (int k = 0; k < 16; ++k) {
int r_offset = k >> 2;
int c_offset = k & 0x03;
buffer[r + r_offset][c + c_offset] =
filter_intra_taps_4x4procunit[mode][k][0] * p0 +
filter_intra_taps_4x4procunit[mode][k][1] * p1 +
filter_intra_taps_4x4procunit[mode][k][2] * p2 +
filter_intra_taps_4x4procunit[mode][k][3] * p3 +
filter_intra_taps_4x4procunit[mode][k][4] * p4 +
filter_intra_taps_4x4procunit[mode][k][5] * p5 +
filter_intra_taps_4x4procunit[mode][k][6] * p6 +
filter_intra_taps_4x4procunit[mode][k][7] * p7 +
filter_intra_taps_4x4procunit[mode][k][8] * p8;
buffer[r + r_offset][c + c_offset] = clip_pixel_highbd(
ROUND_POWER_OF_TWO_SIGNED(buffer[r + r_offset][c + c_offset],
FILTER_INTRA_SCALE_BITS),
bd);
}
}
#else
int ipred;
const int c0 = filter_intra_taps_3[tx_size][mode][0];
const int c1 = filter_intra_taps_3[tx_size][mode][1];
const int c2 = filter_intra_taps_3[tx_size][mode][2];
for (r = 1; r < bh + 1; ++r)
for (c = 1; c < bw + 1; ++c) {
ipred = c0 * preds[r - 1][c] + c1 * preds[r][c - 1] +
c2 * preds[r - 1][c - 1];
preds[r][c] = ROUND_POWER_OF_TWO_SIGNED(ipred, FILTER_INTRA_SCALE_BITS);
preds[r][c] = clip_pixel_highbd(preds[r][c], bd);
ipred = c0 * buffer[r - 1][c] + c1 * buffer[r][c - 1] +
c2 * buffer[r - 1][c - 1];
buffer[r][c] = ROUND_POWER_OF_TWO_SIGNED(ipred, FILTER_INTRA_SCALE_BITS);
buffer[r][c] = clip_pixel_highbd(buffer[r][c], bd);
}
#endif
for (r = 0; r < bh; ++r) {
for (c = 0; c < bw; ++c) {
dst[c] = clip_pixel_highbd(preds[r + 1][c + 1], bd);
dst[c] = buffer[r + 1][c + 1];
}
dst += stride;
}
......
......@@ -42,7 +42,12 @@ static const INTERINTRA_MODE intra_to_interintra_mode[INTRA_MODES] = {
};
#if CONFIG_FILTER_INTRA
// Side length of the square processing unit used by the filter-intra
// predictors. The predictors select the 2x2 tap table and loop when this is 2,
// the 4x4 tap table and loop when it is 4, and fall back to the per-pixel
// 3-tap path for any other value.
#define FILTER_INTRA_PROC_UNIT_SIZE 2
// Number of bits the predictors pass to ROUND_POWER_OF_TWO_SIGNED() to scale
// the weighted sum back down. The 4x4 processing unit uses 4 bits; every other
// configuration uses 3.
#if FILTER_INTRA_PROC_UNIT_SIZE == 4
#define FILTER_INTRA_SCALE_BITS 4
#else
#define FILTER_INTRA_SCALE_BITS 3
#endif
#endif // CONFIG_FILTER_INTRA
#define CONFIG_INTRA_EDGE_UPSAMPLE CONFIG_INTRA_EDGE
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment