diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h index a1b5683781ddf87a2f31cf3ca204b6492c9be688..d5139f7d1622e1e2e44c632edfbbefed6cf755b1 100644 --- a/vp10/common/blockd.h +++ b/vp10/common/blockd.h @@ -45,6 +45,7 @@ typedef enum { #endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS #define MAXTXLEN 32 +#define CU_SIZE 64 static INLINE int is_inter_mode(PREDICTION_MODE mode) { #if CONFIG_EXT_INTER @@ -55,6 +56,23 @@ static INLINE int is_inter_mode(PREDICTION_MODE mode) { } #if CONFIG_EXT_INTER +#define WEDGE_BITS_SML 3 +#define WEDGE_BITS_MED 4 +#define WEDGE_BITS_BIG 5 +#define WEDGE_NONE -1 +#define WEDGE_WEIGHT_BITS 6 + +static INLINE int get_wedge_bits(BLOCK_SIZE sb_type) { + if (sb_type < BLOCK_8X8) + return 0; + if (sb_type <= BLOCK_8X8) + return WEDGE_BITS_SML; + else if (sb_type <= BLOCK_32X32) + return WEDGE_BITS_MED; + else + return WEDGE_BITS_BIG; +} + static INLINE int is_inter_singleref_mode(PREDICTION_MODE mode) { return mode >= NEARESTMV && mode <= NEWFROMNEARMV; } @@ -69,6 +87,11 @@ static INLINE int have_newmv_in_inter_mode(PREDICTION_MODE mode) { mode == NEAREST_NEWMV || mode == NEW_NEARESTMV || mode == NEAR_NEWMV || mode == NEW_NEARMV); } +#else + +static INLINE int have_newmv_in_inter_mode(PREDICTION_MODE mode) { + return (mode == NEWMV); +} #endif // CONFIG_EXT_INTER /* For keyframes, intra block modes are predicted by the (already decoded) @@ -172,6 +195,12 @@ typedef struct { #if CONFIG_EXT_INTER PREDICTION_MODE interintra_mode; PREDICTION_MODE interintra_uv_mode; + // TODO(debargha): Consolidate these flags + int use_wedge_interintra; + int interintra_wedge_index; + int interintra_uv_wedge_index; + int use_wedge_interinter; + int interinter_wedge_index; #endif // CONFIG_EXT_INTER #if CONFIG_OBMC @@ -203,12 +232,6 @@ static INLINE int has_second_ref(const MB_MODE_INFO *mbmi) { return mbmi->ref_frame[1] > INTRA_FRAME; } -#if CONFIG_OBMC -static INLINE int is_obmc_allowed(const MB_MODE_INFO *mbmi) { - return (mbmi->sb_type >= BLOCK_8X8); -} -#endif // CONFIG_OBMC - PREDICTION_MODE vp10_left_block_mode(const MODE_INFO *cur_mi, const MODE_INFO *left_mi, int b); @@ -647,6 +670,23 @@ static INLINE int is_interintra_pred(const MB_MODE_INFO *mbmi) { } #endif // CONFIG_EXT_INTER +#if CONFIG_OBMC +static INLINE int is_obmc_allowed(const MB_MODE_INFO *mbmi) { + return (mbmi->sb_type >= BLOCK_8X8); +} + +static INLINE int is_neighbor_overlappable(const MB_MODE_INFO *mbmi) { +#if CONFIG_EXT_INTER + return (is_inter_block(mbmi) && + !(has_second_ref(mbmi) && get_wedge_bits(mbmi->sb_type) && + mbmi->use_wedge_interinter) && + !(is_interintra_pred(mbmi))); +#else + return (is_inter_block(mbmi)); +#endif // CONFIG_EXT_INTER +} +#endif // CONFIG_OBMC + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp10/common/entropymode.c b/vp10/common/entropymode.c index e4c27a777c4d938f0279888080b67f138783ee07..d5c8f4485c9f4f0e345b15d4587afd7d0e8f4cbd 100644 --- a/vp10/common/entropymode.c +++ b/vp10/common/entropymode.c @@ -10,6 +10,7 @@ #include "vpx_mem/vpx_mem.h" +#include "vp10/common/reconinter.h" #include "vp10/common/onyxc_int.h" #include "vp10/common/seg_common.h" @@ -190,8 +191,8 @@ static const vpx_prob default_drl_prob[DRL_MODE_CONTEXTS] = { #if CONFIG_EXT_INTER static const vpx_prob default_new2mv_prob = 180; -#endif -#endif +#endif // CONFIG_EXT_INTER +#endif // CONFIG_REF_MV static const vpx_prob default_inter_mode_probs[INTER_MODE_CONTEXTS] [INTER_MODES - 1] = { @@ -230,6 +231,14 @@ static const vpx_prob default_inter_compound_mode_probs static const 
vpx_prob default_interintra_prob[BLOCK_SIZES] = { 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, }; + +static const vpx_prob default_wedge_interintra_prob[BLOCK_SIZES] = { + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, +}; + +static const vpx_prob default_wedge_interinter_prob[BLOCK_SIZES] = { + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, +}; #endif // CONFIG_EXT_INTER #if CONFIG_OBMC @@ -1337,6 +1346,8 @@ static void init_mode_probs(FRAME_CONTEXT *fc) { #if CONFIG_EXT_INTER vp10_copy(fc->inter_compound_mode_probs, default_inter_compound_mode_probs); vp10_copy(fc->interintra_prob, default_interintra_prob); + vp10_copy(fc->wedge_interintra_prob, default_wedge_interintra_prob); + vp10_copy(fc->wedge_interinter_prob, default_wedge_interinter_prob); #endif // CONFIG_EXT_INTER #if CONFIG_SUPERTX vp10_copy(fc->supertx_prob, default_supertx_prob); @@ -1445,12 +1456,21 @@ void vp10_adapt_inter_frame_probs(VP10_COMMON *cm) { pre_fc->inter_compound_mode_probs[i], counts->inter_compound_mode[i], fc->inter_compound_mode_probs[i]); - for (i = 0; i < BLOCK_SIZES; ++i) { if (is_interintra_allowed_bsize(i)) fc->interintra_prob[i] = mode_mv_merge_probs(pre_fc->interintra_prob[i], counts->interintra[i]); } + for (i = 0; i < BLOCK_SIZES; ++i) { + if (is_interintra_allowed_bsize(i) && get_wedge_bits(i)) + fc->wedge_interintra_prob[i] = mode_mv_merge_probs( + pre_fc->wedge_interintra_prob[i], counts->wedge_interintra[i]); + } + for (i = 0; i < BLOCK_SIZES; ++i) { + if (get_wedge_bits(i)) + fc->wedge_interinter_prob[i] = mode_mv_merge_probs( + pre_fc->wedge_interinter_prob[i], counts->wedge_interinter[i]); + } #endif // CONFIG_EXT_INTER for (i = 0; i < BLOCK_SIZE_GROUPS; i++) diff --git a/vp10/common/entropymode.h b/vp10/common/entropymode.h index d9858b3d3985edfe7dbf5bd2f2c80ae53118da4c..b208dcf4af082dfa14f40e1e795d6037d114b163 100644 --- a/vp10/common/entropymode.h +++ b/vp10/common/entropymode.h @@ -74,13 +74,15 @@ typedef struct frame_contexts { #if CONFIG_EXT_INTER vpx_prob new2mv_prob; #endif // CONFIG_EXT_INTER -#endif +#endif // CONFIG_REF_MV vpx_prob inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1]; #if CONFIG_EXT_INTER vpx_prob inter_compound_mode_probs[INTER_MODE_CONTEXTS] [INTER_COMPOUND_MODES - 1]; vpx_prob interintra_prob[BLOCK_SIZES]; + vpx_prob wedge_interintra_prob[BLOCK_SIZES]; + vpx_prob wedge_interinter_prob[BLOCK_SIZES]; #endif // CONFIG_EXT_INTER #if CONFIG_OBMC vpx_prob obmc_prob[BLOCK_SIZES]; @@ -143,6 +145,8 @@ typedef struct FRAME_COUNTS { #if CONFIG_EXT_INTER unsigned int inter_compound_mode[INTER_MODE_CONTEXTS][INTER_COMPOUND_MODES]; unsigned int interintra[BLOCK_SIZES][2]; + unsigned int wedge_interintra[BLOCK_SIZES][2]; + unsigned int wedge_interinter[BLOCK_SIZES][2]; #endif // CONFIG_EXT_INTER #if CONFIG_OBMC unsigned int obmc[BLOCK_SIZES][2]; diff --git a/vp10/common/enums.h b/vp10/common/enums.h index 4e3a5b174f21b00696931e882e9df7646f944ea1..87bcc8a71cce5934dd9d0ff3418d6b70d68f576e 100644 --- a/vp10/common/enums.h +++ b/vp10/common/enums.h @@ -41,21 +41,32 @@ typedef enum BITSTREAM_PROFILE { MAX_PROFILES } BITSTREAM_PROFILE; -#define BLOCK_4X4 0 -#define BLOCK_4X8 1 -#define BLOCK_8X4 2 -#define BLOCK_8X8 3 -#define BLOCK_8X16 4 -#define BLOCK_16X8 5 -#define BLOCK_16X16 6 -#define BLOCK_16X32 7 -#define BLOCK_32X16 8 -#define BLOCK_32X32 9 -#define BLOCK_32X64 10 -#define BLOCK_64X32 11 -#define BLOCK_64X64 12 -#define BLOCK_SIZES 13 -#define BLOCK_INVALID BLOCK_SIZES +#define BLOCK_4X4 0 +#define BLOCK_4X8 1 
+#define BLOCK_8X4 2 +#define BLOCK_8X8 3 +#define BLOCK_8X16 4 +#define BLOCK_16X8 5 +#define BLOCK_16X16 6 +#define BLOCK_16X32 7 +#define BLOCK_32X16 8 +#define BLOCK_32X32 9 +#define BLOCK_32X64 10 +#define BLOCK_64X32 11 +#define BLOCK_64X64 12 + +#if CONFIG_EXT_PARTITION +#define BLOCK_64X128 13 +#define BLOCK_128X64 14 +#define BLOCK_128X128 15 +#define BLOCK_SIZES 16 +#else +#define BLOCK_SIZES 13 +#endif // CONFIG_EXT_PARTITION + +#define BLOCK_INVALID (BLOCK_SIZES) +#define BLOCK_LARGEST (BLOCK_SIZES - 1) + typedef uint8_t BLOCK_SIZE; typedef enum PARTITION_TYPE { diff --git a/vp10/common/reconinter.c b/vp10/common/reconinter.c index 72e6ae0a75a636499622909e927c462bdd116f8e..174ff8074b9eb22680a6fdc5e4ca21d5c1e85a7e 100644 --- a/vp10/common/reconinter.c +++ b/vp10/common/reconinter.c @@ -22,9 +22,490 @@ #include "vp10/common/onyxc_int.h" #endif // CONFIG_OBMC -// TODO(geza.lore) Update this when the extended coding unit size experiment -// have been ported. -#define CU_SIZE 64 +#if CONFIG_EXT_INTER +static int get_masked_weight(int m) { + #define SMOOTHER_LEN 32 + static const uint8_t smoothfn[2 * SMOOTHER_LEN + 1] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 1, 1, 1, + 1, 1, 2, 2, 3, 4, 5, 6, + 8, 9, 12, 14, 17, 21, 24, 28, + 32, + 36, 40, 43, 47, 50, 52, 55, 56, + 58, 59, 60, 61, 62, 62, 63, 63, + 63, 63, 63, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, + }; + if (m < -SMOOTHER_LEN) + return 0; + else if (m > SMOOTHER_LEN) + return (1 << WEDGE_WEIGHT_BITS); + else + return smoothfn[m + SMOOTHER_LEN]; +} + +// [negative][transpose][reverse] +DECLARE_ALIGNED(16, static uint8_t, + wedge_mask_obl[2][2][2][MASK_MASTER_SIZE * MASK_MASTER_SIZE]); +// [negative][transpose] +DECLARE_ALIGNED(16, static uint8_t, + wedge_mask_str[2][2][MASK_MASTER_SIZE * MASK_MASTER_SIZE]); + +void vp10_init_wedge_masks() { + int i, j; + const int w = MASK_MASTER_SIZE; + const int h = MASK_MASTER_SIZE; + const int stride = MASK_MASTER_STRIDE; + const int a[4] = {2, 1, 2, 2}; + for (i = 0; i < h; ++i) + for (j = 0; j < w; ++j) { + int x = (2 * j + 1 - (a[2] * w) / 2); + int y = (2 * i + 1 - (a[3] * h) / 2); + int m = (a[0] * x + a[1] * y) / 2; + wedge_mask_obl[0][0][0][i * stride + j] = + wedge_mask_obl[0][1][0][j * stride + i] = + wedge_mask_obl[0][0][1][i * stride + w - 1 - j] = + wedge_mask_obl[0][1][1][(w - 1 - j) * stride + i] = + get_masked_weight(m); + wedge_mask_obl[1][0][0][i * stride + j] = + wedge_mask_obl[1][1][0][j * stride + i] = + wedge_mask_obl[1][0][1][i * stride + w - 1 - j] = + wedge_mask_obl[1][1][1][(w - 1 - j) * stride + i] = + (1 << WEDGE_WEIGHT_BITS) - get_masked_weight(m); + wedge_mask_str[0][0][i * stride + j] = + wedge_mask_str[0][1][j * stride + i] = + get_masked_weight(x); + wedge_mask_str[1][0][i * stride + j] = + wedge_mask_str[1][1][j * stride + i] = + (1 << WEDGE_WEIGHT_BITS) - get_masked_weight(x); + } +} + +static const uint8_t *get_wedge_mask_inplace(const int *a, + int h, int w) { + const int woff = (a[2] * w) >> 2; + const int hoff = (a[3] * h) >> 2; + const int oblique = (abs(a[0]) + abs(a[1]) == 3); + const uint8_t *master; + int transpose, reverse, negative; + if (oblique) { + negative = (a[0] < 0); + transpose = (abs(a[0]) == 1); + reverse = (a[0] < 0) ^ (a[1] < 0); + } else { + negative = (a[0] < 0 || a[1] < 0); + transpose = (a[0] == 0); + reverse = 0; + } + master = (oblique ? 
+ wedge_mask_obl[negative][transpose][reverse] : + wedge_mask_str[negative][transpose]) + + MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) + + MASK_MASTER_SIZE / 2 - woff; + return master; +} + +// Equation of line: f(x, y) = a[0]*(x - a[2]*w/4) + a[1]*(y - a[3]*h/4) = 0 +// The soft mask is obtained by computing f(x, y) and then calling +// get_masked_weight(f(x, y)). +static const int wedge_params_sml[1 << WEDGE_BITS_SML][4] = { + {-1, 2, 2, 2}, + { 1, -2, 2, 2}, + {-2, 1, 2, 2}, + { 2, -1, 2, 2}, + {-2, -1, 2, 2}, + { 2, 1, 2, 2}, + {-1, -2, 2, 2}, + { 1, 2, 2, 2}, +}; + +static const int wedge_params_med_hgtw[1 << WEDGE_BITS_MED][4] = { + {-1, 2, 2, 2}, + { 1, -2, 2, 2}, + {-2, 1, 2, 2}, + { 2, -1, 2, 2}, + {-2, -1, 2, 2}, + { 2, 1, 2, 2}, + {-1, -2, 2, 2}, + { 1, 2, 2, 2}, + + {-1, 2, 2, 1}, + { 1, -2, 2, 1}, + {-1, 2, 2, 3}, + { 1, -2, 2, 3}, + {-1, -2, 2, 1}, + { 1, 2, 2, 1}, + {-1, -2, 2, 3}, + { 1, 2, 2, 3}, +}; + +static const int wedge_params_med_hltw[1 << WEDGE_BITS_MED][4] = { + {-1, 2, 2, 2}, + { 1, -2, 2, 2}, + {-2, 1, 2, 2}, + { 2, -1, 2, 2}, + {-2, -1, 2, 2}, + { 2, 1, 2, 2}, + {-1, -2, 2, 2}, + { 1, 2, 2, 2}, + + {-2, 1, 1, 2}, + { 2, -1, 1, 2}, + {-2, 1, 3, 2}, + { 2, -1, 3, 2}, + {-2, -1, 1, 2}, + { 2, 1, 1, 2}, + {-2, -1, 3, 2}, + { 2, 1, 3, 2}, +}; + +static const int wedge_params_med_heqw[1 << WEDGE_BITS_MED][4] = { + {-1, 2, 2, 2}, + { 1, -2, 2, 2}, + {-2, 1, 2, 2}, + { 2, -1, 2, 2}, + {-2, -1, 2, 2}, + { 2, 1, 2, 2}, + {-1, -2, 2, 2}, + { 1, 2, 2, 2}, + + { 0, -2, 0, 1}, + { 0, 2, 0, 1}, + { 0, -2, 0, 3}, + { 0, 2, 0, 3}, + {-2, 0, 1, 0}, + { 2, 0, 1, 0}, + {-2, 0, 3, 0}, + { 2, 0, 3, 0}, +}; + +static const int wedge_params_big_hgtw[1 << WEDGE_BITS_BIG][4] = { + {-1, 2, 2, 2}, + { 1, -2, 2, 2}, + {-2, 1, 2, 2}, + { 2, -1, 2, 2}, + {-2, -1, 2, 2}, + { 2, 1, 2, 2}, + {-1, -2, 2, 2}, + { 1, 2, 2, 2}, + + {-1, 2, 2, 1}, + { 1, -2, 2, 1}, + {-1, 2, 2, 3}, + { 1, -2, 2, 3}, + {-1, -2, 2, 1}, + { 1, 2, 2, 1}, + {-1, -2, 2, 3}, + { 1, 2, 2, 3}, + + {-2, 1, 1, 2}, + { 2, -1, 1, 2}, + {-2, 1, 3, 2}, + { 2, -1, 3, 2}, + {-2, -1, 1, 2}, + { 2, 1, 1, 2}, + {-2, -1, 3, 2}, + { 2, 1, 3, 2}, + + { 0, -2, 0, 1}, + { 0, 2, 0, 1}, + { 0, -2, 0, 2}, + { 0, 2, 0, 2}, + { 0, -2, 0, 3}, + { 0, 2, 0, 3}, + {-2, 0, 2, 0}, + { 2, 0, 2, 0}, +}; + +static const int wedge_params_big_hltw[1 << WEDGE_BITS_BIG][4] = { + {-1, 2, 2, 2}, + { 1, -2, 2, 2}, + {-2, 1, 2, 2}, + { 2, -1, 2, 2}, + {-2, -1, 2, 2}, + { 2, 1, 2, 2}, + {-1, -2, 2, 2}, + { 1, 2, 2, 2}, + + {-1, 2, 2, 1}, + { 1, -2, 2, 1}, + {-1, 2, 2, 3}, + { 1, -2, 2, 3}, + {-1, -2, 2, 1}, + { 1, 2, 2, 1}, + {-1, -2, 2, 3}, + { 1, 2, 2, 3}, + + {-2, 1, 1, 2}, + { 2, -1, 1, 2}, + {-2, 1, 3, 2}, + { 2, -1, 3, 2}, + {-2, -1, 1, 2}, + { 2, 1, 1, 2}, + {-2, -1, 3, 2}, + { 2, 1, 3, 2}, + + { 0, -2, 0, 2}, + { 0, 2, 0, 2}, + {-2, 0, 1, 0}, + { 2, 0, 1, 0}, + {-2, 0, 2, 0}, + { 2, 0, 2, 0}, + {-2, 0, 3, 0}, + { 2, 0, 3, 0}, +}; + +static const int wedge_params_big_heqw[1 << WEDGE_BITS_BIG][4] = { + {-1, 2, 2, 2}, + { 1, -2, 2, 2}, + {-2, 1, 2, 2}, + { 2, -1, 2, 2}, + {-2, -1, 2, 2}, + { 2, 1, 2, 2}, + {-1, -2, 2, 2}, + { 1, 2, 2, 2}, + + {-1, 2, 2, 1}, + { 1, -2, 2, 1}, + {-1, 2, 2, 3}, + { 1, -2, 2, 3}, + {-1, -2, 2, 1}, + { 1, 2, 2, 1}, + {-1, -2, 2, 3}, + { 1, 2, 2, 3}, + + {-2, 1, 1, 2}, + { 2, -1, 1, 2}, + {-2, 1, 3, 2}, + { 2, -1, 3, 2}, + {-2, -1, 1, 2}, + { 2, 1, 1, 2}, + {-2, -1, 3, 2}, + { 2, 1, 3, 2}, + + { 0, -2, 0, 1}, + { 0, 2, 0, 1}, + { 0, -2, 0, 3}, + { 0, 2, 0, 3}, + {-2, 0, 1, 0}, + { 2, 0, 1, 0}, + {-2, 0, 3, 0}, + { 2, 0, 3, 
0}, +}; + +static const int *get_wedge_params(int wedge_index, + BLOCK_SIZE sb_type, + int h, int w) { + const int *a = NULL; + const int wedge_bits = get_wedge_bits(sb_type); + + if (wedge_index == WEDGE_NONE) + return NULL; + + if (wedge_bits == WEDGE_BITS_SML) { + a = wedge_params_sml[wedge_index]; + } else if (wedge_bits == WEDGE_BITS_MED) { + if (h > w) + a = wedge_params_med_hgtw[wedge_index]; + else if (h < w) + a = wedge_params_med_hltw[wedge_index]; + else + a = wedge_params_med_heqw[wedge_index]; + } else if (wedge_bits == WEDGE_BITS_BIG) { + if (h > w) + a = wedge_params_big_hgtw[wedge_index]; + else if (h < w) + a = wedge_params_big_hltw[wedge_index]; + else + a = wedge_params_big_heqw[wedge_index]; + } else { + assert(0); + } + return a; +} + +const uint8_t *vp10_get_soft_mask(int wedge_index, + BLOCK_SIZE sb_type, + int h, int w) { + const int *a = get_wedge_params(wedge_index, sb_type, h, w); + if (a) { + return get_wedge_mask_inplace(a, h, w); + } else { + return NULL; + } +} + +#if CONFIG_SUPERTX +const uint8_t *get_soft_mask_extend(int wedge_index, int plane, + BLOCK_SIZE sb_type, + int wedge_offset_y, + int wedge_offset_x) { + int subh = (plane ? 2 : 4) << b_height_log2_lookup[sb_type]; + int subw = (plane ? 2 : 4) << b_width_log2_lookup[sb_type]; + const int *a = get_wedge_params(wedge_index, sb_type, subh, subw); + if (a) { + const uint8_t *mask = get_wedge_mask_inplace(a, subh, subw); + mask -= (wedge_offset_x + wedge_offset_y * MASK_MASTER_STRIDE); + return mask; + } else { + return NULL; + } +} + +static void build_masked_compound_extend(uint8_t *dst, int dst_stride, + uint8_t *dst2, int dst2_stride, + int plane, + int wedge_index, BLOCK_SIZE sb_type, + int wedge_offset_y, int wedge_offset_x, + int h, int w) { + int i, j; + const uint8_t *mask = get_soft_mask_extend( + wedge_index, plane, sb_type, wedge_offset_y, wedge_offset_x); + for (i = 0; i < h; ++i) + for (j = 0; j < w; ++j) { + int m = mask[i * MASK_MASTER_STRIDE + j]; + dst[i * dst_stride + j] = (dst[i * dst_stride + j] * m + + dst2[i * dst2_stride + j] * + ((1 << WEDGE_WEIGHT_BITS) - m) + + (1 << (WEDGE_WEIGHT_BITS - 1))) >> + WEDGE_WEIGHT_BITS; + } +} + +#if CONFIG_VP9_HIGHBITDEPTH +static void build_masked_compound_extend_highbd( + uint8_t *dst_8, int dst_stride, + uint8_t *dst2_8, int dst2_stride, int plane, + int wedge_index, BLOCK_SIZE sb_type, + int wedge_offset_y, int wedge_offset_x, + int h, int w) { + int i, j; + const uint8_t *mask = get_soft_mask_extend( + wedge_index, plane, sb_type, wedge_offset_y, wedge_offset_x); + uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8); + uint16_t *dst2 = CONVERT_TO_SHORTPTR(dst2_8); + for (i = 0; i < h; ++i) + for (j = 0; j < w; ++j) { + int m = mask[i * MASK_MASTER_STRIDE + j]; + dst[i * dst_stride + j] = (dst[i * dst_stride + j] * m + + dst2[i * dst2_stride + j] * + ((1 << WEDGE_WEIGHT_BITS) - m) + + (1 << (WEDGE_WEIGHT_BITS - 1))) >> + WEDGE_WEIGHT_BITS; + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +#else // CONFIG_SUPERTX + +static void build_masked_compound(uint8_t *dst, int dst_stride, + uint8_t *dst2, int dst2_stride, + int wedge_index, BLOCK_SIZE sb_type, + int h, int w) { + int i, j; + const uint8_t *mask = vp10_get_soft_mask(wedge_index, sb_type, h, w); + for (i = 0; i < h; ++i) + for (j = 0; j < w; ++j) { + int m = mask[i * MASK_MASTER_STRIDE + j]; + dst[i * dst_stride + j] = (dst[i * dst_stride + j] * m + + dst2[i * dst2_stride + j] * + ((1 << WEDGE_WEIGHT_BITS) - m) + + (1 << (WEDGE_WEIGHT_BITS - 1))) >> + WEDGE_WEIGHT_BITS; + } +} + +#if 
CONFIG_VP9_HIGHBITDEPTH +static void build_masked_compound_highbd(uint8_t *dst_8, int dst_stride, + uint8_t *dst2_8, int dst2_stride, + int wedge_index, BLOCK_SIZE sb_type, + int h, int w) { + int i, j; + const uint8_t *mask = vp10_get_soft_mask(wedge_index, sb_type, h, w); + uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8); + uint16_t *dst2 = CONVERT_TO_SHORTPTR(dst2_8); + for (i = 0; i < h; ++i) + for (j = 0; j < w; ++j) { + int m = mask[i * MASK_MASTER_STRIDE + j]; + dst[i * dst_stride + j] = (dst[i * dst_stride + j] * m + + dst2[i * dst2_stride + j] * + ((1 << WEDGE_WEIGHT_BITS) - m) + + (1 << (WEDGE_WEIGHT_BITS - 1))) >> + WEDGE_WEIGHT_BITS; + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH +#endif // CONFIG_SUPERTX + +void vp10_make_masked_inter_predictor( + const uint8_t *pre, + int pre_stride, + uint8_t *dst, + int dst_stride, + const int subpel_x, + const int subpel_y, + const struct scale_factors *sf, + int w, int h, + const INTERP_FILTER interp_filter, + int xs, int ys, +#if CONFIG_SUPERTX + int plane, int wedge_offset_x, int wedge_offset_y, +#endif // CONFIG_SUPERTX + const MACROBLOCKD *xd) { + const MODE_INFO *mi = xd->mi[0]; +#if CONFIG_VP9_HIGHBITDEPTH + uint8_t tmp_dst_[2 * CU_SIZE * CU_SIZE]; + uint8_t *tmp_dst = + (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? + CONVERT_TO_BYTEPTR(tmp_dst_) : tmp_dst_; + vp10_make_inter_predictor(pre, pre_stride, tmp_dst, CU_SIZE, + subpel_x, subpel_y, sf, w, h, 0, + interp_filter, xs, ys, xd); +#if CONFIG_SUPERTX + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) + build_masked_compound_extend_highbd( + dst, dst_stride, tmp_dst, CU_SIZE, plane, + mi->mbmi.interinter_wedge_index, + mi->mbmi.sb_type, + wedge_offset_y, wedge_offset_x, h, w); + else + build_masked_compound_extend( + dst, dst_stride, tmp_dst, CU_SIZE, plane, + mi->mbmi.interinter_wedge_index, + mi->mbmi.sb_type, + wedge_offset_y, wedge_offset_x, h, w); +#else + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) + build_masked_compound_highbd( + dst, dst_stride, tmp_dst, CU_SIZE, + mi->mbmi.interinter_wedge_index, + mi->mbmi.sb_type, h, w); + else + build_masked_compound( + dst, dst_stride, tmp_dst, CU_SIZE, + mi->mbmi.interinter_wedge_index, + mi->mbmi.sb_type, h, w); +#endif // CONFIG_SUPERTX +#else // CONFIG_VP9_HIGHBITDEPTH + uint8_t tmp_dst[CU_SIZE * CU_SIZE]; + vp10_make_inter_predictor(pre, pre_stride, tmp_dst, CU_SIZE, + subpel_x, subpel_y, sf, w, h, 0, + interp_filter, xs, ys, xd); +#if CONFIG_SUPERTX + build_masked_compound_extend( + dst, dst_stride, tmp_dst, CU_SIZE, plane, + mi->mbmi.interinter_wedge_index, + mi->mbmi.sb_type, + wedge_offset_y, wedge_offset_x, h, w); +#else + build_masked_compound( + dst, dst_stride, tmp_dst, CU_SIZE, + mi->mbmi.interinter_wedge_index, + mi->mbmi.sb_type, h, w); +#endif // CONFIG_SUPERTX +#endif // CONFIG_VP9_HIGHBITDEPTH +} +#endif // CONFIG_EXT_INTER #if CONFIG_VP9_HIGHBITDEPTH void vp10_highbd_build_inter_predictor(const uint8_t *src, int src_stride, @@ -44,7 +525,7 @@ void vp10_highbd_build_inter_predictor(const uint8_t *src, int src_stride, src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS); - high_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, + highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, sf, w, h, ref, interp_filter, sf->x_step_q4, sf->y_step_q4, bd); } @@ -78,6 +559,9 @@ void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, int bw, int bh, int x, int y, int w, int h, +#if CONFIG_SUPERTX && CONFIG_EXT_INTER + int wedge_offset_x, int wedge_offset_y, 
+#endif // CONFIG_SUPERTX && CONFIG_EXT_INTER int mi_x, int mi_y) { struct macroblockd_plane *const pd = &xd->plane[plane]; #if CONFIG_OBMC @@ -129,19 +613,22 @@ void build_inter_predictors(MACROBLOCKD *xd, int plane, pre += (scaled_mv.row >> SUBPEL_BITS) * pre_buf->stride + (scaled_mv.col >> SUBPEL_BITS); -#if CONFIG_VP9_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - high_inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, - subpel_x, subpel_y, sf, w, h, ref, - interp_filter, xs, ys, xd->bd); - } else { - inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, - subpel_x, subpel_y, sf, w, h, ref, interp_filter, xs, ys); - } -#else - inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, - subpel_x, subpel_y, sf, w, h, ref, interp_filter, xs, ys); -#endif // CONFIG_VP9_HIGHBITDEPTH +#if CONFIG_EXT_INTER + if (ref && get_wedge_bits(mi->mbmi.sb_type) && + mi->mbmi.use_wedge_interinter) + vp10_make_masked_inter_predictor( + pre, pre_buf->stride, dst, dst_buf->stride, + subpel_x, subpel_y, sf, w, h, + interp_filter, xs, ys, +#if CONFIG_SUPERTX + plane, wedge_offset_x, wedge_offset_y, +#endif // CONFIG_SUPERTX + xd); + else +#endif // CONFIG_EXT_INTER + vp10_make_inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, + subpel_x, subpel_y, sf, w, h, ref, + interp_filter, xs, ys, xd); } } @@ -222,14 +709,22 @@ static void build_inter_predictors_for_planes(MACROBLOCKD *xd, BLOCK_SIZE bsize, 0, 0, #endif // CONFIG_OBMC y * 2 + x, bw, bh, - 4 * x, 4 * y, pw, ph, mi_x, mi_y); + 4 * x, 4 * y, pw, ph, +#if CONFIG_SUPERTX && CONFIG_EXT_INTER + 0, 0, +#endif // CONFIG_SUPERTX && CONFIG_EXT_INTER + mi_x, mi_y); } else { build_inter_predictors(xd, plane, #if CONFIG_OBMC 0, 0, #endif // CONFIG_OBMC 0, bw, bh, - 0, 0, bw, bh, mi_x, mi_y); + 0, 0, bw, bh, +#if CONFIG_SUPERTX && CONFIG_EXT_INTER + 0, 0, +#endif // CONFIG_SUPERTX && CONFIG_EXT_INTER + mi_x, mi_y); } } } @@ -524,9 +1019,13 @@ void vp10_build_masked_inter_predictor_complex( (void) xd; } -void vp10_build_inter_predictors_sb_sub8x8(MACROBLOCKD *xd, - int mi_row, int mi_col, - BLOCK_SIZE bsize, int block) { +void vp10_build_inter_predictors_sb_sub8x8_extend( + MACROBLOCKD *xd, +#if CONFIG_EXT_INTER + int mi_row_ori, int mi_col_ori, +#endif // CONFIG_EXT_INTER + int mi_row, int mi_col, + BLOCK_SIZE bsize, int block) { // Prediction function used in supertx: // Use the mv at current block (which is less than 8x8) // to get prediction of a block located at (mi_row, mi_col) at size of bsize @@ -535,6 +1034,10 @@ void vp10_build_inter_predictors_sb_sub8x8(MACROBLOCKD *xd, int plane; const int mi_x = mi_col * MI_SIZE; const int mi_y = mi_row * MI_SIZE; +#if CONFIG_EXT_INTER + const int wedge_offset_x = (mi_col_ori - mi_col) * MI_SIZE; + const int wedge_offset_y = (mi_row_ori - mi_row) * MI_SIZE; +#endif // CONFIG_EXT_INTER // For sub8x8 uv: // Skip uv prediction in supertx except the first block (block = 0) @@ -554,6 +1057,10 @@ void vp10_build_inter_predictors_sb_sub8x8(MACROBLOCKD *xd, #endif // CONFIG_OBMC block, bw, bh, 0, 0, bw, bh, +#if CONFIG_EXT_INTER + wedge_offset_x >> (xd->plane[plane].subsampling_x), + wedge_offset_y >> (xd->plane[plane].subsampling_y), +#endif // CONFIG_SUPERTX mi_x, mi_y); } #if CONFIG_EXT_INTER @@ -568,6 +1075,59 @@ void vp10_build_inter_predictors_sb_sub8x8(MACROBLOCKD *xd, bsize); #endif // CONFIG_EXT_INTER } + +void vp10_build_inter_predictors_sb_extend(MACROBLOCKD *xd, +#if CONFIG_EXT_INTER + int mi_row_ori, int mi_col_ori, +#endif // CONFIG_EXT_INTER + int mi_row, int mi_col, 
+ BLOCK_SIZE bsize) { + int plane; + const int mi_x = mi_col * MI_SIZE; + const int mi_y = mi_row * MI_SIZE; +#if CONFIG_EXT_INTER + const int wedge_offset_x = (mi_col_ori - mi_col) * MI_SIZE; + const int wedge_offset_y = (mi_row_ori - mi_row) * MI_SIZE; +#endif // CONFIG_EXT_INTER + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + const BLOCK_SIZE plane_bsize = get_plane_block_size( + bsize, &xd->plane[plane]); + const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; + const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; + const int bw = 4 * num_4x4_w; + const int bh = 4 * num_4x4_h; + + if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) { + int i = 0, x, y; + assert(bsize == BLOCK_8X8); + for (y = 0; y < num_4x4_h; ++y) + for (x = 0; x < num_4x4_w; ++x) + build_inter_predictors( + xd, plane, +#if CONFIG_OBMC + 0, 0, +#endif // CONFIG_OBMC + i++, bw, bh, 4 * x, 4 * y, 4, 4, +#if CONFIG_EXT_INTER + wedge_offset_x >> (xd->plane[plane].subsampling_x), + wedge_offset_y >> (xd->plane[plane].subsampling_y), +#endif // CONFIG_EXT_INTER + mi_x, mi_y); + } else { + build_inter_predictors( + xd, plane, +#if CONFIG_OBMC + 0, 0, +#endif // CONFIG_OBMC + 0, bw, bh, 0, 0, bw, bh, +#if CONFIG_EXT_INTER + wedge_offset_x >> (xd->plane[plane].subsampling_x), + wedge_offset_y >> (xd->plane[plane].subsampling_y), +#endif // CONFIG_EXT_INTER + mi_x, mi_y); + } + } +} #endif // CONFIG_SUPERTX #if CONFIG_OBMC @@ -695,7 +1255,7 @@ void vp10_build_obmc_inter_prediction(VP10_COMMON *cm, mi_step = VPXMIN(xd->n8_w, num_8x8_blocks_wide_lookup[above_mbmi->sb_type]); - if (!is_inter_block(above_mbmi)) + if (!is_neighbor_overlappable(above_mbmi)) continue; overlap = (above_mbmi->skip) ? @@ -761,7 +1321,7 @@ void vp10_build_obmc_inter_prediction(VP10_COMMON *cm, mi_step = VPXMIN(xd->n8_h, num_8x8_blocks_high_lookup[left_mbmi->sb_type]); - if (!is_inter_block(left_mbmi)) + if (!is_neighbor_overlappable(left_mbmi)) continue; overlap = (left_mbmi->skip) ? @@ -816,6 +1376,9 @@ void vp10_build_obmc_inter_prediction(VP10_COMMON *cm, #if CONFIG_EXT_INTER static void combine_interintra(PREDICTION_MODE mode, + int use_wedge_interintra, + int wedge_index, + BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize, uint8_t *comppred, int compstride, @@ -846,12 +1409,26 @@ static void combine_interintra(PREDICTION_MODE mode, size == 8 ? 
8 : 16); int i, j; + if (use_wedge_interintra && get_wedge_bits(bsize)) { + const uint8_t *mask = vp10_get_soft_mask(wedge_index, bsize, bh, bw); + for (i = 0; i < bh; ++i) { + for (j = 0; j < bw; ++j) { + int m = mask[i * MASK_MASTER_STRIDE + j]; + comppred[i * compstride + j] = + (intrapred[i * intrastride + j] * m + + interpred[i * interstride + j] * ((1 << WEDGE_WEIGHT_BITS) - m) + + (1 << (WEDGE_WEIGHT_BITS - 1))) >> WEDGE_WEIGHT_BITS; + } + } + return; + } + switch (mode) { case V_PRED: for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { int scale = weights1d[i * size_scale]; - comppred[i * compstride + j] = + comppred[i * compstride + j] = ((scale_max - scale) * interpred[i * interstride + j] + scale * intrapred[i * intrastride + j] + scale_round) >> scale_bits; @@ -939,6 +1516,9 @@ static void combine_interintra(PREDICTION_MODE mode, #if CONFIG_VP9_HIGHBITDEPTH static void combine_interintra_highbd(PREDICTION_MODE mode, + int use_wedge_interintra, + int wedge_index, + BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize, uint8_t *comppred8, int compstride, @@ -973,12 +1553,26 @@ static void combine_interintra_highbd(PREDICTION_MODE mode, uint16_t *intrapred = CONVERT_TO_SHORTPTR(intrapred8); (void) bd; + if (use_wedge_interintra && get_wedge_bits(bsize)) { + const uint8_t *mask = vp10_get_soft_mask(wedge_index, bsize, bh, bw); + for (i = 0; i < bh; ++i) { + for (j = 0; j < bw; ++j) { + int m = mask[i * MASK_MASTER_STRIDE + j]; + comppred[i * compstride + j] = + (intrapred[i * intrastride + j] * m + + interpred[i * interstride + j] * ((1 << WEDGE_WEIGHT_BITS) - m) + + (1 << (WEDGE_WEIGHT_BITS - 1))) >> WEDGE_WEIGHT_BITS; + } + } + return; + } + switch (mode) { case V_PRED: for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { int scale = weights1d[i * size_scale]; - comppred[i * compstride + j] = + comppred[i * compstride + j] = ((scale_max - scale) * interpred[i * interstride + j] + scale * intrapred[i * intrastride + j] + scale_round) >> scale_bits; @@ -1119,6 +1713,9 @@ void vp10_build_interintra_predictors_sby(MACROBLOCKD *xd, CONVERT_TO_BYTEPTR(intrapredictor), bw, xd->mi[0]->mbmi.interintra_mode, bsize, 0); combine_interintra_highbd(xd->mi[0]->mbmi.interintra_mode, + xd->mi[0]->mbmi.use_wedge_interintra, + xd->mi[0]->mbmi.interintra_wedge_index, + bsize, bsize, xd->plane[0].dst.buf, xd->plane[0].dst.stride, ypred, ystride, @@ -1133,6 +1730,9 @@ void vp10_build_interintra_predictors_sby(MACROBLOCKD *xd, intrapredictor, bw, xd->mi[0]->mbmi.interintra_mode, bsize, 0); combine_interintra(xd->mi[0]->mbmi.interintra_mode, + xd->mi[0]->mbmi.use_wedge_interintra, + xd->mi[0]->mbmi.interintra_wedge_index, + bsize, bsize, xd->plane[0].dst.buf, xd->plane[0].dst.stride, ypred, ystride, intrapredictor, bw); @@ -1155,6 +1755,9 @@ void vp10_build_interintra_predictors_sbc(MACROBLOCKD *xd, CONVERT_TO_BYTEPTR(uintrapredictor), bw, xd->mi[0]->mbmi.interintra_uv_mode, bsize, plane); combine_interintra_highbd(xd->mi[0]->mbmi.interintra_uv_mode, + xd->mi[0]->mbmi.use_wedge_interintra, + xd->mi[0]->mbmi.interintra_uv_wedge_index, + bsize, uvbsize, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride, @@ -1168,8 +1771,11 @@ void vp10_build_interintra_predictors_sbc(MACROBLOCKD *xd, build_intra_predictors_for_interintra( xd, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride, uintrapredictor, bw, - xd->mi[0]->mbmi.interintra_uv_mode, bsize, 1); + xd->mi[0]->mbmi.interintra_uv_mode, bsize, plane); combine_interintra(xd->mi[0]->mbmi.interintra_uv_mode, + xd->mi[0]->mbmi.use_wedge_interintra, + 
xd->mi[0]->mbmi.interintra_uv_wedge_index, + bsize, uvbsize, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride, @@ -1196,4 +1802,271 @@ void vp10_build_interintra_predictors(MACROBLOCKD *xd, vp10_build_interintra_predictors_sbuv(xd, upred, vpred, ustride, vstride, bsize); } + +// Builds the inter-predictor for the single ref case +// for use in the encoder to search the wedges efficiently. +static void build_inter_predictors_single_buf(MACROBLOCKD *xd, int plane, + int block, + int bw, int bh, + int x, int y, int w, int h, + int mi_x, int mi_y, + int ref, + uint8_t *const ext_dst, + int ext_dst_stride) { + struct macroblockd_plane *const pd = &xd->plane[plane]; + const MODE_INFO *mi = xd->mi[0]; + const INTERP_FILTER interp_filter = mi->mbmi.interp_filter; + + const struct scale_factors *const sf = &xd->block_refs[ref]->sf; + struct buf_2d *const pre_buf = &pd->pre[ref]; +#if CONFIG_VP9_HIGHBITDEPTH + uint8_t *const dst = + (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH ? + CONVERT_TO_BYTEPTR(ext_dst) : ext_dst) + ext_dst_stride * y + x; +#else + uint8_t *const dst = ext_dst + ext_dst_stride * y + x; +#endif + const MV mv = mi->mbmi.sb_type < BLOCK_8X8 + ? average_split_mvs(pd, mi, ref, block) + : mi->mbmi.mv[ref].as_mv; + + // TODO(jkoleszar): This clamping is done in the incorrect place for the + // scaling case. It needs to be done on the scaled MV, not the pre-scaling + // MV. Note however that it performs the subsampling aware scaling so + // that the result is always q4. + // mv_precision precision is MV_PRECISION_Q4. + const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, &mv, bw, bh, + pd->subsampling_x, + pd->subsampling_y); + + uint8_t *pre; + MV32 scaled_mv; + int xs, ys, subpel_x, subpel_y; + const int is_scaled = vp10_is_scaled(sf); + + if (is_scaled) { + pre = pre_buf->buf + scaled_buffer_offset(x, y, pre_buf->stride, sf); + scaled_mv = vp10_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf); + xs = sf->x_step_q4; + ys = sf->y_step_q4; + } else { + pre = pre_buf->buf + (y * pre_buf->stride + x); + scaled_mv.row = mv_q4.row; + scaled_mv.col = mv_q4.col; + xs = ys = 16; + } + + subpel_x = scaled_mv.col & SUBPEL_MASK; + subpel_y = scaled_mv.row & SUBPEL_MASK; + pre += (scaled_mv.row >> SUBPEL_BITS) * pre_buf->stride + + (scaled_mv.col >> SUBPEL_BITS); + + vp10_make_inter_predictor(pre, pre_buf->stride, dst, ext_dst_stride, + subpel_x, subpel_y, sf, w, h, 0, + interp_filter, xs, ys, xd); +} + +void vp10_build_inter_predictors_for_planes_single_buf( + MACROBLOCKD *xd, BLOCK_SIZE bsize, + int mi_row, int mi_col, int ref, + uint8_t *ext_dst[3], int ext_dst_stride[3]) { + const int plane_from = 0; + const int plane_to = 2; + int plane; + const int mi_x = mi_col * MI_SIZE; + const int mi_y = mi_row * MI_SIZE; + for (plane = plane_from; plane <= plane_to; ++plane) { + const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, + &xd->plane[plane]); + const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; + const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; + const int bw = 4 * num_4x4_w; + const int bh = 4 * num_4x4_h; + + if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) { + int i = 0, x, y; + assert(bsize == BLOCK_8X8); + for (y = 0; y < num_4x4_h; ++y) + for (x = 0; x < num_4x4_w; ++x) + build_inter_predictors_single_buf(xd, plane, + i++, bw, bh, + 4 * x, 4 * y, 4, 4, + mi_x, mi_y, ref, + ext_dst[plane], + ext_dst_stride[plane]); + } else { + build_inter_predictors_single_buf(xd, plane, + 0, bw, bh, + 0, 0, bw, bh, + mi_x, mi_y, ref, + ext_dst[plane], + ext_dst_stride[plane]); + } + } +} 
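/*
 * A note on the encoder-only helper above: vp10_build_inter_predictors_for_planes_single_buf()
 * builds each single-reference prediction once into a scratch buffer so that a
 * wedge search only has to repeat the masked blend, not the subpel
 * interpolation, for each of the 1 << get_wedge_bits(bsize) candidates
 * (8, 16 or 32 wedges depending on block size). The sketch below is a minimal,
 * self-contained illustration of that idea; blend_wedge(), search_wedge_index()
 * and the caller-supplied mask array are illustrative stand-ins, not functions
 * from this patch.
 */
#include <stdint.h>

#define WEDGE_WEIGHT_BITS 6   /* same weight precision as the patch */

/* Blend two precomputed single-ref predictions with one soft mask,
 * mirroring the rounding used by build_masked_compound(). */
static void blend_wedge(uint8_t *dst, int dst_stride,
                        const uint8_t *p0, int p0_stride,
                        const uint8_t *p1, int p1_stride,
                        const uint8_t *mask, int mask_stride,
                        int bh, int bw) {
  int i, j;
  for (i = 0; i < bh; ++i)
    for (j = 0; j < bw; ++j) {
      const int m = mask[i * mask_stride + j];
      dst[i * dst_stride + j] =
          (p0[i * p0_stride + j] * m +
           p1[i * p1_stride + j] * ((1 << WEDGE_WEIGHT_BITS) - m) +
           (1 << (WEDGE_WEIGHT_BITS - 1))) >> WEDGE_WEIGHT_BITS;
    }
}

/* Return the wedge index whose blend has the smallest SSE against the source
 * block; p0/p1 are built once, only the blend runs per candidate.
 * masks[w] points at a bh x bw mask (stride bw) for wedge index w.
 * Block dimensions are assumed to be at most 64x64 (CU_SIZE in the patch). */
static int search_wedge_index(const uint8_t *src, int src_stride,
                              const uint8_t *p0, const uint8_t *p1,
                              int pred_stride, int bh, int bw,
                              const uint8_t *const *masks, int num_wedges) {
  uint8_t blended[64 * 64];
  int64_t best_sse = -1;
  int best_index = 0, w, i, j;
  for (w = 0; w < num_wedges; ++w) {
    int64_t sse = 0;
    blend_wedge(blended, bw, p0, pred_stride, p1, pred_stride,
                masks[w], bw, bh, bw);
    for (i = 0; i < bh; ++i)
      for (j = 0; j < bw; ++j) {
        const int d = src[i * src_stride + j] - blended[i * bw + j];
        sse += d * d;
      }
    if (best_sse < 0 || sse < best_sse) {
      best_sse = sse;
      best_index = w;
    }
  }
  return best_index;
}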
+ +static void build_wedge_inter_predictor_from_buf(MACROBLOCKD *xd, int plane, + int block, int bw, int bh, + int x, int y, int w, int h, +#if CONFIG_SUPERTX + int wedge_offset_x, + int wedge_offset_y, +#endif // CONFIG_SUPERTX + int mi_x, int mi_y, + uint8_t *ext_dst0, + int ext_dst_stride0, + uint8_t *ext_dst1, + int ext_dst_stride1) { + struct macroblockd_plane *const pd = &xd->plane[plane]; + const MODE_INFO *mi = xd->mi[0]; + const int is_compound = has_second_ref(&mi->mbmi); + int ref; + (void) block; + (void) bw; + (void) bh; + (void) mi_x; + (void) mi_y; + + for (ref = 0; ref < 1 + is_compound; ++ref) { + struct buf_2d *const dst_buf = &pd->dst; + uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x; + + if (ref && get_wedge_bits(mi->mbmi.sb_type) + && mi->mbmi.use_wedge_interinter) { +#if CONFIG_VP9_HIGHBITDEPTH + uint8_t tmp_dst_[2 * CU_SIZE * CU_SIZE]; + uint8_t *tmp_dst = + (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? + CONVERT_TO_BYTEPTR(tmp_dst_) : tmp_dst_; +#else + uint8_t tmp_dst[CU_SIZE * CU_SIZE]; +#endif // CONFIG_VP9_HIGHBITDEPTH +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + int k; + for (k = 0; k < h; ++k) + memcpy(tmp_dst_ + 2 * CU_SIZE * k, ext_dst1 + + ext_dst_stride1 * 2 * k, w * 2); + } else { + int k; + for (k = 0; k < h; ++k) + memcpy(tmp_dst_ + CU_SIZE * k, ext_dst1 + + ext_dst_stride1 * k, w); + } +#else + { + int k; + for (k = 0; k < h; ++k) + memcpy(tmp_dst + CU_SIZE * k, ext_dst1 + + ext_dst_stride1 * k, w); + } +#endif // CONFIG_VP9_HIGHBITDEPTH + +#if CONFIG_SUPERTX +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + build_masked_compound_extend_highbd( + dst, dst_buf->stride, tmp_dst, CU_SIZE, plane, + mi->mbmi.interinter_wedge_index, + mi->mbmi.sb_type, + wedge_offset_y, wedge_offset_x, h, w); + } else { + build_masked_compound_extend( + dst, dst_buf->stride, tmp_dst, CU_SIZE, plane, + mi->mbmi.interinter_wedge_index, + mi->mbmi.sb_type, + wedge_offset_y, wedge_offset_x, h, w); + } +#else + build_masked_compound_extend(dst, dst_buf->stride, tmp_dst, + CU_SIZE, plane, + mi->mbmi.interinter_wedge_index, + mi->mbmi.sb_type, + wedge_offset_y, wedge_offset_x, h, w); +#endif // CONFIG_VP9_HIGHBITDEPTH +#else // CONFIG_SUPERTX +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) + build_masked_compound_highbd(dst, dst_buf->stride, tmp_dst, + CU_SIZE, + mi->mbmi.interinter_wedge_index, + mi->mbmi.sb_type, h, w); + else +#endif // CONFIG_VP9_HIGHBITDEPTH + build_masked_compound(dst, dst_buf->stride, tmp_dst, CU_SIZE, + mi->mbmi.interinter_wedge_index, + mi->mbmi.sb_type, h, w); +#endif // CONFIG_SUPERTX + } else { +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + int k; + for (k = 0; k < h; ++k) + memcpy(CONVERT_TO_SHORTPTR(dst + dst_buf->stride * k), + ext_dst0 + ext_dst_stride0 * 2 * k, w * 2); + } else { + int k; + for (k = 0; k < h; ++k) + memcpy(dst + dst_buf->stride * k, + ext_dst0 + ext_dst_stride0 * k, w); + } +#else + { + int k; + for (k = 0; k < h; ++k) + memcpy(dst + dst_buf->stride * k, + ext_dst0 + ext_dst_stride0 * k, w); + } +#endif // CONFIG_VP9_HIGHBITDEPTH + } + } +} + +void vp10_build_wedge_inter_predictor_from_buf( + MACROBLOCKD *xd, BLOCK_SIZE bsize, + int mi_row, int mi_col, + uint8_t *ext_dst0[3], int ext_dst_stride0[3], + uint8_t *ext_dst1[3], int ext_dst_stride1[3]) { + const int plane_from = 0; + const int plane_to = 2; + int plane; + const int mi_x = mi_col * MI_SIZE; + const int mi_y = mi_row * 
MI_SIZE; + for (plane = plane_from; plane <= plane_to; ++plane) { + const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, + &xd->plane[plane]); + const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; + const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; + const int bw = 4 * num_4x4_w; + const int bh = 4 * num_4x4_h; + + if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) { + int i = 0, x, y; + assert(bsize == BLOCK_8X8); + for (y = 0; y < num_4x4_h; ++y) + for (x = 0; x < num_4x4_w; ++x) + build_wedge_inter_predictor_from_buf(xd, plane, i++, bw, bh, + 4 * x, 4 * y, 4, 4, +#if CONFIG_SUPERTX + 0, 0, +#endif + mi_x, mi_y, + ext_dst0[plane], + ext_dst_stride0[plane], + ext_dst1[plane], + ext_dst_stride1[plane]); + } else { + build_wedge_inter_predictor_from_buf(xd, plane, 0, bw, bh, + 0, 0, bw, bh, +#if CONFIG_SUPERTX + 0, 0, +#endif + mi_x, mi_y, + ext_dst0[plane], + ext_dst_stride0[plane], + ext_dst1[plane], + ext_dst_stride1[plane]); + } + } +} #endif // CONFIG_EXT_INTER diff --git a/vp10/common/reconinter.h b/vp10/common/reconinter.h index 0e7fa4c48cad1e468131d0e60a0e9229a7a547b9..c6e89df8bac571b2c9f43b8100d6aa0a49b22c47 100644 --- a/vp10/common/reconinter.h +++ b/vp10/common/reconinter.h @@ -62,14 +62,14 @@ static INLINE void inter_predictor(const uint8_t *src, int src_stride, } #if CONFIG_VP9_HIGHBITDEPTH -static INLINE void high_inter_predictor(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int subpel_x, - const int subpel_y, - const struct scale_factors *sf, - int w, int h, int ref, - const INTERP_FILTER interp_filter, - int xs, int ys, int bd) { +static INLINE void highbd_inter_predictor(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int subpel_x, + const int subpel_y, + const struct scale_factors *sf, + int w, int h, int ref, + const INTERP_FILTER interp_filter, + int xs, int ys, int bd) { InterpFilterParams interp_filter_params = vp10_get_interp_filter_params(interp_filter); if (interp_filter_params.taps == SUBPEL_TAPS) { @@ -105,6 +105,61 @@ static INLINE void high_inter_predictor(const uint8_t *src, int src_stride, } #endif // CONFIG_VP9_HIGHBITDEPTH +void build_inter_predictors(MACROBLOCKD *xd, int plane, +#if CONFIG_OBMC + int mi_col_offset, int mi_row_offset, +#endif // CONFIG_OBMC + int block, + int bw, int bh, + int x, int y, int w, int h, +#if CONFIG_SUPERTX && CONFIG_EXT_INTER + int wedge_offset_x, int wedge_offset_y, +#endif // CONFIG_SUPERTX && CONFIG_EXT_INTER + int mi_x, int mi_y); + +static INLINE void vp10_make_inter_predictor( + const uint8_t *src, + int src_stride, + uint8_t *dst, + int dst_stride, + const int subpel_x, + const int subpel_y, + const struct scale_factors *sf, + int w, int h, int ref, + const INTERP_FILTER interp_filter, + int xs, int ys, + const MACROBLOCKD *xd) { + (void) xd; +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) + highbd_inter_predictor(src, src_stride, dst, dst_stride, + subpel_x, subpel_y, sf, w, h, ref, + interp_filter, xs, ys, xd->bd); + else +#endif // CONFIG_VP9_HIGHBITDEPTH + inter_predictor(src, src_stride, dst, dst_stride, + subpel_x, subpel_y, sf, w, h, ref, + interp_filter, xs, ys); +} + +#if CONFIG_EXT_INTER +void vp10_make_masked_inter_predictor( + const uint8_t *pre, + int pre_stride, + uint8_t *dst, + int dst_stride, + const int subpel_x, + const int subpel_y, + const struct scale_factors *sf, + int w, int h, + const INTERP_FILTER interp_filter, + int xs, int ys, +#if CONFIG_SUPERTX + int plane, int wedge_offset_x, int 
wedge_offset_y, +#endif // CONFIG_SUPERTX + const MACROBLOCKD *xd); +#endif // CONFIG_EXT_INTER + static INLINE int round_mv_comp_q4(int value) { return (value < 0 ? value - 2 : value + 2) / 4; } @@ -114,10 +169,10 @@ static MV mi_mv_pred_q4(const MODE_INFO *mi, int idx) { mi->bmi[1].as_mv[idx].as_mv.row + mi->bmi[2].as_mv[idx].as_mv.row + mi->bmi[3].as_mv[idx].as_mv.row), - round_mv_comp_q4(mi->bmi[0].as_mv[idx].as_mv.col + - mi->bmi[1].as_mv[idx].as_mv.col + - mi->bmi[2].as_mv[idx].as_mv.col + - mi->bmi[3].as_mv[idx].as_mv.col) }; + round_mv_comp_q4(mi->bmi[0].as_mv[idx].as_mv.col + + mi->bmi[1].as_mv[idx].as_mv.col + + mi->bmi[2].as_mv[idx].as_mv.col + + mi->bmi[3].as_mv[idx].as_mv.col) }; return res; } @@ -183,15 +238,6 @@ static INLINE MV average_split_mvs(const struct macroblockd_plane *pd, return res; } -void build_inter_predictors(MACROBLOCKD *xd, int plane, -#if CONFIG_OBMC - int mi_col_offset, int mi_row_offset, -#endif // CONFIG_OBMC - int block, - int bw, int bh, - int x, int y, int w, int h, - int mi_x, int mi_y); - void vp10_build_inter_predictor_sub8x8(MACROBLOCKD *xd, int plane, int i, int ir, int ic, int mi_row, int mi_col); @@ -209,9 +255,21 @@ void vp10_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, BLOCK_SIZE bsize); #if CONFIG_SUPERTX -void vp10_build_inter_predictors_sb_sub8x8(MACROBLOCKD *xd, - int mi_row, int mi_col, - BLOCK_SIZE bsize, int block); +void vp10_build_inter_predictors_sb_sub8x8_extend( + MACROBLOCKD *xd, +#if CONFIG_EXT_INTER + int mi_row_ori, int mi_col_ori, +#endif // CONFIG_EXT_INTER + int mi_row, int mi_col, + BLOCK_SIZE bsize, int block); + +void vp10_build_inter_predictors_sb_extend( + MACROBLOCKD *xd, +#if CONFIG_EXT_INTER + int mi_row_ori, int mi_col_ori, +#endif // CONFIG_EXT_INTER + int mi_row, int mi_col, + BLOCK_SIZE bsize); struct macroblockd_plane; void vp10_build_masked_inter_predictor_complex( MACROBLOCKD *xd, @@ -219,7 +277,6 @@ void vp10_build_masked_inter_predictor_complex( const struct macroblockd_plane *pd, int mi_row, int mi_col, int mi_row_ori, int mi_col_ori, BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, PARTITION_TYPE partition, int plane); - #endif // CONFIG_SUPERTX void vp10_build_inter_predictor(const uint8_t *src, int src_stride, @@ -376,6 +433,15 @@ void vp10_build_obmc_inter_prediction(VP10_COMMON *cm, #endif // CONFIG_OBMC #if CONFIG_EXT_INTER +#define MASK_MASTER_SIZE (2 * CU_SIZE) +#define MASK_MASTER_STRIDE (2 * CU_SIZE) + +void vp10_init_wedge_masks(); + +const uint8_t *vp10_get_soft_mask(int wedge_index, + BLOCK_SIZE sb_type, + int h, int w); + void vp10_build_interintra_predictors(MACROBLOCKD *xd, uint8_t *ypred, uint8_t *upred, @@ -398,6 +464,17 @@ void vp10_build_interintra_predictors_sbuv(MACROBLOCKD *xd, uint8_t *vpred, int ustride, int vstride, BLOCK_SIZE bsize); + +// Encoder only +void vp10_build_inter_predictors_for_planes_single_buf( + MACROBLOCKD *xd, BLOCK_SIZE bsize, + int mi_row, int mi_col, int ref, + uint8_t *ext_dst[3], int ext_dst_stride[3]); +void vp10_build_wedge_inter_predictor_from_buf( + MACROBLOCKD *xd, BLOCK_SIZE bsize, + int mi_row, int mi_col, + uint8_t *ext_dst0[3], int ext_dst_stride0[3], + uint8_t *ext_dst1[3], int ext_dst_stride1[3]); #endif // CONFIG_EXT_INTER #ifdef __cplusplus diff --git a/vp10/common/thread_common.c b/vp10/common/thread_common.c index ecc971a7c21d4609b09428af4d59071ae74e5e54..963eed166d278532da2dd0d30e0cf2da53ba6b57 100644 --- a/vp10/common/thread_common.c +++ b/vp10/common/thread_common.c @@ -412,6 +412,13 @@ void 
vp10_accumulate_frame_counts(VP10_COMMON *cm, FRAME_COUNTS *counts, for (i = 0; i < BLOCK_SIZES; i++) for (j = 0; j < 2; j++) cm->counts.interintra[i][j] += counts->interintra[i][j]; + + for (i = 0; i < BLOCK_SIZES; i++) + for (j = 0; j < 2; j++) + cm->counts.wedge_interintra[i][j] += counts->wedge_interintra[i][j]; + for (i = 0; i < BLOCK_SIZES; i++) + for (j = 0; j < 2; j++) + cm->counts.wedge_interinter[i][j] += counts->wedge_interinter[i][j]; #endif // CONFIG_EXT_INTER for (i = 0; i < INTRA_INTER_CONTEXTS; i++) diff --git a/vp10/common/vp10_rtcd_defs.pl b/vp10/common/vp10_rtcd_defs.pl index 2344ce2b2dd73ff9b23838ddc2c32fe2b2d1acbd..1e2ef58bdb3f4ced68e75662c854168dbb29fa66 100644 --- a/vp10/common/vp10_rtcd_defs.pl +++ b/vp10/common/vp10_rtcd_defs.pl @@ -12,7 +12,7 @@ struct macroblockd; /* Encoder forward decls */ struct macroblock; -struct vp9_variance_vtable; +struct vp10_variance_vtable; struct search_site_config; struct mv; union int_mv; @@ -614,15 +614,15 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # # Motion search # -add_proto qw/int vp10_full_search_sad/, "const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv"; +add_proto qw/int vp10_full_search_sad/, "const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp10_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv"; specialize qw/vp10_full_search_sad sse3 sse4_1/; $vp10_full_search_sad_sse3=vp10_full_search_sadx3; $vp10_full_search_sad_sse4_1=vp10_full_search_sadx8; -add_proto qw/int vp10_diamond_search_sad/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv"; +add_proto qw/int vp10_diamond_search_sad/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp10_variance_vtable *fn_ptr, const struct mv *center_mv"; specialize qw/vp10_diamond_search_sad/; -add_proto qw/int vp10_full_range_search/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv"; +add_proto qw/int vp10_full_range_search/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp10_variance_vtable *fn_ptr, const struct mv *center_mv"; specialize qw/vp10_full_range_search/; add_proto qw/void vp10_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count"; diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c index 64ac3ccf3b5539a0b1c99f71a09a32200c5c7d83..98d291087f7a0fea7ea6e3e361f378e0c86a9233 100644 --- a/vp10/decoder/decodeframe.c +++ b/vp10/decoder/decodeframe.c @@ -540,10 +540,10 @@ static void build_mc_border(const uint8_t *src, int src_stride, } #if CONFIG_VP9_HIGHBITDEPTH -static void high_build_mc_border(const uint8_t *src8, int src_stride, - uint16_t *dst, int dst_stride, - int x, int y, int b_w, int b_h, - int w, int h) { +static void 
build_mc_border_highbd(const uint8_t *src8, int src_stride, + uint16_t *dst, int dst_stride, + int x, int y, int b_w, int b_h, + int w, int h) { // Get a pointer to the start of the real data for this row. const uint16_t *src = CONVERT_TO_SHORTPTR(src8); const uint16_t *ref_row = src - x - y * src_stride; @@ -585,39 +585,54 @@ static void high_build_mc_border(const uint8_t *src8, int src_stride, } while (--b_h); } -static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride, - int x0, int y0, int b_w, int b_h, - int frame_width, int frame_height, - int border_offset, - uint8_t *const dst, int dst_buf_stride, - int subpel_x, int subpel_y, - const INTERP_FILTER interp_filter, - const struct scale_factors *sf, - MACROBLOCKD *xd, - int w, int h, int ref, int xs, int ys) { - DECLARE_ALIGNED(16, uint16_t, mc_buf_high[80 * 2 * 80 * 2]); +static void extend_and_predict_highbd(const uint8_t *buf_ptr1, + int pre_buf_stride, + int x0, int y0, int b_w, int b_h, + int frame_width, int frame_height, + int border_offset, + uint8_t *const dst, int dst_buf_stride, + int subpel_x, int subpel_y, + const INTERP_FILTER interp_filter, + const struct scale_factors *sf, +#if CONFIG_EXT_INTER && CONFIG_SUPERTX + int plane, + int wedge_offset_x, int wedge_offset_y, +#endif // CONFIG_EXT_INTER && CONFIG_SUPERTX + MACROBLOCKD *xd, + int w, int h, int ref, int xs, int ys) { + DECLARE_ALIGNED(16, uint16_t, + mc_buf_high[(CU_SIZE + 16) * 2 * (CU_SIZE + 16) * 2]); const uint8_t *buf_ptr; if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - high_build_mc_border(buf_ptr1, pre_buf_stride, mc_buf_high, b_w, - x0, y0, b_w, b_h, frame_width, frame_height); + build_mc_border_highbd(buf_ptr1, pre_buf_stride, mc_buf_high, b_w, + x0, y0, b_w, b_h, frame_width, frame_height); buf_ptr = CONVERT_TO_BYTEPTR(mc_buf_high) + border_offset; } else { build_mc_border(buf_ptr1, pre_buf_stride, (uint8_t *)mc_buf_high, b_w, x0, y0, b_w, b_h, frame_width, frame_height); buf_ptr = ((uint8_t *)mc_buf_high) + border_offset; } - - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - high_inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x, - subpel_y, sf, w, h, ref, interp_filter, - xs, ys, xd->bd); - } else { - inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x, - subpel_y, sf, w, h, ref, interp_filter, xs, ys); - } +#if CONFIG_EXT_INTER + if (ref && get_wedge_bits(xd->mi[0]->mbmi.sb_type) && + xd->mi[0]->mbmi.use_wedge_interinter) + vp10_make_masked_inter_predictor( + buf_ptr, b_w, dst, dst_buf_stride, + subpel_x, subpel_y, sf, w, h, + interp_filter, xs, ys, +#if CONFIG_SUPERTX + plane, wedge_offset_x, wedge_offset_y, +#endif // CONFIG_SUPERTX + xd); + else +#endif // CONFIG_EXT_INTER + vp10_make_inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, + subpel_x, subpel_y, sf, w, h, ref, + interp_filter, xs, ys, xd); } + #else + static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride, int x0, int y0, int b_w, int b_h, int frame_width, int frame_height, @@ -626,21 +641,48 @@ static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride, int subpel_x, int subpel_y, const INTERP_FILTER interp_filter, const struct scale_factors *sf, +#if CONFIG_EXT_INTER && CONFIG_SUPERTX + int plane, + int wedge_offset_x, int wedge_offset_y, +#endif // CONFIG_EXT_INTER && CONFIG_SUPERTX + MACROBLOCKD *xd, int w, int h, int ref, int xs, int ys) { - DECLARE_ALIGNED(16, uint8_t, mc_buf[80 * 2 * 80 * 2]); + DECLARE_ALIGNED(16, uint8_t, mc_buf[(CU_SIZE + 16) * 2 * (CU_SIZE + 16) * 2]); const uint8_t *buf_ptr; 
build_mc_border(buf_ptr1, pre_buf_stride, mc_buf, b_w, x0, y0, b_w, b_h, frame_width, frame_height); buf_ptr = mc_buf + border_offset; - - inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x, - subpel_y, sf, w, h, ref, interp_filter, xs, ys); +#if CONFIG_EXT_INTER + if (ref && get_wedge_bits(xd->mi[0]->mbmi.sb_type) && + xd->mi[0]->mbmi.use_wedge_interinter) + vp10_make_masked_inter_predictor( + buf_ptr, b_w, dst, dst_buf_stride, + subpel_x, subpel_y, sf, w, h, + interp_filter, xs, ys, +#if CONFIG_SUPERTX + plane, wedge_offset_x, wedge_offset_y, +#endif // CONFIG_SUPERTX + xd); + else +#endif // CONFIG_EXT_INTER + vp10_make_inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, + subpel_x, subpel_y, sf, w, h, ref, + interp_filter, xs, ys, xd); } #endif // CONFIG_VP9_HIGHBITDEPTH -static void dec_build_inter_predictors(VP10Decoder *const pbi, MACROBLOCKD *xd, - int plane, int bw, int bh, int x, - int y, int w, int h, int mi_x, int mi_y, + +static void dec_build_inter_predictors(VP10Decoder *const pbi, + MACROBLOCKD *xd, int plane, +#if CONFIG_OBMC + int mi_col_offset, int mi_row_offset, +#endif // CONFIG_OBMC + int bw, int bh, + int x, int y, int w, int h, +#if CONFIG_EXT_INTER && CONFIG_SUPERTX + int wedge_offset_x, int wedge_offset_y, +#endif // CONFIG_EXT_INTER && CONFIG_SUPERTX + int mi_x, int mi_y, const INTERP_FILTER interp_filter, const struct scale_factors *sf, struct buf_2d *pre_buf, @@ -655,9 +697,20 @@ static void dec_build_inter_predictors(VP10Decoder *const pbi, MACROBLOCKD *xd, int xs, ys, x0, y0, x0_16, y0_16, frame_width, frame_height, buf_stride, subpel_x, subpel_y; uint8_t *ref_frame, *buf_ptr; +#if CONFIG_EXT_INTER +#if CONFIG_OBMC + const MODE_INFO *mi = xd->mi[mi_col_offset + xd->mi_stride * mi_row_offset]; +#else + const MODE_INFO *mi = xd->mi[0]; +#endif // CONFIG_OBMC +#endif // CONFIG_EXT_INTER #if CONFIG_EXT_INTERP const int i_filter = IsInterpolatingFilter(interp_filter); #endif // CONFIG_EXT_INTERP +#if CONFIG_OBMC + (void) mi_col_offset; + (void) mi_row_offset; +#endif // CONFIG_OBMC // Get reference frame pointer, width and height. if (plane == 0) { @@ -777,15 +830,27 @@ static void dec_build_inter_predictors(VP10Decoder *const pbi, MACROBLOCKD *xd, const int border_offset = y_pad * (filter_size / 2 - 1) * b_w + x_pad * (filter_size / 2 - 1); +#if CONFIG_VP9_HIGHBITDEPTH + extend_and_predict_highbd(buf_ptr1, buf_stride, x0, y0, b_w, b_h, + frame_width, frame_height, border_offset, + dst, dst_buf->stride, + subpel_x, subpel_y, + interp_filter, sf, +#if CONFIG_EXT_INTER && CONFIG_SUPERTX + plane, wedge_offset_x, wedge_offset_y, +#endif // CONFIG_EXT_INTER && CONFIG_SUPERTX + xd, w, h, ref, xs, ys); +#else extend_and_predict(buf_ptr1, buf_stride, x0, y0, b_w, b_h, frame_width, frame_height, border_offset, dst, dst_buf->stride, subpel_x, subpel_y, interp_filter, sf, -#if CONFIG_VP9_HIGHBITDEPTH - xd, -#endif - w, h, ref, xs, ys); +#if CONFIG_EXT_INTER && CONFIG_SUPERTX + plane, wedge_offset_x, wedge_offset_y, +#endif // CONFIG_EXT_INTER && CONFIG_SUPERTX + xd, w, h, ref, xs, ys); +#endif // CONFIG_VP9_HIGHBITDEPTH return; } } else { @@ -797,29 +862,44 @@ static void dec_build_inter_predictors(VP10Decoder *const pbi, MACROBLOCKD *xd, VPXMAX(0, (y1 + 7)) << (plane == 0 ? 
0 : 1)); } } -#if CONFIG_VP9_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - high_inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, - subpel_y, sf, w, h, ref, interp_filter, - xs, ys, xd->bd); +#if CONFIG_EXT_INTER + if (ref && get_wedge_bits(mi->mbmi.sb_type) && + mi->mbmi.use_wedge_interinter) { + vp10_make_masked_inter_predictor( + buf_ptr, buf_stride, dst, dst_buf->stride, + subpel_x, subpel_y, sf, w, h, + interp_filter, xs, ys, +#if CONFIG_SUPERTX + plane, wedge_offset_x, wedge_offset_y, +#endif // CONFIG_SUPERTX + xd); } else { - inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, - subpel_y, sf, w, h, ref, interp_filter, xs, ys); + vp10_make_inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, + subpel_x, subpel_y, sf, w, h, ref, + interp_filter, xs, ys, xd); } #else - inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, - subpel_y, sf, w, h, ref, interp_filter, xs, ys); -#endif // CONFIG_VP9_HIGHBITDEPTH + vp10_make_inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, + subpel_x, subpel_y, sf, w, h, ref, + interp_filter, xs, ys, xd); +#endif // CONFIG_EXT_INTER } #endif // (CONFIG_SUPERTX || CONFIG_OBMC) #if CONFIG_SUPERTX -static void dec_build_inter_predictors_sb(VP10Decoder *const pbi, - MACROBLOCKD *xd, - int mi_row, int mi_col) { +static void dec_build_inter_predictors_sb_extend( + VP10Decoder *const pbi, MACROBLOCKD *xd, +#if CONFIG_EXT_INTER + int mi_row_ori, int mi_col_ori, +#endif // CONFIG_EXT_INTER + int mi_row, int mi_col) { int plane; const int mi_x = mi_col * MI_SIZE; const int mi_y = mi_row * MI_SIZE; +#if CONFIG_EXT_INTER + const int wedge_offset_x = (mi_col_ori - mi_col) * MI_SIZE; + const int wedge_offset_y = (mi_row_ori - mi_row) * MI_SIZE; +#endif // CONFIG_EXT_INTER const MODE_INFO *mi = xd->mi[0]; const INTERP_FILTER interp_filter = mi->mbmi.interp_filter; const BLOCK_SIZE sb_type = mi->mbmi.sb_type; @@ -827,6 +907,7 @@ static void dec_build_inter_predictors_sb(VP10Decoder *const pbi, for (plane = 0; plane < MAX_MB_PLANE; ++plane) { struct macroblockd_plane *const pd = &xd->plane[plane]; + struct buf_2d *const dst_buf = &pd->dst; const int num_4x4_w = pd->n4_w; const int num_4x4_h = pd->n4_h; @@ -855,24 +936,44 @@ static void dec_build_inter_predictors_sb(VP10Decoder *const pbi, for (y = 0; y < num_4x4_h; ++y) { for (x = 0; x < num_4x4_w; ++x) { const MV mv = average_split_mvs(pd, mi, ref, y * 2 + x); - dec_build_inter_predictors(pbi, xd, plane, n4w_x4, n4h_x4, - 4 * x, 4 * y, pw, ph, mi_x, mi_y, - interp_filter, sf, pre_buf, dst_buf, - &mv, ref_frame_buf, is_scaled, ref); + dec_build_inter_predictors( + pbi, xd, plane, +#if CONFIG_OBMC + 0, 0, +#endif // CONFIG_OBMC + n4w_x4, n4h_x4, + 4 * x, 4 * y, pw, ph, +#if CONFIG_EXT_INTER + wedge_offset_x >> (pd->subsampling_x), + wedge_offset_y >> (pd->subsampling_y), +#endif // CONFIG_EXT_INTER + mi_x, mi_y, + interp_filter, sf, pre_buf, dst_buf, + &mv, ref_frame_buf, is_scaled, ref); } } } else { const MV mv = mi->mbmi.mv[ref].as_mv; - dec_build_inter_predictors(pbi, xd, plane, n4w_x4, n4h_x4, - 0, 0, n4w_x4, n4h_x4, mi_x, mi_y, - interp_filter, sf, pre_buf, dst_buf, - &mv, ref_frame_buf, - is_scaled, ref); + dec_build_inter_predictors( + pbi, xd, plane, +#if CONFIG_OBMC + 0, 0, +#endif // CONFIG_OBMC + n4w_x4, n4h_x4, + 0, 0, n4w_x4, n4h_x4, +#if CONFIG_EXT_INTER + wedge_offset_x >> (pd->subsampling_x), + wedge_offset_y >> (pd->subsampling_y), +#endif // CONFIG_EXT_INTER + mi_x, mi_y, + interp_filter, sf, pre_buf, dst_buf, + &mv, 
ref_frame_buf, + is_scaled, ref); } } } #if CONFIG_EXT_INTER - if (is_interintra_pred(&xd->mi[0]->mbmi)) + if (is_interintra_pred(&mi->mbmi)) vp10_build_interintra_predictors(xd, xd->plane[0].dst.buf, xd->plane[1].dst.buf, @@ -884,15 +985,23 @@ static void dec_build_inter_predictors_sb(VP10Decoder *const pbi, #endif // CONFIG_EXT_INTER } -static void dec_build_inter_predictors_sb_sub8x8(VP10Decoder *const pbi, - MACROBLOCKD *xd, - int mi_row, int mi_col, - int block) { +static void dec_build_inter_predictors_sb_sub8x8_extend( + VP10Decoder *const pbi, + MACROBLOCKD *xd, +#if CONFIG_EXT_INTER + int mi_row_ori, int mi_col_ori, +#endif // CONFIG_EXT_INTER + int mi_row, int mi_col, + int block) { // Prediction function used in supertx: // Use the mv at current block (which is less than 8x8) int plane; const int mi_x = mi_col * MI_SIZE; const int mi_y = mi_row * MI_SIZE; +#if CONFIG_EXT_INTER + const int wedge_offset_x = (mi_col_ori - mi_col) * MI_SIZE; + const int wedge_offset_y = (mi_row_ori - mi_row) * MI_SIZE; +#endif // CONFIG_EXT_INTER const MODE_INFO *mi = xd->mi[0]; const INTERP_FILTER interp_filter = mi->mbmi.interp_filter; const int is_compound = has_second_ref(&mi->mbmi); @@ -919,14 +1028,23 @@ static void dec_build_inter_predictors_sb_sub8x8(VP10Decoder *const pbi, RefCntBuffer *const ref_frame_buf = &pool->frame_bufs[idx]; const int is_scaled = vp10_is_scaled(sf); const MV mv = average_split_mvs(pd, mi, ref, block); - dec_build_inter_predictors(pbi, xd, plane, n4w_x4, n4h_x4, - 0, 0, n4w_x4, n4h_x4, mi_x, mi_y, + dec_build_inter_predictors(pbi, xd, plane, +#if CONFIG_OBMC + 0, 0, +#endif // CONFIG_OBMC + n4w_x4, n4h_x4, + 0, 0, n4w_x4, n4h_x4, +#if CONFIG_EXT_INTER + wedge_offset_x >> (pd->subsampling_x), + wedge_offset_y >> (pd->subsampling_y), +#endif // CONFIG_EXT_INTER + mi_x, mi_y, interp_filter, sf, pre_buf, dst_buf, &mv, ref_frame_buf, is_scaled, ref); } } #if CONFIG_EXT_INTER - if (is_interintra_pred(&xd->mi[0]->mbmi)) + if (is_interintra_pred(&mi->mbmi)) vp10_build_interintra_predictors(xd, xd->plane[0].dst.buf, xd->plane[1].dst.buf, @@ -934,7 +1052,7 @@ static void dec_build_inter_predictors_sb_sub8x8(VP10Decoder *const pbi, xd->plane[0].dst.stride, xd->plane[1].dst.stride, xd->plane[2].dst.stride, - xd->mi[0]->mbmi.sb_type); + mi->mbmi.sb_type); #endif // CONFIG_EXT_INTER } #endif // CONFIG_SUPERTX @@ -964,7 +1082,7 @@ static void dec_build_prediction_by_above_preds(VP10Decoder *const pbi, mi_step = VPXMIN(xd->n8_w, num_8x8_blocks_wide_lookup[sb_type]); - if (!is_inter_block(mbmi)) + if (!is_neighbor_overlappable(mbmi)) continue; for (j = 0; j < MAX_MB_PLANE; ++j) { @@ -1021,15 +1139,27 @@ static void dec_build_prediction_by_above_preds(VP10Decoder *const pbi, && y == 0 && !pd->subsampling_y) continue; - dec_build_inter_predictors(pbi, xd, j, bw, bh, - 4 * x, 0, pw, bh, mi_x, mi_y, + dec_build_inter_predictors(pbi, xd, j, + mi_col_offset, mi_row_offset, + bw, bh, + 4 * x, 0, pw, bh, +#if CONFIG_EXT_INTER && CONFIG_SUPERTX + 0, 0, +#endif // CONFIG_EXT_INTER && CONFIG_SUPERTX + mi_x, mi_y, interp_filter, sf, pre_buf, dst_buf, &mv, ref_frame_buf, is_scaled, ref); } } else { const MV mv = mi->mbmi.mv[ref].as_mv; - dec_build_inter_predictors(pbi, xd, j, bw, bh, - 0, 0, bw, bh, mi_x, mi_y, interp_filter, + dec_build_inter_predictors(pbi, xd, j, + mi_col_offset, mi_row_offset, + bw, bh, + 0, 0, bw, bh, +#if CONFIG_EXT_INTER && CONFIG_SUPERTX + 0, 0, +#endif // CONFIG_EXT_INTER && CONFIG_SUPERTX + mi_x, mi_y, interp_filter, sf, pre_buf, dst_buf, &mv, ref_frame_buf, is_scaled, 
ref); } @@ -1065,7 +1195,7 @@ static void dec_build_prediction_by_left_preds(VP10Decoder *const pbi, mi_step = VPXMIN(xd->n8_h, num_8x8_blocks_high_lookup[sb_type]); - if (!is_inter_block(mbmi)) + if (!is_neighbor_overlappable(mbmi)) continue; for (j = 0; j < MAX_MB_PLANE; ++j) { @@ -1123,15 +1253,31 @@ static void dec_build_prediction_by_left_preds(VP10Decoder *const pbi, && x == 0 && !pd->subsampling_x) continue; - dec_build_inter_predictors(pbi, xd, j, bw, bh, - 0, 4 * y, bw, ph, mi_x, mi_y, + dec_build_inter_predictors(pbi, xd, j, +#if CONFIG_OBMC + mi_col_offset, mi_row_offset, +#endif // CONFIG_OBMC + bw, bh, + 0, 4 * y, bw, ph, +#if CONFIG_EXT_INTER && CONFIG_SUPERTX + 0, 0, +#endif // CONFIG_EXT_INTER && CONFIG_SUPERTX + mi_x, mi_y, interp_filter, sf, pre_buf, dst_buf, &mv, ref_frame_buf, is_scaled, ref); } } else { const MV mv = mi->mbmi.mv[ref].as_mv; - dec_build_inter_predictors(pbi, xd, j, bw, bh, - 0, 0, bw, bh, mi_x, mi_y, interp_filter, + dec_build_inter_predictors(pbi, xd, j, +#if CONFIG_OBMC + mi_col_offset, mi_row_offset, +#endif // CONFIG_OBMC + bw, bh, + 0, 0, bw, bh, +#if CONFIG_EXT_INTER && CONFIG_SUPERTX + 0, 0, +#endif // CONFIG_EXT_INTER && CONFIG_SUPERTX + mi_x, mi_y, interp_filter, sf, pre_buf, dst_buf, &mv, ref_frame_buf, is_scaled, ref); } @@ -1274,8 +1420,7 @@ static void set_offsets_topblock(VP10_COMMON *const cm, MACROBLOCKD *const xd, static void set_param_topblock(VP10_COMMON *const cm, MACROBLOCKD *const xd, BLOCK_SIZE bsize, int mi_row, int mi_col, - int txfm, - int skip) { + int txfm, int skip) { const int bw = num_8x8_blocks_wide_lookup[bsize]; const int bh = num_8x8_blocks_high_lookup[bsize]; const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col); @@ -1367,10 +1512,19 @@ static void dec_predict_b_extend( (c >> xd->plane[2].subsampling_x); if (!b_sub8x8) - dec_build_inter_predictors_sb(pbi, xd, mi_row_pred, mi_col_pred); + dec_build_inter_predictors_sb_extend( + pbi, xd, +#if CONFIG_EXT_INTER + mi_row_ori, mi_col_ori, +#endif // CONFIG_EXT_INTER + mi_row_pred, mi_col_pred); else - dec_build_inter_predictors_sb_sub8x8(pbi, xd, mi_row_pred, mi_col_pred, - block); + dec_build_inter_predictors_sb_sub8x8_extend( + pbi, xd, +#if CONFIG_EXT_INTER + mi_row_ori, mi_col_ori, +#endif // CONFIG_EXT_INTER + mi_row_pred, mi_col_pred, block); } static void dec_extend_dir(VP10Decoder *const pbi, MACROBLOCKD *const xd, @@ -1872,38 +2026,43 @@ static void decode_block(VP10Decoder *const pbi, MACROBLOCKD *const xd, #if CONFIG_OBMC if (mbmi->obmc) { #if CONFIG_VP9_HIGHBITDEPTH - DECLARE_ALIGNED(16, uint8_t, tmp_buf1[2 * MAX_MB_PLANE * 64 * 64]); - DECLARE_ALIGNED(16, uint8_t, tmp_buf2[2 * MAX_MB_PLANE * 64 * 64]); + DECLARE_ALIGNED(16, uint8_t, + tmp_buf1[2 * MAX_MB_PLANE * CU_SIZE * CU_SIZE]); + DECLARE_ALIGNED(16, uint8_t, + tmp_buf2[2 * MAX_MB_PLANE * CU_SIZE * CU_SIZE]); #else - DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * 64 * 64]); - DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * 64 * 64]); + DECLARE_ALIGNED(16, uint8_t, + tmp_buf1[MAX_MB_PLANE * CU_SIZE * CU_SIZE]); + DECLARE_ALIGNED(16, uint8_t, + tmp_buf2[MAX_MB_PLANE * CU_SIZE * CU_SIZE]); #endif // CONFIG_VP9_HIGHBITDEPTH uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE]; - int dst_stride1[MAX_MB_PLANE] = {64, 64, 64}; - int dst_stride2[MAX_MB_PLANE] = {64, 64, 64}; + int dst_stride1[MAX_MB_PLANE] = {CU_SIZE, CU_SIZE, CU_SIZE}; + int dst_stride2[MAX_MB_PLANE] = {CU_SIZE, CU_SIZE, CU_SIZE}; assert(mbmi->sb_type >= BLOCK_8X8); #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & 
YV12_FLAG_HIGHBITDEPTH) { int len = sizeof(uint16_t); dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1); - dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + 4096 * len); - dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 8192 * len); + dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + CU_SIZE * CU_SIZE * len); + dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + + CU_SIZE * CU_SIZE * 2 * len); dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2); - dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + 4096 * len); - dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 8192 * len); + dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + CU_SIZE * CU_SIZE * len); + dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + + CU_SIZE * CU_SIZE * 2 * len); } else { #endif // CONFIG_VP9_HIGHBITDEPTH - dst_buf1[0] = tmp_buf1; - dst_buf1[1] = tmp_buf1 + 4096; - dst_buf1[2] = tmp_buf1 + 8192; - dst_buf2[0] = tmp_buf2; - dst_buf2[1] = tmp_buf2 + 4096; - dst_buf2[2] = tmp_buf2 + 8192; + dst_buf1[0] = tmp_buf1; + dst_buf1[1] = tmp_buf1 + CU_SIZE * CU_SIZE; + dst_buf1[2] = tmp_buf1 + CU_SIZE * CU_SIZE * 2; + dst_buf2[0] = tmp_buf2; + dst_buf2[1] = tmp_buf2 + CU_SIZE * CU_SIZE; + dst_buf2[2] = tmp_buf2 + CU_SIZE * CU_SIZE * 2; #if CONFIG_VP9_HIGHBITDEPTH } #endif // CONFIG_VP9_HIGHBITDEPTH - dec_build_prediction_by_above_preds(pbi, xd, mi_row, mi_col, dst_buf1, dst_stride1); dec_build_prediction_by_left_preds(pbi, xd, mi_row, mi_col, @@ -3591,13 +3750,24 @@ static int read_compressed_header(VP10Decoder *pbi, const uint8_t *data, #if CONFIG_EXT_INTER read_inter_compound_mode_probs(fc, &r); - if (cm->reference_mode != COMPOUND_REFERENCE) { for (i = 0; i < BLOCK_SIZES; i++) { if (is_interintra_allowed_bsize(i)) { vp10_diff_update_prob(&r, &fc->interintra_prob[i]); } } + for (i = 0; i < BLOCK_SIZES; i++) { + if (is_interintra_allowed_bsize(i) && get_wedge_bits(i)) { + vp10_diff_update_prob(&r, &fc->wedge_interintra_prob[i]); + } + } + } + if (cm->reference_mode != SINGLE_REFERENCE) { + for (i = 0; i < BLOCK_SIZES; i++) { + if (get_wedge_bits(i)) { + vp10_diff_update_prob(&r, &fc->wedge_interinter_prob[i]); + } + } } #endif // CONFIG_EXT_INTER @@ -3666,6 +3836,10 @@ static void debug_check_frame_counts(const VP10_COMMON *const cm) { sizeof(cm->counts.inter_compound_mode))); assert(!memcmp(cm->counts.interintra, zero_counts.interintra, sizeof(cm->counts.interintra))); + assert(!memcmp(cm->counts.wedge_interintra, zero_counts.wedge_interintra, + sizeof(cm->counts.wedge_interintra))); + assert(!memcmp(cm->counts.wedge_interinter, zero_counts.wedge_interinter, + sizeof(cm->counts.wedge_interinter))); #endif // CONFIG_EXT_INTER #if CONFIG_OBMC assert(!memcmp(cm->counts.obmc, zero_counts.obmc, diff --git a/vp10/decoder/decodemv.c b/vp10/decoder/decodemv.c index fccd3c880b80d2231419579b5603399495fd0bb2..a42d08bc54c434bb13283d183a03108fe30a80e4 100644 --- a/vp10/decoder/decodemv.c +++ b/vp10/decoder/decodemv.c @@ -1185,10 +1185,11 @@ static void read_inter_block_mode_info(VP10Decoder *const pbi, } #if CONFIG_OBMC + mbmi->obmc = 0; #if CONFIG_SUPERTX if (!supertx_enabled) #endif // CONFIG_SUPERTX - mbmi->obmc = read_is_obmc_block(cm, xd, r); + mbmi->obmc = read_is_obmc_block(cm, xd, r); #endif // CONFIG_OBMC #if CONFIG_REF_MV @@ -1432,7 +1433,12 @@ static void read_inter_block_mode_info(VP10Decoder *const pbi, } #if CONFIG_EXT_INTER + mbmi->use_wedge_interintra = 0; + mbmi->use_wedge_interinter = 0; if (cm->reference_mode != COMPOUND_REFERENCE && +#if CONFIG_OBMC + !(is_obmc_allowed(mbmi) && mbmi->obmc) && +#endif // CONFIG_OBMC #if CONFIG_SUPERTX !supertx_enabled && #endif @@ -1444,19 +1450,42 @@ 
static void read_inter_block_mode_info(VP10Decoder *const pbi, if (interintra) { const PREDICTION_MODE interintra_mode = read_intra_mode_y(cm, xd, r, size_group_lookup[bsize]); - mbmi->ref_frame[1] = INTRA_FRAME; mbmi->interintra_mode = interintra_mode; mbmi->interintra_uv_mode = interintra_mode; #if CONFIG_EXT_INTRA - // TODO(debargha|geza.lore): - // Should we use ext_intra modes for interintra? mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0; mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0; mbmi->angle_delta[0] = 0; mbmi->angle_delta[1] = 0; mbmi->intra_filter = INTRA_FILTER_LINEAR; #endif // CONFIG_EXT_INTRA + if (get_wedge_bits(bsize)) { + mbmi->use_wedge_interintra = + vpx_read(r, cm->fc->wedge_interintra_prob[bsize]); + if (xd->counts) + xd->counts->wedge_interintra[bsize][mbmi->use_wedge_interintra]++; + if (mbmi->use_wedge_interintra) { + mbmi->interintra_wedge_index = + mbmi->interintra_uv_wedge_index = + vpx_read_literal(r, get_wedge_bits(bsize)); + } + } + } + } + if (cm->reference_mode != SINGLE_REFERENCE && + is_inter_compound_mode(mbmi->mode) && +#if CONFIG_OBMC + !(is_obmc_allowed(mbmi) && mbmi->obmc) && +#endif // CONFIG_OBMC + get_wedge_bits(bsize)) { + mbmi->use_wedge_interinter = + vpx_read(r, cm->fc->wedge_interinter_prob[bsize]); + if (xd->counts) + xd->counts->wedge_interinter[bsize][mbmi->use_wedge_interinter]++; + if (mbmi->use_wedge_interinter) { + mbmi->interinter_wedge_index = + vpx_read_literal(r, get_wedge_bits(bsize)); } } #endif // CONFIG_EXT_INTER diff --git a/vp10/decoder/decoder.c b/vp10/decoder/decoder.c index 35c53df6d5902e8fa0f6d3d362902d9fd93b1d77..d3fee853aa1cec3de65751b6efe985b7103fc2d6 100644 --- a/vp10/decoder/decoder.c +++ b/vp10/decoder/decoder.c @@ -30,6 +30,7 @@ #include "vp10/common/postproc.h" #endif #include "vp10/common/quant_common.h" +#include "vp10/common/reconinter.h" #include "vp10/common/reconintra.h" #include "vp10/decoder/decodeframe.h" @@ -44,6 +45,9 @@ static void initialize_dec(void) { vpx_dsp_rtcd(); vpx_scale_rtcd(); vp10_init_intra_predictors(); +#if CONFIG_EXT_INTER + vp10_init_wedge_masks(); +#endif // CONFIG_EXT_INTER init_done = 1; } } diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c index 2603b6b48659d9f91a3ea235d944a0edd8b9da72..ed9d2a9e2ce18b06256348fd644d5d418aac41e0 100644 --- a/vp10/encoder/bitstream.c +++ b/vp10/encoder/bitstream.c @@ -997,12 +997,13 @@ static void pack_inter_mode_mvs(VP10_COMP *cpi, const MODE_INFO *mi, } else { int16_t mode_ctx = mbmi_ext->mode_context[mbmi->ref_frame[0]]; write_ref_frames(cm, xd, w); + #if CONFIG_OBMC #if CONFIG_SUPERTX if (!supertx_enabled) #endif // CONFIG_SUPERTX - if (is_obmc_allowed(mbmi)) - vpx_write(w, mbmi->obmc, cm->fc->obmc_prob[bsize]); + if (is_obmc_allowed(mbmi)) + vpx_write(w, mbmi->obmc, cm->fc->obmc_prob[bsize]); #endif // CONFIG_OBMC #if CONFIG_REF_MV @@ -1052,8 +1053,8 @@ static void pack_inter_mode_mvs(VP10_COMP *cpi, const MODE_INFO *mi, #if CONFIG_EXT_INTER if (!is_compound) #endif // CONFIG_EXT_INTER - mode_ctx = vp10_mode_context_analyzer(mbmi_ext->mode_context, - mbmi->ref_frame, bsize, j); + mode_ctx = vp10_mode_context_analyzer(mbmi_ext->mode_context, + mbmi->ref_frame, bsize, j); #endif #if CONFIG_EXT_INTER if (is_inter_compound_mode(b_mode)) @@ -1162,6 +1163,9 @@ static void pack_inter_mode_mvs(VP10_COMP *cpi, const MODE_INFO *mi, #if CONFIG_EXT_INTER if (cpi->common.reference_mode != COMPOUND_REFERENCE && +#if CONFIG_OBMC + !(is_obmc_allowed(mbmi) && mbmi->obmc) && +#endif // CONFIG_OBMC #if CONFIG_SUPERTX !supertx_enabled && 
#endif // CONFIG_SUPERTX @@ -1172,8 +1176,28 @@ static void pack_inter_mode_mvs(VP10_COMP *cpi, const MODE_INFO *mi, write_intra_mode(w, mbmi->interintra_mode, cm->fc->y_mode_prob[size_group_lookup[bsize]]); assert(mbmi->interintra_mode == mbmi->interintra_uv_mode); + if (get_wedge_bits(bsize)) { + vpx_write(w, mbmi->use_wedge_interintra, + cm->fc->wedge_interintra_prob[bsize]); + if (mbmi->use_wedge_interintra) { + vpx_write_literal(w, mbmi->interintra_wedge_index, + get_wedge_bits(bsize)); + } + } } } + if (cpi->common.reference_mode != SINGLE_REFERENCE && + is_inter_compound_mode(mbmi->mode) && +#if CONFIG_OBMC + !(is_obmc_allowed(mbmi) && mbmi->obmc) && +#endif // CONFIG_OBMC + get_wedge_bits(bsize)) { + vpx_write(w, mbmi->use_wedge_interinter, + cm->fc->wedge_interinter_prob[bsize]); + if (mbmi->use_wedge_interinter) + vpx_write_literal(w, mbmi->interinter_wedge_index, + get_wedge_bits(bsize)); + } #endif // CONFIG_EXT_INTER #if CONFIG_EXT_INTERP @@ -2467,6 +2491,19 @@ static size_t write_compressed_header(VP10_COMP *cpi, uint8_t *data) { cm->counts.interintra[i]); } } + for (i = 0; i < BLOCK_SIZES; i++) { + if (is_interintra_allowed_bsize(i) && get_wedge_bits(i)) + vp10_cond_prob_diff_update(&header_bc, + &fc->wedge_interintra_prob[i], + cm->counts.wedge_interintra[i]); + } + } + if (cm->reference_mode != SINGLE_REFERENCE) { + for (i = 0; i < BLOCK_SIZES; i++) + if (get_wedge_bits(i)) + vp10_cond_prob_diff_update(&header_bc, + &fc->wedge_interinter_prob[i], + cm->counts.wedge_interinter[i]); } #endif // CONFIG_EXT_INTER diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c index 8c7af63f06339d41b8b2b0ad20577a03c959bbf7..bea01575df4608e5a58c68116533ba78b623b943 100644 --- a/vp10/encoder/encodeframe.c +++ b/vp10/encoder/encodeframe.c @@ -61,6 +61,9 @@ static int check_intra_sb(VP10_COMP *cpi, const TileInfo *const tile, int mi_row, int mi_col, BLOCK_SIZE bsize, PC_TREE *pc_tree); static void predict_superblock(VP10_COMP *cpi, ThreadData *td, +#if CONFIG_EXT_INTER + int mi_row_ori, int mi_col_ori, +#endif // CONFIG_EXT_INTER int mi_row_pred, int mi_col_pred, BLOCK_SIZE bsize_pred, int b_sub8x8, int block); static int check_supertx_sb(BLOCK_SIZE bsize, TX_SIZE supertx_size, @@ -1290,6 +1293,10 @@ static void update_state_supertx(VP10_COMP *cpi, ThreadData *td, mbmi->inter_tx_size[(idy << 3) + idx] = mbmi->tx_size; } #endif // CONFIG_VAR_TX +#if CONFIG_OBMC + // Turn OBMC off for supertx + mbmi->obmc = 0; +#endif // CONFIG_OBMC if (!output_enabled) return; @@ -1801,29 +1808,43 @@ static void update_stats(VP10_COMMON *cm, ThreadData *td [ref0 != GOLDEN_FRAME]++; #endif // CONFIG_EXT_REFS } + #if CONFIG_OBMC #if CONFIG_SUPERTX if (!supertx_enabled) #endif // CONFIG_SUPERTX - if (is_obmc_allowed(mbmi)) - counts->obmc[mbmi->sb_type][mbmi->obmc]++; + if (is_obmc_allowed(mbmi)) + counts->obmc[mbmi->sb_type][mbmi->obmc]++; #endif // CONFIG_OBMC } } #if CONFIG_EXT_INTER if (cm->reference_mode != COMPOUND_REFERENCE && +#if CONFIG_OBMC + !(is_obmc_allowed(mbmi) && mbmi->obmc) && +#endif #if CONFIG_SUPERTX - !supertx_enabled && + !supertx_enabled && #endif - is_interintra_allowed(mbmi)) { + is_interintra_allowed(mbmi)) { if (mbmi->ref_frame[1] == INTRA_FRAME) { counts->y_mode[size_group_lookup[bsize]][mbmi->interintra_mode]++; counts->interintra[bsize][1]++; + if (get_wedge_bits(bsize)) + counts->wedge_interintra[bsize][mbmi->use_wedge_interintra]++; } else { counts->interintra[bsize][0]++; } } + if (cm->reference_mode != SINGLE_REFERENCE && + is_inter_compound_mode(mbmi->mode) && 
+#if CONFIG_OBMC + !(is_obmc_allowed(mbmi) && mbmi->obmc) && +#endif // CONFIG_OBMC + get_wedge_bits(bsize)) { + counts->wedge_interinter[bsize][mbmi->use_wedge_interinter]++; + } #endif // CONFIG_EXT_INTER if (inter_block && @@ -4458,15 +4479,19 @@ static void encode_superblock(VP10_COMP *cpi, ThreadData *td, #if CONFIG_OBMC if (mbmi->obmc) { #if CONFIG_VP9_HIGHBITDEPTH - DECLARE_ALIGNED(16, uint8_t, tmp_buf1[2 * MAX_MB_PLANE * 64 * 64]); - DECLARE_ALIGNED(16, uint8_t, tmp_buf2[2 * MAX_MB_PLANE * 64 * 64]); + DECLARE_ALIGNED(16, uint8_t, + tmp_buf1[2 * MAX_MB_PLANE * CU_SIZE * CU_SIZE]); + DECLARE_ALIGNED(16, uint8_t, + tmp_buf2[2 * MAX_MB_PLANE * CU_SIZE * CU_SIZE]); #else - DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * 64 * 64]); - DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * 64 * 64]); + DECLARE_ALIGNED(16, uint8_t, + tmp_buf1[MAX_MB_PLANE * CU_SIZE * CU_SIZE]); + DECLARE_ALIGNED(16, uint8_t, + tmp_buf2[MAX_MB_PLANE * CU_SIZE * CU_SIZE]); #endif // CONFIG_VP9_HIGHBITDEPTH uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE]; - int dst_stride1[MAX_MB_PLANE] = {64, 64, 64}; - int dst_stride2[MAX_MB_PLANE] = {64, 64, 64}; + int dst_stride1[MAX_MB_PLANE] = {CU_SIZE, CU_SIZE, CU_SIZE}; + int dst_stride2[MAX_MB_PLANE] = {CU_SIZE, CU_SIZE, CU_SIZE}; assert(mbmi->sb_type >= BLOCK_8X8); @@ -4474,23 +4499,24 @@ static void encode_superblock(VP10_COMP *cpi, ThreadData *td, if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { int len = sizeof(uint16_t); dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1); - dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + 4096 * len); - dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 8192 * len); + dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + CU_SIZE * CU_SIZE * len); + dst_buf1[2] = CONVERT_TO_BYTEPTR( + tmp_buf1 + CU_SIZE * CU_SIZE * 2 * len); dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2); - dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + 4096 * len); - dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 8192 * len); + dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + CU_SIZE * CU_SIZE * len); + dst_buf2[2] = CONVERT_TO_BYTEPTR( + tmp_buf2 + CU_SIZE * CU_SIZE * 2 * len); } else { #endif // CONFIG_VP9_HIGHBITDEPTH dst_buf1[0] = tmp_buf1; - dst_buf1[1] = tmp_buf1 + 4096; - dst_buf1[2] = tmp_buf1 + 8192; + dst_buf1[1] = tmp_buf1 + CU_SIZE * CU_SIZE; + dst_buf1[2] = tmp_buf1 + CU_SIZE * CU_SIZE * 2; dst_buf2[0] = tmp_buf2; - dst_buf2[1] = tmp_buf2 + 4096; - dst_buf2[2] = tmp_buf2 + 8192; + dst_buf2[1] = tmp_buf2 + CU_SIZE * CU_SIZE; + dst_buf2[2] = tmp_buf2 + CU_SIZE * CU_SIZE * 2; #if CONFIG_VP9_HIGHBITDEPTH } #endif // CONFIG_VP9_HIGHBITDEPTH - vp10_build_prediction_by_above_preds(cpi, xd, mi_row, mi_col, dst_buf1, dst_stride1); vp10_build_prediction_by_left_preds(cpi, xd, mi_row, mi_col, dst_buf2, @@ -4501,7 +4527,6 @@ static void encode_superblock(VP10_COMP *cpi, ThreadData *td, dst_buf1, dst_stride1, dst_buf2, dst_stride2); } - #endif // CONFIG_OBMC vp10_encode_sb(x, VPXMAX(bsize, BLOCK_8X8)); @@ -4695,6 +4720,9 @@ static int check_supertx_sb(BLOCK_SIZE bsize, TX_SIZE supertx_size, } static void predict_superblock(VP10_COMP *cpi, ThreadData *td, +#if CONFIG_EXT_INTER + int mi_row_ori, int mi_col_ori, +#endif // CONFIG_EXT_INTER int mi_row_pred, int mi_col_pred, BLOCK_SIZE bsize_pred, int b_sub8x8, int block) { // Used in supertx @@ -4719,10 +4747,19 @@ static void predict_superblock(VP10_COMP *cpi, ThreadData *td, } if (!b_sub8x8) - vp10_build_inter_predictors_sb(xd, mi_row_pred, mi_col_pred, bsize_pred); + vp10_build_inter_predictors_sb_extend( + xd, +#if CONFIG_EXT_INTER + 
mi_row_ori, mi_col_ori, +#endif // CONFIG_EXT_INTER + mi_row_pred, mi_col_pred, bsize_pred); else - vp10_build_inter_predictors_sb_sub8x8(xd, mi_row_pred, mi_col_pred, - bsize_pred, block); + vp10_build_inter_predictors_sb_sub8x8_extend( + xd, +#if CONFIG_EXT_INTER + mi_row_ori, mi_col_ori, +#endif // CONFIG_EXT_INTER + mi_row_pred, mi_col_pred, bsize_pred, block); } static void predict_b_extend(VP10_COMP *cpi, ThreadData *td, @@ -4773,6 +4810,9 @@ static void predict_b_extend(VP10_COMP *cpi, ThreadData *td, (c >> xd->plane[2].subsampling_x); predict_superblock(cpi, td, +#if CONFIG_EXT_INTER + mi_row_ori, mi_col_ori, +#endif // CONFIG_EXT_INTER mi_row_pred, mi_col_pred, bsize_pred, b_sub8x8, block); diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c index ac8d2770c9a68896d52c40ba139d5d160cbed0ef..34dd8d54b145ea20bde0ddf44828e62822d9a7a0 100644 --- a/vp10/encoder/encoder.c +++ b/vp10/encoder/encoder.c @@ -349,6 +349,9 @@ void vp10_initialize_enc(void) { vp10_entropy_mv_init(); vp10_temporal_filter_init(); vp10_encode_token_init(); +#if CONFIG_EXT_INTER + vp10_init_wedge_masks(); +#endif init_done = 1; } } @@ -1038,6 +1041,19 @@ static void fnname##_bits12(const uint8_t *src_ptr, \ sad_array[i] >>= 4; \ } +#if CONFIG_EXT_PARTITION +MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad128x128) +MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad128x128_avg) +MAKE_BFP_SAD3_WRAPPER(vpx_highbd_sad128x128x3) +MAKE_BFP_SAD8_WRAPPER(vpx_highbd_sad128x128x8) +MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad128x128x4d) +MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad128x64) +MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad128x64_avg) +MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad128x64x4d) +MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad64x128) +MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad64x128_avg) +MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad64x128x4d) +#endif // CONFIG_EXT_PARTITION MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad32x16) MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad32x16_avg) MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad32x16x4d) @@ -1094,6 +1110,61 @@ MAKE_BFP_SAD3_WRAPPER(vpx_highbd_sad4x4x3) MAKE_BFP_SAD8_WRAPPER(vpx_highbd_sad4x4x8) MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad4x4x4d) +#if CONFIG_EXT_INTER +#define HIGHBD_MBFP(BT, MSDF, MVF, MSVF) \ + cpi->fn_ptr[BT].msdf = MSDF; \ + cpi->fn_ptr[BT].mvf = MVF; \ + cpi->fn_ptr[BT].msvf = MSVF; + +#define MAKE_MBFP_SAD_WRAPPER(fnname) \ +static unsigned int fnname##_bits8(const uint8_t *src_ptr, \ + int source_stride, \ + const uint8_t *ref_ptr, \ + int ref_stride, \ + const uint8_t *m, \ + int m_stride) { \ + return fnname(src_ptr, source_stride, ref_ptr, ref_stride, \ + m, m_stride); \ +} \ +static unsigned int fnname##_bits10(const uint8_t *src_ptr, \ + int source_stride, \ + const uint8_t *ref_ptr, \ + int ref_stride, \ + const uint8_t *m, \ + int m_stride) { \ + return fnname(src_ptr, source_stride, ref_ptr, ref_stride, \ + m, m_stride) >> 2; \ +} \ +static unsigned int fnname##_bits12(const uint8_t *src_ptr, \ + int source_stride, \ + const uint8_t *ref_ptr, \ + int ref_stride, \ + const uint8_t *m, \ + int m_stride) { \ + return fnname(src_ptr, source_stride, ref_ptr, ref_stride, \ + m, m_stride) >> 4; \ +} + +#if CONFIG_EXT_PARTITION +MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad128x128) +MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad128x64) +MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad64x128) +#endif // CONFIG_EXT_PARTITION +MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad64x64) +MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad64x32) +MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad32x64) +MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad32x32) 
+MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad32x16) +MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad16x32) +MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad16x16) +MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad16x8) +MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad8x16) +MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad8x8) +MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad8x4) +MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad4x8) +MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad4x4) +#endif // CONFIG_EXT_INTER + static void highbd_set_var_fns(VP10_COMP *const cpi) { VP10_COMMON *const cm = &cpi->common; if (cm->use_highbitdepth) { @@ -1228,6 +1299,107 @@ static void highbd_set_var_fns(VP10_COMP *const cpi) { vpx_highbd_sad4x4x3_bits8, vpx_highbd_sad4x4x8_bits8, vpx_highbd_sad4x4x4d_bits8) + +#if CONFIG_EXT_PARTITION + HIGHBD_BFP(BLOCK_128X128, + vpx_highbd_sad128x128_bits8, + vpx_highbd_sad128x128_avg_bits8, + vpx_highbd_8_variance128x128, + vpx_highbd_8_sub_pixel_variance128x128, + vpx_highbd_8_sub_pixel_avg_variance128x128, + vpx_highbd_sad128x128x3_bits8, + vpx_highbd_sad128x128x8_bits8, + vpx_highbd_sad128x128x4d_bits8) + + HIGHBD_BFP(BLOCK_128X64, + vpx_highbd_sad128x64_bits8, + vpx_highbd_sad128x64_avg_bits8, + vpx_highbd_8_variance128x64, + vpx_highbd_8_sub_pixel_variance128x64, + vpx_highbd_8_sub_pixel_avg_variance128x64, + NULL, + NULL, + vpx_highbd_sad128x64x4d_bits8) + + HIGHBD_BFP(BLOCK_64X128, + vpx_highbd_sad64x128_bits8, + vpx_highbd_sad64x128_avg_bits8, + vpx_highbd_8_variance64x128, + vpx_highbd_8_sub_pixel_variance64x128, + vpx_highbd_8_sub_pixel_avg_variance64x128, + NULL, + NULL, + vpx_highbd_sad64x128x4d_bits8) +#endif // CONFIG_EXT_PARTITION + +#if CONFIG_EXT_INTER +#if CONFIG_EXT_PARTITION + HIGHBD_MBFP(BLOCK_128X128, + vpx_highbd_masked_sad128x128_bits8, + vpx_highbd_masked_variance128x128, + vpx_highbd_masked_sub_pixel_variance128x128) + HIGHBD_MBFP(BLOCK_128X64, + vpx_highbd_masked_sad128x64_bits8, + vpx_highbd_masked_variance128x64, + vpx_highbd_masked_sub_pixel_variance128x64) + HIGHBD_MBFP(BLOCK_64X128, + vpx_highbd_masked_sad64x128_bits8, + vpx_highbd_masked_variance64x128, + vpx_highbd_masked_sub_pixel_variance64x128) +#endif // CONFIG_EXT_PARTITION + HIGHBD_MBFP(BLOCK_64X64, + vpx_highbd_masked_sad64x64_bits8, + vpx_highbd_masked_variance64x64, + vpx_highbd_masked_sub_pixel_variance64x64) + HIGHBD_MBFP(BLOCK_64X32, + vpx_highbd_masked_sad64x32_bits8, + vpx_highbd_masked_variance64x32, + vpx_highbd_masked_sub_pixel_variance64x32) + HIGHBD_MBFP(BLOCK_32X64, + vpx_highbd_masked_sad32x64_bits8, + vpx_highbd_masked_variance32x64, + vpx_highbd_masked_sub_pixel_variance32x64) + HIGHBD_MBFP(BLOCK_32X32, + vpx_highbd_masked_sad32x32_bits8, + vpx_highbd_masked_variance32x32, + vpx_highbd_masked_sub_pixel_variance32x32) + HIGHBD_MBFP(BLOCK_32X16, + vpx_highbd_masked_sad32x16_bits8, + vpx_highbd_masked_variance32x16, + vpx_highbd_masked_sub_pixel_variance32x16) + HIGHBD_MBFP(BLOCK_16X32, + vpx_highbd_masked_sad16x32_bits8, + vpx_highbd_masked_variance16x32, + vpx_highbd_masked_sub_pixel_variance16x32) + HIGHBD_MBFP(BLOCK_16X16, + vpx_highbd_masked_sad16x16_bits8, + vpx_highbd_masked_variance16x16, + vpx_highbd_masked_sub_pixel_variance16x16) + HIGHBD_MBFP(BLOCK_8X16, + vpx_highbd_masked_sad8x16_bits8, + vpx_highbd_masked_variance8x16, + vpx_highbd_masked_sub_pixel_variance8x16) + HIGHBD_MBFP(BLOCK_16X8, + vpx_highbd_masked_sad16x8_bits8, + vpx_highbd_masked_variance16x8, + vpx_highbd_masked_sub_pixel_variance16x8) + HIGHBD_MBFP(BLOCK_8X8, + vpx_highbd_masked_sad8x8_bits8, + vpx_highbd_masked_variance8x8, + 
vpx_highbd_masked_sub_pixel_variance8x8) + HIGHBD_MBFP(BLOCK_4X8, + vpx_highbd_masked_sad4x8_bits8, + vpx_highbd_masked_variance4x8, + vpx_highbd_masked_sub_pixel_variance4x8) + HIGHBD_MBFP(BLOCK_8X4, + vpx_highbd_masked_sad8x4_bits8, + vpx_highbd_masked_variance8x4, + vpx_highbd_masked_sub_pixel_variance8x4) + HIGHBD_MBFP(BLOCK_4X4, + vpx_highbd_masked_sad4x4_bits8, + vpx_highbd_masked_variance4x4, + vpx_highbd_masked_sub_pixel_variance4x4) +#endif // CONFIG_EXT_INTER break; case VPX_BITS_10: @@ -1360,6 +1532,107 @@ static void highbd_set_var_fns(VP10_COMP *const cpi) { vpx_highbd_sad4x4x3_bits10, vpx_highbd_sad4x4x8_bits10, vpx_highbd_sad4x4x4d_bits10) + +#if CONFIG_EXT_PARTITION + HIGHBD_BFP(BLOCK_128X128, + vpx_highbd_sad128x128_bits10, + vpx_highbd_sad128x128_avg_bits10, + vpx_highbd_10_variance128x128, + vpx_highbd_10_sub_pixel_variance128x128, + vpx_highbd_10_sub_pixel_avg_variance128x128, + vpx_highbd_sad128x128x3_bits10, + vpx_highbd_sad128x128x8_bits10, + vpx_highbd_sad128x128x4d_bits10) + + HIGHBD_BFP(BLOCK_128X64, + vpx_highbd_sad128x64_bits10, + vpx_highbd_sad128x64_avg_bits10, + vpx_highbd_10_variance128x64, + vpx_highbd_10_sub_pixel_variance128x64, + vpx_highbd_10_sub_pixel_avg_variance128x64, + NULL, + NULL, + vpx_highbd_sad128x64x4d_bits10) + + HIGHBD_BFP(BLOCK_64X128, + vpx_highbd_sad64x128_bits10, + vpx_highbd_sad64x128_avg_bits10, + vpx_highbd_10_variance64x128, + vpx_highbd_10_sub_pixel_variance64x128, + vpx_highbd_10_sub_pixel_avg_variance64x128, + NULL, + NULL, + vpx_highbd_sad64x128x4d_bits10) +#endif // CONFIG_EXT_PARTITION + +#if CONFIG_EXT_INTER +#if CONFIG_EXT_PARTITION + HIGHBD_MBFP(BLOCK_128X128, + vpx_highbd_masked_sad128x128_bits10, + vpx_highbd_10_masked_variance128x128, + vpx_highbd_10_masked_sub_pixel_variance128x128) + HIGHBD_MBFP(BLOCK_128X64, + vpx_highbd_masked_sad128x64_bits10, + vpx_highbd_10_masked_variance128x64, + vpx_highbd_10_masked_sub_pixel_variance128x64) + HIGHBD_MBFP(BLOCK_64X128, + vpx_highbd_masked_sad64x128_bits10, + vpx_highbd_10_masked_variance64x128, + vpx_highbd_10_masked_sub_pixel_variance64x128) +#endif // CONFIG_EXT_PARTITION + HIGHBD_MBFP(BLOCK_64X64, + vpx_highbd_masked_sad64x64_bits10, + vpx_highbd_10_masked_variance64x64, + vpx_highbd_10_masked_sub_pixel_variance64x64) + HIGHBD_MBFP(BLOCK_64X32, + vpx_highbd_masked_sad64x32_bits10, + vpx_highbd_10_masked_variance64x32, + vpx_highbd_10_masked_sub_pixel_variance64x32) + HIGHBD_MBFP(BLOCK_32X64, + vpx_highbd_masked_sad32x64_bits10, + vpx_highbd_10_masked_variance32x64, + vpx_highbd_10_masked_sub_pixel_variance32x64) + HIGHBD_MBFP(BLOCK_32X32, + vpx_highbd_masked_sad32x32_bits10, + vpx_highbd_10_masked_variance32x32, + vpx_highbd_10_masked_sub_pixel_variance32x32) + HIGHBD_MBFP(BLOCK_32X16, + vpx_highbd_masked_sad32x16_bits10, + vpx_highbd_10_masked_variance32x16, + vpx_highbd_10_masked_sub_pixel_variance32x16) + HIGHBD_MBFP(BLOCK_16X32, + vpx_highbd_masked_sad16x32_bits10, + vpx_highbd_10_masked_variance16x32, + vpx_highbd_10_masked_sub_pixel_variance16x32) + HIGHBD_MBFP(BLOCK_16X16, + vpx_highbd_masked_sad16x16_bits10, + vpx_highbd_10_masked_variance16x16, + vpx_highbd_10_masked_sub_pixel_variance16x16) + HIGHBD_MBFP(BLOCK_8X16, + vpx_highbd_masked_sad8x16_bits10, + vpx_highbd_10_masked_variance8x16, + vpx_highbd_10_masked_sub_pixel_variance8x16) + HIGHBD_MBFP(BLOCK_16X8, + vpx_highbd_masked_sad16x8_bits10, + vpx_highbd_10_masked_variance16x8, + vpx_highbd_10_masked_sub_pixel_variance16x8) + HIGHBD_MBFP(BLOCK_8X8, + vpx_highbd_masked_sad8x8_bits10, + 
vpx_highbd_10_masked_variance8x8, + vpx_highbd_10_masked_sub_pixel_variance8x8) + HIGHBD_MBFP(BLOCK_4X8, + vpx_highbd_masked_sad4x8_bits10, + vpx_highbd_10_masked_variance4x8, + vpx_highbd_10_masked_sub_pixel_variance4x8) + HIGHBD_MBFP(BLOCK_8X4, + vpx_highbd_masked_sad8x4_bits10, + vpx_highbd_10_masked_variance8x4, + vpx_highbd_10_masked_sub_pixel_variance8x4) + HIGHBD_MBFP(BLOCK_4X4, + vpx_highbd_masked_sad4x4_bits10, + vpx_highbd_10_masked_variance4x4, + vpx_highbd_10_masked_sub_pixel_variance4x4) +#endif // CONFIG_EXT_INTER break; case VPX_BITS_12: @@ -1492,6 +1765,107 @@ static void highbd_set_var_fns(VP10_COMP *const cpi) { vpx_highbd_sad4x4x3_bits12, vpx_highbd_sad4x4x8_bits12, vpx_highbd_sad4x4x4d_bits12) + +#if CONFIG_EXT_PARTITION + HIGHBD_BFP(BLOCK_128X128, + vpx_highbd_sad128x128_bits12, + vpx_highbd_sad128x128_avg_bits12, + vpx_highbd_12_variance128x128, + vpx_highbd_12_sub_pixel_variance128x128, + vpx_highbd_12_sub_pixel_avg_variance128x128, + vpx_highbd_sad128x128x3_bits12, + vpx_highbd_sad128x128x8_bits12, + vpx_highbd_sad128x128x4d_bits12) + + HIGHBD_BFP(BLOCK_128X64, + vpx_highbd_sad128x64_bits12, + vpx_highbd_sad128x64_avg_bits12, + vpx_highbd_12_variance128x64, + vpx_highbd_12_sub_pixel_variance128x64, + vpx_highbd_12_sub_pixel_avg_variance128x64, + NULL, + NULL, + vpx_highbd_sad128x64x4d_bits12) + + HIGHBD_BFP(BLOCK_64X128, + vpx_highbd_sad64x128_bits12, + vpx_highbd_sad64x128_avg_bits12, + vpx_highbd_12_variance64x128, + vpx_highbd_12_sub_pixel_variance64x128, + vpx_highbd_12_sub_pixel_avg_variance64x128, + NULL, + NULL, + vpx_highbd_sad64x128x4d_bits12) +#endif // CONFIG_EXT_PARTITION + +#if CONFIG_EXT_INTER +#if CONFIG_EXT_PARTITION + HIGHBD_MBFP(BLOCK_128X128, + vpx_highbd_masked_sad128x128_bits12, + vpx_highbd_12_masked_variance128x128, + vpx_highbd_12_masked_sub_pixel_variance128x128) + HIGHBD_MBFP(BLOCK_128X64, + vpx_highbd_masked_sad128x64_bits12, + vpx_highbd_12_masked_variance128x64, + vpx_highbd_12_masked_sub_pixel_variance128x64) + HIGHBD_MBFP(BLOCK_64X128, + vpx_highbd_masked_sad64x128_bits12, + vpx_highbd_12_masked_variance64x128, + vpx_highbd_12_masked_sub_pixel_variance64x128) +#endif // CONFIG_EXT_PARTITION + HIGHBD_MBFP(BLOCK_64X64, + vpx_highbd_masked_sad64x64_bits12, + vpx_highbd_12_masked_variance64x64, + vpx_highbd_12_masked_sub_pixel_variance64x64) + HIGHBD_MBFP(BLOCK_64X32, + vpx_highbd_masked_sad64x32_bits12, + vpx_highbd_12_masked_variance64x32, + vpx_highbd_12_masked_sub_pixel_variance64x32) + HIGHBD_MBFP(BLOCK_32X64, + vpx_highbd_masked_sad32x64_bits12, + vpx_highbd_12_masked_variance32x64, + vpx_highbd_12_masked_sub_pixel_variance32x64) + HIGHBD_MBFP(BLOCK_32X32, + vpx_highbd_masked_sad32x32_bits12, + vpx_highbd_12_masked_variance32x32, + vpx_highbd_12_masked_sub_pixel_variance32x32) + HIGHBD_MBFP(BLOCK_32X16, + vpx_highbd_masked_sad32x16_bits12, + vpx_highbd_12_masked_variance32x16, + vpx_highbd_12_masked_sub_pixel_variance32x16) + HIGHBD_MBFP(BLOCK_16X32, + vpx_highbd_masked_sad16x32_bits12, + vpx_highbd_12_masked_variance16x32, + vpx_highbd_12_masked_sub_pixel_variance16x32) + HIGHBD_MBFP(BLOCK_16X16, + vpx_highbd_masked_sad16x16_bits12, + vpx_highbd_12_masked_variance16x16, + vpx_highbd_12_masked_sub_pixel_variance16x16) + HIGHBD_MBFP(BLOCK_8X16, + vpx_highbd_masked_sad8x16_bits12, + vpx_highbd_12_masked_variance8x16, + vpx_highbd_12_masked_sub_pixel_variance8x16) + HIGHBD_MBFP(BLOCK_16X8, + vpx_highbd_masked_sad16x8_bits12, + vpx_highbd_12_masked_variance16x8, + vpx_highbd_12_masked_sub_pixel_variance16x8) + HIGHBD_MBFP(BLOCK_8X8, + 
vpx_highbd_masked_sad8x8_bits12, + vpx_highbd_12_masked_variance8x8, + vpx_highbd_12_masked_sub_pixel_variance8x8) + HIGHBD_MBFP(BLOCK_4X8, + vpx_highbd_masked_sad4x8_bits12, + vpx_highbd_12_masked_variance4x8, + vpx_highbd_12_masked_sub_pixel_variance4x8) + HIGHBD_MBFP(BLOCK_8X4, + vpx_highbd_masked_sad8x4_bits12, + vpx_highbd_12_masked_variance8x4, + vpx_highbd_12_masked_sub_pixel_variance8x4) + HIGHBD_MBFP(BLOCK_4X4, + vpx_highbd_masked_sad4x4_bits12, + vpx_highbd_12_masked_variance4x4, + vpx_highbd_12_masked_sub_pixel_variance4x4) +#endif // CONFIG_EXT_INTER break; default: @@ -1912,6 +2286,21 @@ VP10_COMP *vp10_create_compressor(VP10EncoderConfig *oxcf, cpi->fn_ptr[BT].sdx8f = SDX8F; \ cpi->fn_ptr[BT].sdx4df = SDX4DF; +#if CONFIG_EXT_PARTITION + BFP(BLOCK_128X128, vpx_sad128x128, vpx_sad128x128_avg, + vpx_variance128x128, vpx_sub_pixel_variance128x128, + vpx_sub_pixel_avg_variance128x128, vpx_sad128x128x3, vpx_sad128x128x8, + vpx_sad128x128x4d) + + BFP(BLOCK_128X64, vpx_sad128x64, vpx_sad128x64_avg, + vpx_variance128x64, vpx_sub_pixel_variance128x64, + vpx_sub_pixel_avg_variance128x64, NULL, NULL, vpx_sad128x64x4d) + + BFP(BLOCK_64X128, vpx_sad64x128, vpx_sad64x128_avg, + vpx_variance64x128, vpx_sub_pixel_variance64x128, + vpx_sub_pixel_avg_variance64x128, NULL, NULL, vpx_sad64x128x4d) +#endif // CONFIG_EXT_PARTITION + BFP(BLOCK_32X16, vpx_sad32x16, vpx_sad32x16_avg, vpx_variance32x16, vpx_sub_pixel_variance32x16, vpx_sub_pixel_avg_variance32x16, NULL, NULL, vpx_sad32x16x4d) @@ -1971,6 +2360,48 @@ VP10_COMP *vp10_create_compressor(VP10EncoderConfig *oxcf, vpx_sub_pixel_avg_variance4x4, vpx_sad4x4x3, vpx_sad4x4x8, vpx_sad4x4x4d) +#if CONFIG_EXT_INTER +#define MBFP(BT, MSDF, MVF, MSVF) \ + cpi->fn_ptr[BT].msdf = MSDF; \ + cpi->fn_ptr[BT].mvf = MVF; \ + cpi->fn_ptr[BT].msvf = MSVF; + +#if CONFIG_EXT_PARTITION + MBFP(BLOCK_128X128, vpx_masked_sad128x128, vpx_masked_variance128x128, + vpx_masked_sub_pixel_variance128x128) + MBFP(BLOCK_128X64, vpx_masked_sad128x64, vpx_masked_variance128x64, + vpx_masked_sub_pixel_variance128x64) + MBFP(BLOCK_64X128, vpx_masked_sad64x128, vpx_masked_variance64x128, + vpx_masked_sub_pixel_variance64x128) +#endif // CONFIG_EXT_PARTITION + MBFP(BLOCK_64X64, vpx_masked_sad64x64, vpx_masked_variance64x64, + vpx_masked_sub_pixel_variance64x64) + MBFP(BLOCK_64X32, vpx_masked_sad64x32, vpx_masked_variance64x32, + vpx_masked_sub_pixel_variance64x32) + MBFP(BLOCK_32X64, vpx_masked_sad32x64, vpx_masked_variance32x64, + vpx_masked_sub_pixel_variance32x64) + MBFP(BLOCK_32X32, vpx_masked_sad32x32, vpx_masked_variance32x32, + vpx_masked_sub_pixel_variance32x32) + MBFP(BLOCK_32X16, vpx_masked_sad32x16, vpx_masked_variance32x16, + vpx_masked_sub_pixel_variance32x16) + MBFP(BLOCK_16X32, vpx_masked_sad16x32, vpx_masked_variance16x32, + vpx_masked_sub_pixel_variance16x32) + MBFP(BLOCK_16X16, vpx_masked_sad16x16, vpx_masked_variance16x16, + vpx_masked_sub_pixel_variance16x16) + MBFP(BLOCK_16X8, vpx_masked_sad16x8, vpx_masked_variance16x8, + vpx_masked_sub_pixel_variance16x8) + MBFP(BLOCK_8X16, vpx_masked_sad8x16, vpx_masked_variance8x16, + vpx_masked_sub_pixel_variance8x16) + MBFP(BLOCK_8X8, vpx_masked_sad8x8, vpx_masked_variance8x8, + vpx_masked_sub_pixel_variance8x8) + MBFP(BLOCK_4X8, vpx_masked_sad4x8, vpx_masked_variance4x8, + vpx_masked_sub_pixel_variance4x8) + MBFP(BLOCK_8X4, vpx_masked_sad8x4, vpx_masked_variance8x4, + vpx_masked_sub_pixel_variance8x4) + MBFP(BLOCK_4X4, vpx_masked_sad4x4, vpx_masked_variance4x4, + vpx_masked_sub_pixel_variance4x4) +#endif // 
CONFIG_EXT_INTER + #if CONFIG_VP9_HIGHBITDEPTH highbd_set_var_fns(cpi); #endif diff --git a/vp10/encoder/encoder.h b/vp10/encoder/encoder.h index efde0fc315d43b7d04ebf42d86229c56187d54ed..49cac0cfd4f548de90bdda978aaa0f9ec5382634 100644 --- a/vp10/encoder/encoder.h +++ b/vp10/encoder/encoder.h @@ -422,7 +422,7 @@ typedef struct VP10_COMP { fractional_mv_step_fp *find_fractional_mv_step; vp10_full_search_fn_t full_search_sad; vp10_diamond_search_fn_t diamond_search_sad; - vp9_variance_fn_ptr_t fn_ptr[BLOCK_SIZES]; + vp10_variance_fn_ptr_t fn_ptr[BLOCK_SIZES]; uint64_t time_receive_data; uint64_t time_compress_data; uint64_t time_pick_lpf; diff --git a/vp10/encoder/firstpass.c b/vp10/encoder/firstpass.c index 0404e277b731d735958a7398a46adf7e98ce0c82..dd3e4378a8dac0f791a17164fbfc74d27179a8f5 100644 --- a/vp10/encoder/firstpass.c +++ b/vp10/encoder/firstpass.c @@ -388,7 +388,7 @@ static void first_pass_motion_search(VP10_COMP *cpi, MACROBLOCK *x, MV ref_mv_full = {ref_mv->row >> 3, ref_mv->col >> 3}; int num00, tmp_err, n; const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; - vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[bsize]; + vp10_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[bsize]; const int new_mv_mode_penalty = NEW_MV_MODE_PENALTY; int step_param = 3; diff --git a/vp10/encoder/mbgraph.c b/vp10/encoder/mbgraph.c index 1f467b811bebb6400d8f79734dda6b3254f9ffde..32ff0faf67f465efbf2cbffdd47985bae9b0517e 100644 --- a/vp10/encoder/mbgraph.c +++ b/vp10/encoder/mbgraph.c @@ -31,7 +31,7 @@ static unsigned int do_16x16_motion_iteration(VP10_COMP *cpi, MACROBLOCK *const x = &cpi->td.mb; MACROBLOCKD *const xd = &x->e_mbd; const MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv; - const vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16]; + const vp10_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16]; const int tmp_col_min = x->mv_col_min; const int tmp_col_max = x->mv_col_max; diff --git a/vp10/encoder/mcomp.c b/vp10/encoder/mcomp.c index 8949f76bc985beeefe4371bcb8972633b69563dd..2c9397640df9ff66886f88d61e45a85c151fd58c 100644 --- a/vp10/encoder/mcomp.c +++ b/vp10/encoder/mcomp.c @@ -354,7 +354,7 @@ static unsigned int setup_center_error(const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv, int error_per_bit, - const vp9_variance_fn_ptr_t *vfp, + const vp10_variance_fn_ptr_t *vfp, const uint8_t *const src, const int src_stride, const uint8_t *const y, @@ -430,7 +430,7 @@ int vp10_find_best_sub_pixel_tree_pruned_evenmore( MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit, - const vp9_variance_fn_ptr_t *vfp, + const vp10_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, int *cost_list, @@ -516,7 +516,7 @@ int vp10_find_best_sub_pixel_tree_pruned_more(const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit, - const vp9_variance_fn_ptr_t *vfp, + const vp10_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, int *cost_list, @@ -599,7 +599,7 @@ int vp10_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit, - const vp9_variance_fn_ptr_t *vfp, + const vp10_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, int *cost_list, @@ -748,7 +748,7 @@ static void highbd_upsampled_pred(uint16_t *comp_pred, #endif static int upsampled_pref_error(const MACROBLOCKD *xd, - const vp9_variance_fn_ptr_t *vfp, + const vp10_variance_fn_ptr_t *vfp, const uint8_t *const src, const int src_stride, const uint8_t *const y, int y_stride, const uint8_t *second_pred, @@ -786,7 +786,7 @@ return besterr; 
static unsigned int upsampled_setup_center_error( const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv, - int error_per_bit, const vp9_variance_fn_ptr_t *vfp, + int error_per_bit, const vp10_variance_fn_ptr_t *vfp, const uint8_t *const src, const int src_stride, const uint8_t *const y, int y_stride, const uint8_t *second_pred, int w, int h, int offset, int *mvjcost, int *mvcost[2], @@ -804,7 +804,7 @@ int vp10_find_best_sub_pixel_tree(const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit, - const vp9_variance_fn_ptr_t *vfp, + const vp10_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, int *cost_list, @@ -1037,7 +1037,7 @@ static INLINE int is_mv_in(const MACROBLOCK *x, const MV *mv) { static INLINE void calc_int_cost_list(const MACROBLOCK *x, const MV *ref_mv, int sadpb, - const vp9_variance_fn_ptr_t *fn_ptr, + const vp10_variance_fn_ptr_t *fn_ptr, const MV *best_mv, int *cost_list) { static const MV neighbors[4] = {{0, -1}, {1, 0}, {0, 1}, {-1, 0}}; @@ -1095,7 +1095,7 @@ static int vp10_pattern_search(const MACROBLOCK *x, int sad_per_bit, int do_init_search, int *cost_list, - const vp9_variance_fn_ptr_t *vfp, + const vp10_variance_fn_ptr_t *vfp, int use_mvcost, const MV *center_mv, MV *best_mv, @@ -1270,7 +1270,7 @@ static int vp10_pattern_search_sad(const MACROBLOCK *x, int sad_per_bit, int do_init_search, int *cost_list, - const vp9_variance_fn_ptr_t *vfp, + const vp10_variance_fn_ptr_t *vfp, int use_mvcost, const MV *center_mv, MV *best_mv, @@ -1552,7 +1552,7 @@ static int vp10_pattern_search_sad(const MACROBLOCK *x, int vp10_get_mvpred_var(const MACROBLOCK *x, const MV *best_mv, const MV *center_mv, - const vp9_variance_fn_ptr_t *vfp, + const vp10_variance_fn_ptr_t *vfp, int use_mvcost) { const MACROBLOCKD *const xd = &x->e_mbd; const struct buf_2d *const what = &x->plane[0].src; @@ -1569,7 +1569,7 @@ int vp10_get_mvpred_var(const MACROBLOCK *x, int vp10_get_mvpred_av_var(const MACROBLOCK *x, const MV *best_mv, const MV *center_mv, const uint8_t *second_pred, - const vp9_variance_fn_ptr_t *vfp, + const vp10_variance_fn_ptr_t *vfp, int use_mvcost) { const MACROBLOCKD *const xd = &x->e_mbd; const struct buf_2d *const what = &x->plane[0].src; @@ -1589,7 +1589,7 @@ int vp10_hex_search(const MACROBLOCK *x, int sad_per_bit, int do_init_search, int *cost_list, - const vp9_variance_fn_ptr_t *vfp, + const vp10_variance_fn_ptr_t *vfp, int use_mvcost, const MV *center_mv, MV *best_mv) { // First scale has 8-closest points, the rest have 6 points in hex shape @@ -1624,7 +1624,7 @@ int vp10_bigdia_search(const MACROBLOCK *x, int sad_per_bit, int do_init_search, int *cost_list, - const vp9_variance_fn_ptr_t *vfp, + const vp10_variance_fn_ptr_t *vfp, int use_mvcost, const MV *center_mv, MV *best_mv) { @@ -1666,7 +1666,7 @@ int vp10_square_search(const MACROBLOCK *x, int sad_per_bit, int do_init_search, int *cost_list, - const vp9_variance_fn_ptr_t *vfp, + const vp10_variance_fn_ptr_t *vfp, int use_mvcost, const MV *center_mv, MV *best_mv) { @@ -1708,7 +1708,7 @@ int vp10_fast_hex_search(const MACROBLOCK *x, int sad_per_bit, int do_init_search, // must be zero for fast_hex int *cost_list, - const vp9_variance_fn_ptr_t *vfp, + const vp10_variance_fn_ptr_t *vfp, int use_mvcost, const MV *center_mv, MV *best_mv) { @@ -1723,7 +1723,7 @@ int vp10_fast_dia_search(const MACROBLOCK *x, int sad_per_bit, int do_init_search, int *cost_list, - const vp9_variance_fn_ptr_t *vfp, + const vp10_variance_fn_ptr_t *vfp, int use_mvcost, const MV *center_mv, MV *best_mv) { @@ 
-1739,7 +1739,7 @@ int vp10_fast_dia_search(const MACROBLOCK *x, static int exhuastive_mesh_search(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, int range, int step, int sad_per_bit, - const vp9_variance_fn_ptr_t *fn_ptr, + const vp10_variance_fn_ptr_t *fn_ptr, const MV *center_mv) { const MACROBLOCKD *const xd = &x->e_mbd; const struct buf_2d *const what = &x->plane[0].src; @@ -1822,11 +1822,11 @@ static int exhuastive_mesh_search(const MACROBLOCK *x, } int vp10_diamond_search_sad_c(const MACROBLOCK *x, - const search_site_config *cfg, - MV *ref_mv, MV *best_mv, int search_param, - int sad_per_bit, int *num00, - const vp9_variance_fn_ptr_t *fn_ptr, - const MV *center_mv) { + const search_site_config *cfg, + MV *ref_mv, MV *best_mv, int search_param, + int sad_per_bit, int *num00, + const vp10_variance_fn_ptr_t *fn_ptr, + const MV *center_mv) { int i, j, step; const MACROBLOCKD *const xd = &x->e_mbd; @@ -2179,7 +2179,7 @@ int vp10_full_pixel_diamond(const VP10_COMP *cpi, MACROBLOCK *x, MV *mvp_full, int step_param, int sadpb, int further_steps, int do_refine, int *cost_list, - const vp9_variance_fn_ptr_t *fn_ptr, + const vp10_variance_fn_ptr_t *fn_ptr, const MV *ref_mv, MV *dst_mv) { MV temp_mv; int thissme, n, num00 = 0; @@ -2246,7 +2246,7 @@ int vp10_full_pixel_diamond(const VP10_COMP *cpi, MACROBLOCK *x, // according to the encode speed profile. static int full_pixel_exhaustive(VP10_COMP *cpi, MACROBLOCK *x, MV *centre_mv_full, int sadpb, int *cost_list, - const vp9_variance_fn_ptr_t *fn_ptr, + const vp10_variance_fn_ptr_t *fn_ptr, const MV *ref_mv, MV *dst_mv) { const SPEED_FEATURES *const sf = &cpi->sf; MV temp_mv = {centre_mv_full->row, centre_mv_full->col}; @@ -2305,7 +2305,7 @@ static int full_pixel_exhaustive(VP10_COMP *cpi, MACROBLOCK *x, int vp10_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv, int sad_per_bit, int distance, - const vp9_variance_fn_ptr_t *fn_ptr, + const vp10_variance_fn_ptr_t *fn_ptr, const MV *center_mv, MV *best_mv) { int r, c; const MACROBLOCKD *const xd = &x->e_mbd; @@ -2338,7 +2338,7 @@ int vp10_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv, int vp10_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv, int sad_per_bit, int distance, - const vp9_variance_fn_ptr_t *fn_ptr, + const vp10_variance_fn_ptr_t *fn_ptr, const MV *center_mv, MV *best_mv) { int r; const MACROBLOCKD *const xd = &x->e_mbd; @@ -2403,7 +2403,7 @@ int vp10_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv, int vp10_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, int sad_per_bit, int distance, - const vp9_variance_fn_ptr_t *fn_ptr, + const vp10_variance_fn_ptr_t *fn_ptr, const MV *center_mv, MV *best_mv) { int r; const MACROBLOCKD *const xd = &x->e_mbd; @@ -2493,7 +2493,7 @@ int vp10_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, int vp10_refining_search_sad(const MACROBLOCK *x, MV *ref_mv, int error_per_bit, int search_range, - const vp9_variance_fn_ptr_t *fn_ptr, + const vp10_variance_fn_ptr_t *fn_ptr, const MV *center_mv) { const MACROBLOCKD *const xd = &x->e_mbd; const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; @@ -2572,7 +2572,7 @@ int vp10_refining_search_sad(const MACROBLOCK *x, int vp10_refining_search_8p_c(const MACROBLOCK *x, MV *ref_mv, int error_per_bit, int search_range, - const vp9_variance_fn_ptr_t *fn_ptr, + const vp10_variance_fn_ptr_t *fn_ptr, const MV *center_mv, const uint8_t *second_pred) { const MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}, @@ -2636,7 +2636,7 @@ int vp10_full_pixel_search(VP10_COMP *cpi, 
MACROBLOCK *x, int var_max, int rd) { const SPEED_FEATURES *const sf = &cpi->sf; const SEARCH_METHODS method = sf->mv.search_method; - vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize]; + vp10_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize]; int var = 0; if (cost_list) { cost_list[0] = INT_MAX; @@ -2707,3 +2707,354 @@ int vp10_full_pixel_search(VP10_COMP *cpi, MACROBLOCK *x, return var; } + +#if CONFIG_EXT_INTER +/* returns subpixel variance error function */ +#define DIST(r, c) \ + vfp->msvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \ + src_stride, mask, mask_stride, &sse) + +/* checks if (r, c) has better score than previous best */ + +#define MVC(r, c) \ + (mvcost ? \ + ((mvjcost[((r) != rr) * 2 + ((c) != rc)] + \ + mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \ + error_per_bit + 4096) >> 13 : 0) + +#define CHECK_BETTER(v, r, c) \ + if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ + thismse = (DIST(r, c)); \ + if ((v = MVC(r, c) + thismse) < besterr) { \ + besterr = v; \ + br = r; \ + bc = c; \ + *distortion = thismse; \ + *sse1 = sse; \ + } \ + } else { \ + v = INT_MAX; \ + } + +int vp10_find_best_masked_sub_pixel_tree(const MACROBLOCK *x, + const uint8_t *mask, int mask_stride, + MV *bestmv, const MV *ref_mv, + int allow_hp, + int error_per_bit, + const vp10_variance_fn_ptr_t *vfp, + int forced_stop, + int iters_per_step, + int *mvjcost, int *mvcost[2], + int *distortion, + unsigned int *sse1, int is_second) { + const uint8_t *const z = x->plane[0].src.buf; + const int src_stride = x->plane[0].src.stride; + const MACROBLOCKD *xd = &x->e_mbd; + unsigned int besterr = INT_MAX; + unsigned int sse; + unsigned int whichdir; + int thismse; + unsigned int halfiters = iters_per_step; + unsigned int quarteriters = iters_per_step; + unsigned int eighthiters = iters_per_step; + + const int y_stride = xd->plane[0].pre[is_second].stride; + const int offset = bestmv->row * y_stride + bestmv->col; + const uint8_t *const y = xd->plane[0].pre[is_second].buf; + + int rr = ref_mv->row; + int rc = ref_mv->col; + int br = bestmv->row * 8; + int bc = bestmv->col * 8; + int hstep = 4; + const int minc = VPXMAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); + const int maxc = VPXMIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); + const int minr = VPXMAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); + const int maxr = VPXMIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); + + int tr = br; + int tc = bc; + + // central mv + bestmv->row *= 8; + bestmv->col *= 8; + + // calculate central point error + besterr = vfp->mvf(y + offset, y_stride, z, src_stride, mask, mask_stride, + sse1); + *distortion = besterr; + besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); + + // 1/2 pel + FIRST_LEVEL_CHECKS; + if (halfiters > 1) { + SECOND_LEVEL_CHECKS; + } + tr = br; + tc = bc; + + // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only + if (forced_stop != 2) { + hstep >>= 1; + FIRST_LEVEL_CHECKS; + if (quarteriters > 1) { + SECOND_LEVEL_CHECKS; + } + tr = br; + tc = bc; + } + + if (allow_hp && vp10_use_mv_hp(ref_mv) && forced_stop == 0) { + hstep >>= 1; + FIRST_LEVEL_CHECKS; + if (eighthiters > 1) { + SECOND_LEVEL_CHECKS; + } + tr = br; + tc = bc; + } + // These lines insure static analysis doesn't warn that + // tr and tc aren't used after the above point. 
+ (void) tr; + (void) tc; + + bestmv->row = br; + bestmv->col = bc; + + if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || + (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) + return INT_MAX; + + return besterr; +} + +#undef DIST +#undef MVC +#undef CHECK_BETTER + +static int get_masked_mvpred_var(const MACROBLOCK *x, + const uint8_t *mask, int mask_stride, + const MV *best_mv, const MV *center_mv, + const vp10_variance_fn_ptr_t *vfp, + int use_mvcost, int is_second) { + const MACROBLOCKD *const xd = &x->e_mbd; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[is_second]; + const MV mv = {best_mv->row * 8, best_mv->col * 8}; + unsigned int unused; + + return vfp->mvf(what->buf, what->stride, + get_buf_from_mv(in_what, best_mv), in_what->stride, + mask, mask_stride, &unused) + + (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, + x->mvcost, x->errorperbit) : 0); +} + +int masked_refining_search_sad(const MACROBLOCK *x, + const uint8_t *mask, int mask_stride, + MV *ref_mv, int error_per_bit, + int search_range, + const vp10_variance_fn_ptr_t *fn_ptr, + const MV *center_mv, int is_second) { + const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; + const MACROBLOCKD *const xd = &x->e_mbd; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[is_second]; + const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; + unsigned int best_sad = fn_ptr->msdf(what->buf, what->stride, + get_buf_from_mv(in_what, ref_mv), + in_what->stride, mask, mask_stride) + + mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit); + int i, j; + + for (i = 0; i < search_range; i++) { + int best_site = -1; + + for (j = 0; j < 4; j++) { + const MV mv = {ref_mv->row + neighbors[j].row, + ref_mv->col + neighbors[j].col}; + if (is_mv_in(x, &mv)) { + unsigned int sad = fn_ptr->msdf(what->buf, what->stride, + get_buf_from_mv(in_what, &mv), in_what->stride, mask, mask_stride); + if (sad < best_sad) { + sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); + if (sad < best_sad) { + best_sad = sad; + best_site = j; + } + } + } + } + + if (best_site == -1) { + break; + } else { + ref_mv->row += neighbors[best_site].row; + ref_mv->col += neighbors[best_site].col; + } + } + return best_sad; +} + +int masked_diamond_search_sad(const MACROBLOCK *x, + const search_site_config *cfg, + const uint8_t *mask, int mask_stride, + MV *ref_mv, MV *best_mv, + int search_param, + int sad_per_bit, int *num00, + const vp10_variance_fn_ptr_t *fn_ptr, + const MV *center_mv, int is_second) { + const MACROBLOCKD *const xd = &x->e_mbd; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[is_second]; + // search_param determines the length of the initial step and hence the number + // of iterations + // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = + // (MAX_FIRST_STEP/4) pel... etc. 
+ const search_site *const ss = &cfg->ss[search_param * cfg->searches_per_step]; + const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param; + const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; + const uint8_t *best_address, *in_what_ref; + int best_sad = INT_MAX; + int best_site = 0; + int last_site = 0; + int i, j, step; + + clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); + in_what_ref = get_buf_from_mv(in_what, ref_mv); + best_address = in_what_ref; + *num00 = 0; + *best_mv = *ref_mv; + + // Check the starting position + best_sad = fn_ptr->msdf(what->buf, what->stride, + best_address, in_what->stride, + mask, mask_stride) + + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit); + + i = 1; + + for (step = 0; step < tot_steps; step++) { + for (j = 0; j < cfg->searches_per_step; j++) { + const MV mv = {best_mv->row + ss[i].mv.row, + best_mv->col + ss[i].mv.col}; + if (is_mv_in(x, &mv)) { + int sad = fn_ptr->msdf(what->buf, what->stride, + best_address + ss[i].offset, in_what->stride, + mask, mask_stride); + if (sad < best_sad) { + sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); + if (sad < best_sad) { + best_sad = sad; + best_site = i; + } + } + } + + i++; + } + + if (best_site != last_site) { + best_mv->row += ss[best_site].mv.row; + best_mv->col += ss[best_site].mv.col; + best_address += ss[best_site].offset; + last_site = best_site; +#if defined(NEW_DIAMOND_SEARCH) + while (1) { + const MV this_mv = {best_mv->row + ss[best_site].mv.row, + best_mv->col + ss[best_site].mv.col}; + if (is_mv_in(x, &this_mv)) { + int sad = fn_ptr->msdf(what->buf, what->stride, + best_address + ss[best_site].offset, + in_what->stride, mask, mask_stride); + if (sad < best_sad) { + sad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); + if (sad < best_sad) { + best_sad = sad; + best_mv->row += ss[best_site].mv.row; + best_mv->col += ss[best_site].mv.col; + best_address += ss[best_site].offset; + continue; + } + } + } + break; + } +#endif + } else if (best_address == in_what_ref) { + (*num00)++; + } + } + return best_sad; +} + +int vp10_masked_full_pixel_diamond(const VP10_COMP *cpi, MACROBLOCK *x, + const uint8_t *mask, int mask_stride, + MV *mvp_full, int step_param, + int sadpb, int further_steps, int do_refine, + const vp10_variance_fn_ptr_t *fn_ptr, + const MV *ref_mv, MV *dst_mv, + int is_second) { + MV temp_mv; + int thissme, n, num00 = 0; + int bestsme = masked_diamond_search_sad(x, &cpi->ss_cfg, + mask, mask_stride, + mvp_full, &temp_mv, + step_param, sadpb, &n, + fn_ptr, ref_mv, is_second); + if (bestsme < INT_MAX) + bestsme = get_masked_mvpred_var(x, mask, mask_stride, &temp_mv, ref_mv, + fn_ptr, 1, is_second); + *dst_mv = temp_mv; + + // If there won't be more n-step search, check to see if refining search is + // needed. + if (n > further_steps) + do_refine = 0; + + while (n < further_steps) { + ++n; + + if (num00) { + num00--; + } else { + thissme = masked_diamond_search_sad(x, &cpi->ss_cfg, + mask, mask_stride, + mvp_full, &temp_mv, + step_param + n, sadpb, &num00, + fn_ptr, ref_mv, is_second); + if (thissme < INT_MAX) + thissme = get_masked_mvpred_var(x, mask, mask_stride, + &temp_mv, ref_mv, fn_ptr, 1, + is_second); + + // check to see if refining search is needed. 
+ if (num00 > further_steps - n) + do_refine = 0; + + if (thissme < bestsme) { + bestsme = thissme; + *dst_mv = temp_mv; + } + } + } + + // final 1-away diamond refining search + if (do_refine) { + const int search_range = 8; + MV best_mv = *dst_mv; + thissme = masked_refining_search_sad(x, mask, mask_stride, + &best_mv, sadpb, search_range, + fn_ptr, ref_mv, is_second); + if (thissme < INT_MAX) + thissme = get_masked_mvpred_var(x, mask, mask_stride, + &best_mv, ref_mv, fn_ptr, 1, + is_second); + if (thissme < bestsme) { + bestsme = thissme; + *dst_mv = best_mv; + } + } + return bestsme; +} +#endif // CONFIG_EXT_INTER diff --git a/vp10/encoder/mcomp.h b/vp10/encoder/mcomp.h index 3063b996e7b095a6fd22a95bd687af20d3880575..a430c76c2d9e795bfdc84b91b2c631db76d4ff37 100644 --- a/vp10/encoder/mcomp.h +++ b/vp10/encoder/mcomp.h @@ -53,12 +53,12 @@ int vp10_mv_bit_cost(const MV *mv, const MV *ref, // Utility to compute variance + MV rate cost for a given MV int vp10_get_mvpred_var(const MACROBLOCK *x, const MV *best_mv, const MV *center_mv, - const vp9_variance_fn_ptr_t *vfp, + const vp10_variance_fn_ptr_t *vfp, int use_mvcost); int vp10_get_mvpred_av_var(const MACROBLOCK *x, const MV *best_mv, const MV *center_mv, const uint8_t *second_pred, - const vp9_variance_fn_ptr_t *vfp, + const vp10_variance_fn_ptr_t *vfp, int use_mvcost); struct VP10_COMP; @@ -69,7 +69,7 @@ int vp10_init_search_range(int size); int vp10_refining_search_sad(const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, - const struct vp9_variance_vtable *fn_ptr, + const vp10_variance_fn_ptr_t *fn_ptr, const struct mv *center_mv); // Runs sequence of diamond searches in smaller steps for RD. @@ -77,7 +77,7 @@ int vp10_full_pixel_diamond(const struct VP10_COMP *cpi, MACROBLOCK *x, MV *mvp_full, int step_param, int sadpb, int further_steps, int do_refine, int *cost_list, - const vp9_variance_fn_ptr_t *fn_ptr, + const vp10_variance_fn_ptr_t *fn_ptr, const MV *ref_mv, MV *dst_mv); // Perform integral projection based motion estimation. 
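For reference, the two entry points added above chain together the same way do_masked_motion_search() in rdopt.c (later in this patch) drives them: a masked full-pel diamond search followed by masked sub-pel refinement, both costing candidates through the masked members of vp10_variance_fn_ptr_t. A minimal sketch, assuming the usual encoder state (cpi, x, fn_ptr table) is available; masked_search_sketch and its parameter list are illustrative only and not part of this change:

static int masked_search_sketch(VP10_COMP *cpi, MACROBLOCK *x,
                                const uint8_t *mask, int mask_stride,
                                BLOCK_SIZE bsize, const MV *ref_mv,
                                MV *mvp_full, int step_param, MV *best_mv) {
  const VP10_COMMON *cm = &cpi->common;
  int dis;            /* sub-pel distortion, unused in this sketch */
  unsigned int sse;
  /* Full-pel masked diamond search, with the final refining pass enabled. */
  int bestsme = vp10_masked_full_pixel_diamond(
      cpi, x, mask, mask_stride, mvp_full, step_param, x->sadperbit16,
      MAX_MVSEARCH_STEPS - 1 - step_param, 1 /* do_refine */,
      &cpi->fn_ptr[bsize], ref_mv, best_mv, 0 /* is_second */);
  /* Masked sub-pel refinement around the full-pel winner; updates *best_mv. */
  if (bestsme < INT_MAX)
    vp10_find_best_masked_sub_pixel_tree(
        x, mask, mask_stride, best_mv, ref_mv, cm->allow_high_precision_mv,
        x->errorperbit, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
        cpi->sf.mv.subpel_iters_per_step, x->nmvjointcost, x->mvcost,
        &dis, &sse, 0 /* is_second */);
  return bestsme;  /* full-pel cost; best_mv now holds the refined MV */
}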
@@ -93,7 +93,7 @@ typedef int (integer_mv_pattern_search_fn) ( int error_per_bit, int do_init_search, int *cost_list, - const vp9_variance_fn_ptr_t *vf, + const vp10_variance_fn_ptr_t *vf, int use_mvcost, const MV *center_mv, MV *best_mv); @@ -109,7 +109,7 @@ typedef int (fractional_mv_step_fp) ( MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit, - const vp9_variance_fn_ptr_t *vfp, + const vp10_variance_fn_ptr_t *vfp, int forced_stop, // 0 - full, 1 - qtr only, 2 - half only int iters_per_step, int *cost_list, @@ -130,13 +130,13 @@ extern fractional_mv_step_fp vp10_find_best_sub_pixel_tree_pruned_evenmore; typedef int (*vp10_full_search_fn_t)(const MACROBLOCK *x, const MV *ref_mv, int sad_per_bit, int distance, - const vp9_variance_fn_ptr_t *fn_ptr, + const vp10_variance_fn_ptr_t *fn_ptr, const MV *center_mv, MV *best_mv); typedef int (*vp10_refining_search_fn_t)(const MACROBLOCK *x, MV *ref_mv, int sad_per_bit, int distance, - const vp9_variance_fn_ptr_t *fn_ptr, + const vp10_variance_fn_ptr_t *fn_ptr, const MV *center_mv); typedef int (*vp10_diamond_search_fn_t)(const MACROBLOCK *x, @@ -144,13 +144,13 @@ typedef int (*vp10_diamond_search_fn_t)(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, int search_param, int sad_per_bit, int *num00, - const vp9_variance_fn_ptr_t *fn_ptr, + const vp10_variance_fn_ptr_t *fn_ptr, const MV *center_mv); int vp10_refining_search_8p_c(const MACROBLOCK *x, MV *ref_mv, int error_per_bit, int search_range, - const vp9_variance_fn_ptr_t *fn_ptr, + const vp10_variance_fn_ptr_t *fn_ptr, const MV *center_mv, const uint8_t *second_pred); struct VP10_COMP; @@ -162,6 +162,26 @@ int vp10_full_pixel_search(struct VP10_COMP *cpi, MACROBLOCK *x, const MV *ref_mv, MV *tmp_mv, int var_max, int rd); +#if CONFIG_EXT_INTER +int vp10_find_best_masked_sub_pixel_tree(const MACROBLOCK *x, + const uint8_t *mask, int mask_stride, + MV *bestmv, const MV *ref_mv, + int allow_hp, + int error_per_bit, + const vp10_variance_fn_ptr_t *vfp, + int forced_stop, + int iters_per_step, + int *mvjcost, int *mvcost[2], + int *distortion, + unsigned int *sse1, int is_second); +int vp10_masked_full_pixel_diamond(const struct VP10_COMP *cpi, MACROBLOCK *x, + const uint8_t *mask, int mask_stride, + MV *mvp_full, int step_param, + int sadpb, int further_steps, int do_refine, + const vp10_variance_fn_ptr_t *fn_ptr, + const MV *ref_mv, MV *dst_mv, + int is_second); +#endif // CONFIG_EXT_INTER #ifdef __cplusplus } // extern "C" #endif diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c index 66261eaf993b9e885c9a1ca7c555c39f947678d2..c65bdf13fa55a24c3fa7707deebe6fed2912408a 100644 --- a/vp10/encoder/rdopt.c +++ b/vp10/encoder/rdopt.c @@ -44,10 +44,6 @@ #include "vp10/encoder/rdopt.h" #include "vp10/encoder/aq_variance.h" -// TODO(geza.lore) Update this when the extended coding unit size experiment -// have been ported. 
-#define CU_SIZE 64 - #if CONFIG_EXT_REFS #define LAST_FRAME_MODE_MASK ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | \ @@ -4315,8 +4311,8 @@ static void joint_motion_search(VP10_COMP *cpi, MACROBLOCK *x, if (bsize >= BLOCK_8X8) #endif // CONFIG_EXT_INTER *rate_mv += vp10_mv_bit_cost(&frame_mv[refs[ref]].as_mv, - &x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv, - x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); + &x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv, + x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); #if CONFIG_EXT_INTER else *rate_mv += vp10_mv_bit_cost(&frame_mv[refs[ref]].as_mv, @@ -5117,6 +5113,7 @@ static void single_motion_search(VP10_COMP *cpi, MACROBLOCK *x, #else int ref = mbmi->ref_frame[0]; MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv; + int ref_idx = 0; #endif // CONFIG_EXT_INTER int tmp_col_min = x->mv_col_min; @@ -5143,9 +5140,9 @@ static void single_motion_search(VP10_COMP *cpi, MACROBLOCK *x, // match the resolution of the current frame, allowing the existing // motion search code to be used without additional modifications. for (i = 0; i < MAX_MB_PLANE; i++) - backup_yv12[i] = xd->plane[i].pre[0]; + backup_yv12[i] = xd->plane[i].pre[ref_idx]; - vp10_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL); + vp10_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL); } vp10_set_mv_search_range(x, &ref_mv); @@ -5189,7 +5186,7 @@ static void single_motion_search(VP10_COMP *cpi, MACROBLOCK *x, if (scaled_ref_frame) { int i; for (i = 0; i < MAX_MB_PLANE; ++i) - xd->plane[i].pre[0] = backup_yv12[i]; + xd->plane[i].pre[ref_idx] = backup_yv12[i]; } return; } @@ -5203,8 +5200,8 @@ static void single_motion_search(VP10_COMP *cpi, MACROBLOCK *x, mvp_full.row >>= 3; bestsme = vp10_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb, - cond_cost_list(cpi, cost_list), - &ref_mv, &tmp_mv->as_mv, INT_MAX, 1); + cond_cost_list(cpi, cost_list), + &ref_mv, &tmp_mv->as_mv, INT_MAX, 1); x->mv_col_min = tmp_col_min; x->mv_col_max = tmp_col_max; @@ -5218,11 +5215,11 @@ static void single_motion_search(VP10_COMP *cpi, MACROBLOCK *x, const int ph = 4 * num_4x4_blocks_high_lookup[bsize]; // Use up-sampled reference frames. struct macroblockd_plane *const pd = &xd->plane[0]; - struct buf_2d backup_pred = pd->pre[0]; + struct buf_2d backup_pred = pd->pre[ref_idx]; const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref); // Set pred for Y plane - setup_pred_plane(&pd->pre[0], upsampled_ref->y_buffer, + setup_pred_plane(&pd->pre[ref_idx], upsampled_ref->y_buffer, upsampled_ref->y_stride, (mi_row << 3), (mi_col << 3), NULL, pd->subsampling_x, pd->subsampling_y); @@ -5238,7 +5235,7 @@ static void single_motion_search(VP10_COMP *cpi, MACROBLOCK *x, pw, ph, 1); // Restore the reference frames. 
- pd->pre[0] = backup_pred; + pd->pre[ref_idx] = backup_pred; #else cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv, cm->allow_high_precision_mv, @@ -5260,7 +5257,7 @@ static void single_motion_search(VP10_COMP *cpi, MACROBLOCK *x, if (scaled_ref_frame) { int i; for (i = 0; i < MAX_MB_PLANE; i++) - xd->plane[i].pre[0] = backup_yv12[i]; + xd->plane[i].pre[ref_idx] = backup_yv12[i]; } } @@ -5274,6 +5271,176 @@ static INLINE void restore_dst_buf(MACROBLOCKD *xd, } } +#if CONFIG_EXT_INTER +static void do_masked_motion_search(VP10_COMP *cpi, MACROBLOCK *x, + const uint8_t *mask, int mask_stride, + BLOCK_SIZE bsize, + int mi_row, int mi_col, + int_mv *tmp_mv, int *rate_mv, + int ref_idx, + int mv_idx) { + MACROBLOCKD *xd = &x->e_mbd; + const VP10_COMMON *cm = &cpi->common; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}}; + int bestsme = INT_MAX; + int step_param; + int sadpb = x->sadperbit16; + MV mvp_full; + int ref = mbmi->ref_frame[ref_idx]; + MV ref_mv = x->mbmi_ext->ref_mvs[ref][mv_idx].as_mv; + + int tmp_col_min = x->mv_col_min; + int tmp_col_max = x->mv_col_max; + int tmp_row_min = x->mv_row_min; + int tmp_row_max = x->mv_row_max; + + const YV12_BUFFER_CONFIG *scaled_ref_frame = + vp10_get_scaled_ref_frame(cpi, ref); + + MV pred_mv[3]; + pred_mv[0] = x->mbmi_ext->ref_mvs[ref][0].as_mv; + pred_mv[1] = x->mbmi_ext->ref_mvs[ref][1].as_mv; + pred_mv[2] = x->pred_mv[ref]; + +#if CONFIG_REF_MV + vp10_set_mvcost(x, ref); +#endif + + if (scaled_ref_frame) { + int i; + // Swap out the reference frame for a version that's been scaled to + // match the resolution of the current frame, allowing the existing + // motion search code to be used without additional modifications. + for (i = 0; i < MAX_MB_PLANE; i++) + backup_yv12[i] = xd->plane[i].pre[ref_idx]; + + vp10_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL); + } + + vp10_set_mv_search_range(x, &ref_mv); + + // Work out the size of the first step in the mv step search. + // 0 here is maximum length first step. 1 is MAX >> 1 etc. + if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) { + // Take wtd average of the step_params based on the last frame's + // max mv magnitude and that based on the best ref mvs of the current + // block for the given reference. + step_param = (vp10_init_search_range(x->max_mv_context[ref]) + + cpi->mv_step_param) / 2; + } else { + step_param = cpi->mv_step_param; + } + + // TODO(debargha): is show_frame needed here? + if (cpi->sf.adaptive_motion_search && bsize < BLOCK_LARGEST && + cm->show_frame) { + int boffset = 2 * (b_width_log2_lookup[BLOCK_LARGEST] - + VPXMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize])); + step_param = VPXMAX(step_param, boffset); + } + + if (cpi->sf.adaptive_motion_search) { + int bwl = b_width_log2_lookup[bsize]; + int bhl = b_height_log2_lookup[bsize]; + int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4); + + if (tlevel < 5) + step_param += 2; + + // prev_mv_sad is not setup for dynamically scaled frames. 
+ if (cpi->oxcf.resize_mode != RESIZE_DYNAMIC) { + int i; + for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) { + if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) { + x->pred_mv[ref].row = 0; + x->pred_mv[ref].col = 0; + tmp_mv->as_int = INVALID_MV; + + if (scaled_ref_frame) { + int i; + for (i = 0; i < MAX_MB_PLANE; ++i) + xd->plane[i].pre[ref_idx] = backup_yv12[i]; + } + return; + } + } + } + } + + mvp_full = pred_mv[x->mv_best_ref_index[ref]]; + + mvp_full.col >>= 3; + mvp_full.row >>= 3; + + bestsme = vp10_masked_full_pixel_diamond(cpi, x, mask, mask_stride, + &mvp_full, step_param, sadpb, + MAX_MVSEARCH_STEPS - 1 - step_param, + 1, &cpi->fn_ptr[bsize], + &ref_mv, &tmp_mv->as_mv, ref_idx); + + x->mv_col_min = tmp_col_min; + x->mv_col_max = tmp_col_max; + x->mv_row_min = tmp_row_min; + x->mv_row_max = tmp_row_max; + + if (bestsme < INT_MAX) { + int dis; /* TODO: use dis in distortion calculation later. */ + vp10_find_best_masked_sub_pixel_tree(x, mask, mask_stride, + &tmp_mv->as_mv, &ref_mv, + cm->allow_high_precision_mv, + x->errorperbit, + &cpi->fn_ptr[bsize], + cpi->sf.mv.subpel_force_stop, + cpi->sf.mv.subpel_iters_per_step, + x->nmvjointcost, x->mvcost, + &dis, &x->pred_sse[ref], ref_idx); + } + *rate_mv = vp10_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, + x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); + + if (cpi->sf.adaptive_motion_search && cm->show_frame) + x->pred_mv[ref] = tmp_mv->as_mv; + + if (scaled_ref_frame) { + int i; + for (i = 0; i < MAX_MB_PLANE; i++) + xd->plane[i].pre[ref_idx] = backup_yv12[i]; + } +} + +static void do_masked_motion_search_indexed(VP10_COMP *cpi, MACROBLOCK *x, + int wedge_index, + BLOCK_SIZE bsize, + int mi_row, int mi_col, + int_mv *tmp_mv, int *rate_mv, + int mv_idx[2], + int which) { + // NOTE: which values: 0 - 0 only, 1 - 1 only, 2 - both + MACROBLOCKD *xd = &x->e_mbd; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + BLOCK_SIZE sb_type = mbmi->sb_type; + int w = (4 << b_width_log2_lookup[sb_type]); + int h = (4 << b_height_log2_lookup[sb_type]); + const uint8_t *mask; + const int mask_stride = MASK_MASTER_STRIDE; + mask = vp10_get_soft_mask(wedge_index, sb_type, h, w); + + if (which == 0 || which == 2) + do_masked_motion_search(cpi, x, mask, mask_stride, bsize, + mi_row, mi_col, &tmp_mv[0], &rate_mv[0], + 0, mv_idx[0]); + + if (which == 1 || which == 2) { + // get the negative mask + mask = vp10_get_soft_mask(wedge_index ^ 1, sb_type, h, w); + do_masked_motion_search(cpi, x, mask, mask_stride, bsize, + mi_row, mi_col, &tmp_mv[1], &rate_mv[1], + 1, mv_idx[1]); + } +} +#endif // CONFIG_EXT_INTER + // In some situations we want to discount tha pparent cost of a new motion // vector. Where there is a subtle motion field and especially where there is // low spatial complexity then it can be hard to cover the cost of a new motion @@ -5306,6 +5473,7 @@ static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) { xd->mb_to_top_edge - LEFT_TOP_MARGIN, xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN); } + static INTERP_FILTER predict_interp_filter(const VP10_COMP *cpi, const MACROBLOCK *x, const BLOCK_SIZE bsize, @@ -5434,6 +5602,7 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, int_mv single_newmvs[2][MAX_REF_FRAMES], int single_newmvs_rate[2][MAX_REF_FRAMES], int *compmode_interintra_cost, + int *compmode_wedge_cost, #else int_mv single_newmv[MAX_REF_FRAMES], #endif // CONFIG_EXT_INTER @@ -5454,41 +5623,47 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, int refs[2] = { mbmi->ref_frame[0], (mbmi->ref_frame[1] < 0 ? 
0 : mbmi->ref_frame[1]) }; int_mv cur_mv[2]; + int rate_mv = 0; #if CONFIG_EXT_INTER int mv_idx = (this_mode == NEWFROMNEARMV) ? 1 : 0; int_mv single_newmv[MAX_REF_FRAMES]; const int * const intra_mode_cost = cpi->mbmode_cost[size_group_lookup[bsize]]; const int is_comp_interintra_pred = (mbmi->ref_frame[1] == INTRA_FRAME); + const int tmp_buf_sz = CU_SIZE * CU_SIZE; #if CONFIG_REF_MV uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame); #endif #endif // CONFIG_EXT_INTER #if CONFIG_VP9_HIGHBITDEPTH - DECLARE_ALIGNED(16, uint16_t, tmp_buf16[MAX_MB_PLANE * 64 * 64]); + DECLARE_ALIGNED(16, uint16_t, tmp_buf16[MAX_MB_PLANE * CU_SIZE * CU_SIZE]); uint8_t *tmp_buf; #else - DECLARE_ALIGNED(16, uint8_t, tmp_buf[MAX_MB_PLANE * 64 * 64]); + DECLARE_ALIGNED(16, uint8_t, tmp_buf[MAX_MB_PLANE * CU_SIZE * CU_SIZE]); #endif // CONFIG_VP9_HIGHBITDEPTH + +#if CONFIG_OBMC + int allow_obmc = #if CONFIG_EXT_INTER - const int tmp_buf_sz = CU_SIZE * CU_SIZE; + !is_comp_interintra_pred && #endif // CONFIG_EXT_INTER -#if CONFIG_OBMC - int allow_obmc = is_obmc_allowed(mbmi); + is_obmc_allowed(mbmi); int best_obmc_flag = 0; #if CONFIG_VP9_HIGHBITDEPTH - DECLARE_ALIGNED(16, uint16_t, tmp_buf1_16[MAX_MB_PLANE * 64 * 64]); - DECLARE_ALIGNED(16, uint16_t, tmp_buf2_16[MAX_MB_PLANE * 64 * 64]); + DECLARE_ALIGNED(16, uint16_t, tmp_buf1_16[MAX_MB_PLANE * CU_SIZE * CU_SIZE]); + DECLARE_ALIGNED(16, uint16_t, tmp_buf2_16[MAX_MB_PLANE * CU_SIZE * CU_SIZE]); uint8_t *tmp_buf1, *tmp_buf2; uint8_t *obmc_tmp_buf1[3]; uint8_t *obmc_tmp_buf2[3]; #else - DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * 64 * 64]); - DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * 64 * 64]); - uint8_t *obmc_tmp_buf1[3] = {tmp_buf1, tmp_buf1 + 4096, tmp_buf1 + 8192}; - uint8_t *obmc_tmp_buf2[3] = {tmp_buf2, tmp_buf2 + 4096, tmp_buf2 + 8192}; + DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * CU_SIZE * CU_SIZE]); + DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * CU_SIZE * CU_SIZE]); + uint8_t *obmc_tmp_buf1[3] = {tmp_buf1, tmp_buf1 + CU_SIZE * CU_SIZE, + tmp_buf1 + CU_SIZE * CU_SIZE * 2}; + uint8_t *obmc_tmp_buf2[3] = {tmp_buf2, tmp_buf2 + CU_SIZE * CU_SIZE, + tmp_buf2 + CU_SIZE * CU_SIZE * 2}; #endif // CONFIG_VP9_HIGHBITDEPTH - int obmc_tmp_stride[3] = {64, 64, 64}; + int obmc_tmp_stride[3] = {CU_SIZE, CU_SIZE, CU_SIZE}; uint8_t skip_txfm_bestfilter[2][MAX_MB_PLANE << 2] = {{0}, {0}}; int64_t bsse_bestfilter[2][MAX_MB_PLANE << 2] = {{0}, {0}}; @@ -5507,6 +5682,7 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, unsigned int best_pred_var = UINT_MAX; MB_MODE_INFO best_mbmi; #endif // CONFIG_OBMC + int pred_exists = 0; int intpel_mv; int64_t rd, tmp_rd, best_rd = INT64_MAX; @@ -5525,6 +5701,9 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, #if CONFIG_EXT_INTER *compmode_interintra_cost = 0; + mbmi->use_wedge_interintra = 0; + *compmode_wedge_cost = 0; + mbmi->use_wedge_interinter = 0; // is_comp_interintra_pred implies !is_comp_pred assert(!is_comp_interintra_pred || (!is_comp_pred)); @@ -5575,12 +5754,7 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, return INT64_MAX; } -#if CONFIG_EXT_INTER if (have_newmv_in_inter_mode(this_mode)) { -#else - if (this_mode == NEWMV) { -#endif // CONFIG_EXT_INTER - int rate_mv; if (is_comp_pred) { #if CONFIG_EXT_INTER for (i = 0; i < 2; ++i) { @@ -5767,7 +5941,7 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, } } #endif // CONFIG_EXT_INTER -#endif +#endif // CONFIG_REF_MV // do first prediction into the destination 
buffer. Do the next // prediction into a temporary buffer. Then keep track of which one @@ -5805,10 +5979,11 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, if (RDCOST(x->rdmult, x->rddiv, *rate2, 0) > ref_best_rd && #if CONFIG_EXT_INTER - mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) + mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV #else - mbmi->mode != NEARESTMV) + mbmi->mode != NEARESTMV #endif // CONFIG_EXT_INTER + ) return INT64_MAX; pred_exists = 0; @@ -6001,6 +6176,8 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, skip_sse_sb = tmp_skip_sse; memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm)); memcpy(bsse, x->bsse, sizeof(bsse)); + } else { + pred_exists = 0; } } restore_dst_buf(xd, orig_dst, orig_dst_stride); @@ -6012,12 +6189,169 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, rs = cm->interp_filter == SWITCHABLE ? vp10_get_switchable_rate(cpi, xd) : 0; #if CONFIG_EXT_INTER + if (is_comp_pred && get_wedge_bits(bsize)) { + int wedge_index, best_wedge_index = WEDGE_NONE, rs; + int rate_sum; + int64_t dist_sum; + int64_t best_rd_nowedge = INT64_MAX; + int64_t best_rd_wedge = INT64_MAX; + int wedge_types; + int tmp_skip_txfm_sb; + int64_t tmp_skip_sse_sb; + rs = vp10_cost_bit(cm->fc->wedge_interinter_prob[bsize], 0); + vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); + model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum, + &tmp_skip_txfm_sb, &tmp_skip_sse_sb); + rd = RDCOST(x->rdmult, x->rddiv, rs + rate_mv + rate_sum, dist_sum); + best_rd_nowedge = rd; + mbmi->use_wedge_interinter = 1; + rs = get_wedge_bits(bsize) * 256 + + vp10_cost_bit(cm->fc->wedge_interinter_prob[bsize], 1); + wedge_types = (1 << get_wedge_bits(bsize)); + if (have_newmv_in_inter_mode(this_mode)) { + int_mv tmp_mv[2]; + int rate_mvs[2], tmp_rate_mv = 0; + uint8_t pred0[2 * CU_SIZE * CU_SIZE * 3]; + uint8_t pred1[2 * CU_SIZE * CU_SIZE * 3]; + uint8_t *preds0[3] = {pred0, + pred0 + 2 * CU_SIZE * CU_SIZE, + pred0 + 4 * CU_SIZE * CU_SIZE}; + uint8_t *preds1[3] = {pred1, + pred1 + 2 * CU_SIZE * CU_SIZE, + pred1 + 4 * CU_SIZE * CU_SIZE}; + int strides[3] = {CU_SIZE, CU_SIZE, CU_SIZE}; + vp10_build_inter_predictors_for_planes_single_buf( + xd, bsize, mi_row, mi_col, 0, preds0, strides); + vp10_build_inter_predictors_for_planes_single_buf( + xd, bsize, mi_row, mi_col, 1, preds1, strides); + + for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) { + mbmi->interinter_wedge_index = wedge_index; + vp10_build_wedge_inter_predictor_from_buf(xd, bsize, mi_row, mi_col, + preds0, strides, + preds1, strides); + model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum, + &tmp_skip_txfm_sb, &tmp_skip_sse_sb); + rd = RDCOST(x->rdmult, x->rddiv, rs + rate_mv + rate_sum, dist_sum); + if (rd < best_rd_wedge) { + best_wedge_index = wedge_index; + best_rd_wedge = rd; + } + } + mbmi->interinter_wedge_index = best_wedge_index; + if (this_mode == NEW_NEWMV) { + int mv_idxs[2] = {0, 0}; + do_masked_motion_search_indexed(cpi, x, mbmi->interinter_wedge_index, + bsize, mi_row, mi_col, tmp_mv, rate_mvs, + mv_idxs, 2); + tmp_rate_mv = rate_mvs[0] + rate_mvs[1]; + mbmi->mv[0].as_int = tmp_mv[0].as_int; + mbmi->mv[1].as_int = tmp_mv[1].as_int; + } else if (this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV) { + int mv_idxs[2] = {0, 0}; + do_masked_motion_search_indexed(cpi, x, mbmi->interinter_wedge_index, + bsize, mi_row, mi_col, tmp_mv, rate_mvs, + mv_idxs, 0); + tmp_rate_mv = rate_mvs[0]; + mbmi->mv[0].as_int = tmp_mv[0].as_int; + } else if 
(this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) { + int mv_idxs[2] = {0, 0}; + do_masked_motion_search_indexed(cpi, x, mbmi->interinter_wedge_index, + bsize, mi_row, mi_col, tmp_mv, rate_mvs, + mv_idxs, 1); + tmp_rate_mv = rate_mvs[1]; + mbmi->mv[1].as_int = tmp_mv[1].as_int; + } + vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); + model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum, + &tmp_skip_txfm_sb, &tmp_skip_sse_sb); + rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate_mv + rate_sum, dist_sum); + if (rd < best_rd_wedge) { + best_rd_wedge = rd; + } else { + mbmi->mv[0].as_int = cur_mv[0].as_int; + mbmi->mv[1].as_int = cur_mv[1].as_int; + tmp_rate_mv = rate_mv; + } + if (best_rd_wedge < best_rd_nowedge) { + mbmi->use_wedge_interinter = 1; + mbmi->interinter_wedge_index = best_wedge_index; + xd->mi[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int; + xd->mi[0]->bmi[0].as_mv[1].as_int = mbmi->mv[1].as_int; + *rate2 += tmp_rate_mv - rate_mv; + rate_mv = tmp_rate_mv; + } else { + mbmi->use_wedge_interinter = 0; + mbmi->mv[0].as_int = cur_mv[0].as_int; + mbmi->mv[1].as_int = cur_mv[1].as_int; + } + } else { + uint8_t pred0[2 * CU_SIZE * CU_SIZE * 3]; + uint8_t pred1[2 * CU_SIZE * CU_SIZE * 3]; + uint8_t *preds0[3] = {pred0, + pred0 + 2 * CU_SIZE * CU_SIZE, + pred0 + 4 * CU_SIZE * CU_SIZE}; + uint8_t *preds1[3] = {pred1, + pred1 + 2 * CU_SIZE * CU_SIZE, + pred1 + 4 * CU_SIZE * CU_SIZE}; + int strides[3] = {CU_SIZE, CU_SIZE, CU_SIZE}; + vp10_build_inter_predictors_for_planes_single_buf( + xd, bsize, mi_row, mi_col, 0, preds0, strides); + vp10_build_inter_predictors_for_planes_single_buf( + xd, bsize, mi_row, mi_col, 1, preds1, strides); + for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) { + mbmi->interinter_wedge_index = wedge_index; + // vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); + vp10_build_wedge_inter_predictor_from_buf(xd, bsize, mi_row, mi_col, + preds0, strides, + preds1, strides); + model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum, + &tmp_skip_txfm_sb, &tmp_skip_sse_sb); + rd = RDCOST(x->rdmult, x->rddiv, rs + rate_mv + rate_sum, dist_sum); + if (rd < best_rd_wedge) { + best_wedge_index = wedge_index; + best_rd_wedge = rd; + } + } + if (best_rd_wedge < best_rd_nowedge) { + mbmi->use_wedge_interinter = 1; + mbmi->interinter_wedge_index = best_wedge_index; + } else { + mbmi->use_wedge_interinter = 0; + } + } +#if CONFIG_OBMC + if (mbmi->use_wedge_interinter) + allow_obmc = 0; +#endif // CONFIG_OBMC + if (ref_best_rd < INT64_MAX && + VPXMIN(best_rd_wedge, best_rd_nowedge) / 2 > ref_best_rd) + return INT64_MAX; + + pred_exists = 0; + tmp_rd = VPXMIN(best_rd_wedge, best_rd_nowedge); + if (mbmi->use_wedge_interinter) + *compmode_wedge_cost = get_wedge_bits(bsize) * 256 + + vp10_cost_bit(cm->fc->wedge_interinter_prob[bsize], 1); + else + *compmode_wedge_cost = + vp10_cost_bit(cm->fc->wedge_interinter_prob[bsize], 0); + } + if (is_comp_interintra_pred) { PREDICTION_MODE interintra_mode, best_interintra_mode = DC_PRED; int64_t best_interintra_rd = INT64_MAX; int rmode, rate_sum; int64_t dist_sum; int j; + int wedge_bits, wedge_types, wedge_index, best_wedge_index = -1; + int64_t best_interintra_rd_nowedge = INT64_MAX; + int64_t best_interintra_rd_wedge = INT64_MAX; + int rwedge; + int bw = 4 << b_width_log2_lookup[mbmi->sb_type], + bh = 4 << b_height_log2_lookup[mbmi->sb_type]; + int_mv tmp_mv; + int tmp_rate_mv = 0; mbmi->ref_frame[1] = NONE; for (j = 0; j < MAX_MB_PLANE; j++) { xd->plane[j].dst.buf = tmp_buf + j * tmp_buf_sz; @@ -6033,16 
+6367,16 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, mbmi->interintra_uv_mode = interintra_mode; rmode = intra_mode_cost[mbmi->interintra_mode]; vp10_build_interintra_predictors(xd, - tmp_buf, - tmp_buf + tmp_buf_sz, - tmp_buf + 2 * tmp_buf_sz, - CU_SIZE, - CU_SIZE, - CU_SIZE, - bsize); + tmp_buf, + tmp_buf + tmp_buf_sz, + tmp_buf + 2 * tmp_buf_sz, + CU_SIZE, + CU_SIZE, + CU_SIZE, + bsize); model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum, &skip_txfm_sb, &skip_sse_sb); - rd = RDCOST(x->rdmult, x->rddiv, rmode + rate_sum, dist_sum); + rd = RDCOST(x->rdmult, x->rddiv, rate_mv + rmode + rate_sum, dist_sum); if (rd < best_interintra_rd) { best_interintra_rd = rd; best_interintra_mode = interintra_mode; @@ -6054,17 +6388,112 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, best_interintra_rd / 2 > ref_best_rd) { return INT64_MAX; } + wedge_bits = get_wedge_bits(bsize); + rmode = intra_mode_cost[mbmi->interintra_mode]; + if (wedge_bits) { + vp10_build_interintra_predictors(xd, + tmp_buf, + tmp_buf + tmp_buf_sz, + tmp_buf + 2 * tmp_buf_sz, + CU_SIZE, + CU_SIZE, + CU_SIZE, + bsize); + model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum, + &skip_txfm_sb, &skip_sse_sb); + rwedge = vp10_cost_bit(cm->fc->wedge_interintra_prob[bsize], 0); + rd = RDCOST(x->rdmult, x->rddiv, + rmode + rate_mv + rwedge + rate_sum, dist_sum); + best_interintra_rd_nowedge = rd; + + mbmi->use_wedge_interintra = 1; + rwedge = wedge_bits * 256 + + vp10_cost_bit(cm->fc->wedge_interintra_prob[bsize], 1); + wedge_types = (1 << wedge_bits); + for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) { + mbmi->interintra_wedge_index = wedge_index; + mbmi->interintra_uv_wedge_index = wedge_index; + vp10_build_interintra_predictors(xd, + tmp_buf, + tmp_buf + tmp_buf_sz, + tmp_buf + 2 * tmp_buf_sz, + CU_SIZE, + CU_SIZE, + CU_SIZE, + bsize); + model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum, + &skip_txfm_sb, &skip_sse_sb); + rd = RDCOST(x->rdmult, x->rddiv, + rmode + rate_mv + rwedge + rate_sum, dist_sum); + if (rd < best_interintra_rd_wedge) { + best_interintra_rd_wedge = rd; + best_wedge_index = wedge_index; + } + } + // Refine motion vector. 
+ if (have_newmv_in_inter_mode(this_mode)) { + // get negative of mask + const uint8_t* mask = vp10_get_soft_mask( + best_wedge_index ^ 1, bsize, bh, bw); + mbmi->interintra_wedge_index = best_wedge_index; + mbmi->interintra_uv_wedge_index = best_wedge_index; + do_masked_motion_search(cpi, x, mask, MASK_MASTER_STRIDE, bsize, + mi_row, mi_col, &tmp_mv, &tmp_rate_mv, + 0, mv_idx); + mbmi->mv[0].as_int = tmp_mv.as_int; + vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); + model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum, + &skip_txfm_sb, &skip_sse_sb); + rd = RDCOST(x->rdmult, x->rddiv, + rmode + tmp_rate_mv + rwedge + rate_sum, dist_sum); + if (rd < best_interintra_rd_wedge) { + best_interintra_rd_wedge = rd; + } else { + tmp_mv.as_int = cur_mv[0].as_int; + tmp_rate_mv = rate_mv; + } + } else { + tmp_mv.as_int = cur_mv[0].as_int; + tmp_rate_mv = rate_mv; + } + if (best_interintra_rd_wedge < best_interintra_rd_nowedge) { + mbmi->use_wedge_interintra = 1; + mbmi->interintra_wedge_index = best_wedge_index; + mbmi->interintra_uv_wedge_index = best_wedge_index; + best_interintra_rd = best_interintra_rd_wedge; + mbmi->mv[0].as_int = tmp_mv.as_int; + *rate2 += tmp_rate_mv - rate_mv; + rate_mv = tmp_rate_mv; + } else { + mbmi->use_wedge_interintra = 0; + best_interintra_rd = best_interintra_rd_nowedge; + mbmi->mv[0].as_int = cur_mv[0].as_int; + } + } pred_exists = 0; tmp_rd = best_interintra_rd; - *compmode_interintra_cost = - vp10_cost_bit(cm->fc->interintra_prob[bsize], 1); + vp10_cost_bit(cm->fc->interintra_prob[bsize], 1); *compmode_interintra_cost += intra_mode_cost[mbmi->interintra_mode]; + if (get_wedge_bits(bsize)) { + *compmode_interintra_cost += vp10_cost_bit( + cm->fc->wedge_interintra_prob[bsize], mbmi->use_wedge_interintra); + if (mbmi->use_wedge_interintra) { + *compmode_interintra_cost += get_wedge_bits(bsize) * 256; + } + } } else if (is_interintra_allowed(mbmi)) { *compmode_interintra_cost = vp10_cost_bit(cm->fc->interintra_prob[bsize], 0); } + +#if CONFIG_EXT_INTERP + if (!vp10_is_interp_needed(xd) && cm->interp_filter == SWITCHABLE) { + mbmi->interp_filter = EIGHTTAP_REGULAR; + pred_exists = 0; + } +#endif // CONFIG_EXT_INTERP #endif // CONFIG_EXT_INTER #if CONFIG_OBMC @@ -6811,17 +7240,15 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi, int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame]; x->mbmi_ext->mode_context[ref_frame] = 0; vp10_find_mv_refs(cm, xd, mi, ref_frame, -#if CONFIG_REF_MV &mbmi_ext->ref_mv_count[ref_frame], mbmi_ext->ref_mv_stack[ref_frame], #if CONFIG_EXT_INTER mbmi_ext->compound_mode_context, #endif // CONFIG_EXT_INTER -#endif candidates, mi_row, mi_col, NULL, NULL, mbmi_ext->mode_context); } -#endif +#endif // CONFIG_REF_MV #if CONFIG_OBMC vp10_build_prediction_by_above_preds(cpi, xd, mi_row, mi_col, dst_buf1, @@ -6947,6 +7374,7 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi, int compmode_cost = 0; #if CONFIG_EXT_INTER int compmode_interintra_cost = 0; + int compmode_wedge_cost = 0; #endif // CONFIG_EXT_INTER int rate2 = 0, rate_y = 0, rate_uv = 0; int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0; @@ -7335,6 +7763,7 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi, single_newmvs, single_newmvs_rate, &compmode_interintra_cost, + &compmode_wedge_cost, #else single_newmv, #endif // CONFIG_EXT_INTER @@ -7401,6 +7830,7 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi, int dummy_single_newmvs_rate[2][MAX_REF_FRAMES] = { { 0 }, { 0 } }; int dummy_compmode_interintra_cost = 0; + int dummy_compmode_wedge_cost = 0; #else int_mv 
dummy_single_newmv[MAX_REF_FRAMES] = { { 0 } }; #endif @@ -7420,6 +7850,7 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi, dummy_single_newmvs, dummy_single_newmvs_rate, &dummy_compmode_interintra_cost, + &dummy_compmode_wedge_cost, #else dummy_single_newmv, #endif @@ -7496,6 +7927,8 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi, #if CONFIG_EXT_INTER rate2 += compmode_interintra_cost; + if (cm->reference_mode != SINGLE_REFERENCE && comp_pred) + rate2 += compmode_wedge_cost; #endif // CONFIG_EXT_INTER // Estimate the reference frame signaling cost and add it @@ -8112,6 +8545,10 @@ void vp10_rd_pick_inter_mode_sub8x8(struct VP10_COMP *cpi, #if CONFIG_OBMC mbmi->obmc = 0; #endif // CONFIG_OBMC +#if CONFIG_EXT_INTER + mbmi->use_wedge_interinter = 0; + mbmi->use_wedge_interintra = 0; +#endif // CONFIG_EXT_INTER for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) filter_cache[i] = INT64_MAX; @@ -8859,7 +9296,7 @@ void vp10_build_prediction_by_above_preds(VP10_COMP *cpi, mi_step = VPXMIN(xd->n8_w, num_8x8_blocks_wide_lookup[above_mbmi->sb_type]); - if (!is_inter_block(above_mbmi)) + if (!is_neighbor_overlappable(above_mbmi)) continue; for (j = 0; j < MAX_MB_PLANE; ++j) { @@ -8869,14 +9306,27 @@ void vp10_build_prediction_by_above_preds(VP10_COMP *cpi, 0, i, NULL, pd->subsampling_x, pd->subsampling_y); } + /* set_ref_ptrs(cm, xd, above_mbmi->ref_frame[0], above_mbmi->ref_frame[1]); for (ref = 0; ref < 1 + has_second_ref(above_mbmi); ++ref) { - YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, - above_mbmi->ref_frame[ref]); + YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer( + cpi, above_mbmi->ref_frame[ref]); assert(cfg != NULL); vp10_setup_pre_planes(xd, ref, cfg, mi_row, mi_col + i, &xd->block_refs[ref]->sf); } + */ + for (ref = 0; ref < 1 + has_second_ref(above_mbmi); ++ref) { + MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref]; + RefBuffer *ref_buf = &cm->frame_refs[frame - LAST_FRAME]; + + xd->block_refs[ref] = ref_buf; + if ((!vp10_is_valid_scale(&ref_buf->sf))) + vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM, + "Reference frame has invalid dimensions"); + vp10_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col + i, + &ref_buf->sf); + } xd->mb_to_left_edge = -(((mi_col + i) * MI_SIZE) * 8); mi_x = (mi_col + i) << MI_SIZE_LOG2; @@ -8905,11 +9355,19 @@ void vp10_build_prediction_by_above_preds(VP10_COMP *cpi, build_inter_predictors(xd, j, mi_col_offset, mi_row_offset, y * 2 + x, bw, bh, - 4 * x, 0, pw, bh, mi_x, mi_y); + 4 * x, 0, pw, bh, +#if CONFIG_SUPERTX && CONFIG_EXT_INTER + 0, 0, +#endif // CONFIG_SUPERTX && CONFIG_EXT_INTER + mi_x, mi_y); } } else { - build_inter_predictors(xd, j, mi_col_offset, mi_row_offset, 0, - bw, bh, 0, 0, bw, bh, mi_x, mi_y); + build_inter_predictors(xd, j, mi_col_offset, mi_row_offset, + 0, bw, bh, 0, 0, bw, bh, +#if CONFIG_SUPERTX && CONFIG_EXT_INTER + 0, 0, +#endif // CONFIG_SUPERTX && CONFIG_EXT_INTER + mi_x, mi_y); } } } @@ -8937,11 +9395,12 @@ void vp10_build_prediction_by_left_preds(VP10_COMP *cpi, MODE_INFO *left_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]; MB_MODE_INFO *left_mbmi = &left_mi->mbmi; + const int is_compound = has_second_ref(left_mbmi); mi_step = VPXMIN(xd->n8_h, num_8x8_blocks_high_lookup[left_mbmi->sb_type]); - if (!is_inter_block(left_mbmi)) + if (!is_neighbor_overlappable(left_mbmi)) continue; for (j = 0; j < MAX_MB_PLANE; ++j) { @@ -8951,6 +9410,7 @@ void vp10_build_prediction_by_left_preds(VP10_COMP *cpi, i, 0, NULL, pd->subsampling_x, pd->subsampling_y); } + /* set_ref_ptrs(cm, xd, 
left_mbmi->ref_frame[0], left_mbmi->ref_frame[1]); for (ref = 0; ref < 1 + has_second_ref(left_mbmi); ++ref) { YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, @@ -8959,6 +9419,18 @@ void vp10_build_prediction_by_left_preds(VP10_COMP *cpi, vp10_setup_pre_planes(xd, ref, cfg, mi_row + i, mi_col, &xd->block_refs[ref]->sf); } + */ + for (ref = 0; ref < 1 + is_compound; ++ref) { + MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref]; + RefBuffer *ref_buf = &cm->frame_refs[frame - LAST_FRAME]; + + xd->block_refs[ref] = ref_buf; + if ((!vp10_is_valid_scale(&ref_buf->sf))) + vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM, + "Reference frame has invalid dimensions"); + vp10_setup_pre_planes(xd, ref, ref_buf->buf, mi_row + i, mi_col, + &ref_buf->sf); + } xd->mb_to_top_edge = -(((mi_row + i) * MI_SIZE) * 8); mi_x = mi_col << MI_SIZE_LOG2; @@ -8987,11 +9459,19 @@ void vp10_build_prediction_by_left_preds(VP10_COMP *cpi, build_inter_predictors(xd, j, mi_col_offset, mi_row_offset, y * 2 + x, bw, bh, - 0, 4 * y, bw, ph, mi_x, mi_y); + 0, 4 * y, bw, ph, +#if CONFIG_SUPERTX && CONFIG_EXT_INTER + 0, 0, +#endif // CONFIG_SUPERTX && CONFIG_EXT_INTER + mi_x, mi_y); } } else { build_inter_predictors(xd, j, mi_col_offset, mi_row_offset, 0, - bw, bh, 0, 0, bw, bh, mi_x, mi_y); + bw, bh, 0, 0, bw, bh, +#if CONFIG_SUPERTX && CONFIG_EXT_INTER + 0, 0, +#endif // CONFIG_SUPERTX && CONFIG_EXT_INTER + mi_x, mi_y); } } } diff --git a/vpx_dsp/variance.h b/vpx_dsp/variance.h index cd0fd98785b0bccbcf875251bb77c2a93fa6fd1d..161d6474d0de6d71f26c2b1076f066f3823ecae0 100644 --- a/vpx_dsp/variance.h +++ b/vpx_dsp/variance.h @@ -74,7 +74,32 @@ typedef struct variance_vtable { } vp8_variance_fn_ptr_t; #endif // CONFIG_VP8 -#if CONFIG_VP9 || CONFIG_VP10 +#if CONFIG_VP10 && CONFIG_EXT_INTER +typedef unsigned int(*vpx_masked_sad_fn_t)(const uint8_t *src_ptr, + int source_stride, + const uint8_t *ref_ptr, + int ref_stride, + const uint8_t *msk_ptr, + int msk_stride); +typedef unsigned int (*vpx_masked_variance_fn_t)(const uint8_t *src_ptr, + int source_stride, + const uint8_t *ref_ptr, + int ref_stride, + const uint8_t *msk_ptr, + int msk_stride, + unsigned int *sse); +typedef unsigned int (*vpx_masked_subpixvariance_fn_t)(const uint8_t *src_ptr, + int source_stride, + int xoffset, + int yoffset, + const uint8_t *ref_ptr, + int Refstride, + const uint8_t *msk_ptr, + int msk_stride, + unsigned int *sse); +#endif // CONFIG_VP10 && CONFIG_EXT_INTER + +#if CONFIG_VP9 typedef struct vp9_variance_vtable { vpx_sad_fn_t sdf; vpx_sad_avg_fn_t sdaf; @@ -85,7 +110,25 @@ typedef struct vp9_variance_vtable { vpx_sad_multi_fn_t sdx8f; vpx_sad_multi_d_fn_t sdx4df; } vp9_variance_fn_ptr_t; -#endif // CONFIG_VP9 || CONFIG_VP10 +#endif // CONFIG_VP9 + +#if CONFIG_VP10 +typedef struct vp10_variance_vtable { + vpx_sad_fn_t sdf; + vpx_sad_avg_fn_t sdaf; + vpx_variance_fn_t vf; + vpx_subpixvariance_fn_t svf; + vpx_subp_avg_variance_fn_t svaf; + vpx_sad_multi_fn_t sdx3f; + vpx_sad_multi_fn_t sdx8f; + vpx_sad_multi_d_fn_t sdx4df; +#if CONFIG_EXT_INTER + vpx_masked_sad_fn_t msdf; + vpx_masked_variance_fn_t mvf; + vpx_masked_subpixvariance_fn_t msvf; +#endif // CONFIG_EXT_INTER +} vp10_variance_fn_ptr_t; +#endif // CONFIG_VP10 #ifdef __cplusplus } // extern "C"
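The three masked function-pointer types added above are what the new masked motion searches in mcomp.c consume through the msdf, mvf and msvf members of vp10_variance_fn_ptr_t; their kernels are defined elsewhere in vpx_dsp. As a rough reference only, a plain-C masked SAD along the following lines would satisfy the vpx_masked_sad_fn_t contract once width and height are bound by per-block-size wrappers. The 6-bit weight range and the rounding are assumptions about the companion vpx_dsp change, not something this patch shows:

#include <stdint.h>
#include <stdlib.h>  /* abs() */

/* Plausible reference kernel: per-pixel weighted SAD. Assumes mask values
 * lie in [0, 64] and the result is renormalized back to pixel units. */
static unsigned int masked_sad_ref(const uint8_t *src, int src_stride,
                                   const uint8_t *ref, int ref_stride,
                                   const uint8_t *msk, int msk_stride,
                                   int width, int height) {
  int r, c;
  unsigned int sad = 0;
  for (r = 0; r < height; ++r) {
    for (c = 0; c < width; ++c)
      sad += msk[c] * abs(src[c] - ref[c]);  /* weight each absolute diff */
    src += src_stride;
    ref += ref_stride;
    msk += msk_stride;
  }
  return (sad + 31) >> 6;  /* round and drop the 6 fractional weight bits */
}

A hypothetical vpx_masked_sad_fn_t entry for a 16x16 block, for example, would simply forward to this helper with width = height = 16.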