/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <assert.h>

#include "./aom_scale_rtcd.h"
#include "./aom_dsp_rtcd.h"
#include "./aom_config.h"
#include "aom/aom_integer.h"
#include "aom_dsp/blend.h"

#include "av1/common/blockd.h"
#include "av1/common/reconinter.h"
#include "av1/common/reconintra.h"
#if CONFIG_MOTION_VAR
#include "av1/common/onyxc_int.h"
#endif  // CONFIG_MOTION_VAR

#if CONFIG_EXT_INTER

#define NSMOOTHERS 1
#define USE_SOFT_WEIGHTS_IN_WEDGE 1

static int get_masked_weight(int m, int smoothness) {
#define SMOOTHER_LEN 32
  static const uint8_t smoothfn[NSMOOTHERS][2 * SMOOTHER_LEN + 1] = { {
#if USE_SOFT_WEIGHTS_IN_WEDGE
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      1,  2,  4,  7,  13, 21, 32, 43, 51, 57, 60, 62, 63,
      64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
      64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
#else
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  64, 64, 32, 64, 64, 64, 64, 64, 64,
      64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
      64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
#endif  // USE_SOFT_WEIGHTS_IN_WEDGE
  } };
  if (m < -SMOOTHER_LEN)
    return 0;
  else if (m > SMOOTHER_LEN)
    return (1 << WEDGE_WEIGHT_BITS);
  else
    return smoothfn[smoothness][m + SMOOTHER_LEN];
}

// [smoother][negative][direction]
DECLARE_ALIGNED(16, static uint8_t,
                wedge_mask_obl[NSMOOTHERS][2][WEDGE_DIRECTIONS]
                              [MASK_MASTER_SIZE * MASK_MASTER_SIZE]);

DECLARE_ALIGNED(16, static uint8_t,
                wedge_signflip_lookup[BLOCK_SIZES][MAX_WEDGE_TYPES]);

// 3 * MAX_WEDGE_SQUARE is an easy to compute and fairly tight upper bound
// on the sum of all mask sizes up to and including MAX_WEDGE_SQUARE.
DECLARE_ALIGNED(16, static uint8_t,
                wedge_mask_buf[2 * MAX_WEDGE_TYPES * 3 * MAX_WEDGE_SQUARE]);

static wedge_masks_type wedge_masks[BLOCK_SIZES][2];

// Some unused wedge codebooks left temporarily to facilitate experiments.
// To be removed when settled.
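// Each wedge_code_type entry in the codebooks below is
// { direction, x_offset, y_offset }, with the offsets given in eighths of
// the block width and height; get_wedge_mask_inplace() converts them to
// pixel offsets via (x_offset * bw) >> 3 and (y_offset * bh) >> 3.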
/* static wedge_code_type wedge_codebook_8_hgtw[8] = { { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 }, { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 }, { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 }, { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 }, }; static wedge_code_type wedge_codebook_8_hltw[8] = { { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 }, { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 }, { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 }, { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 }, }; static wedge_code_type wedge_codebook_8_heqw[8] = { { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 }, { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 }, { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 6, 4 }, }; */ #if !USE_LARGE_WEDGE_CODEBOOK static const wedge_code_type wedge_codebook_16_hgtw[16] = { { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 }, { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 }, { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 }, { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 }, { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 }, { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 }, { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 }, { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 }, }; static const wedge_code_type wedge_codebook_16_hltw[16] = { { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 }, { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 }, { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 4, 4 }, { WEDGE_VERTICAL, 6, 4 }, { WEDGE_HORIZONTAL, 4, 4 }, { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 }, { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 }, { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 }, { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 }, }; static const wedge_code_type wedge_codebook_16_heqw[16] = { { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 }, { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 }, { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 6, 4 }, { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 }, { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 }, { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 }, { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 }, }; const wedge_params_type wedge_params_lookup[BLOCK_SIZES] = { #if CONFIG_CB4X4 { 0, NULL, NULL, 0, NULL }, { 0, NULL, NULL, 0, NULL }, { 0, NULL, NULL, 0, NULL }, #endif // CONFIG_CB4X4 { 0, NULL, NULL, 0, NULL }, { 0, NULL, NULL, 0, NULL }, { 0, NULL, NULL, 0, NULL }, #if CONFIG_WEDGE { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8], 0, wedge_masks[BLOCK_8X8] }, { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X16], 0, wedge_masks[BLOCK_8X16] }, { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X8], 0, wedge_masks[BLOCK_16X8] }, { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_16X16], 0, wedge_masks[BLOCK_16X16] }, { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X32], 0, wedge_masks[BLOCK_16X32] }, { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X16], 0, wedge_masks[BLOCK_32X16] }, { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32], 0, wedge_masks[BLOCK_32X32] }, { 0, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_32X64], 0, wedge_masks[BLOCK_32X64] }, { 0, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_64X32], 0, wedge_masks[BLOCK_64X32] }, { 0, wedge_codebook_16_heqw, 
wedge_signflip_lookup[BLOCK_64X64], 0, wedge_masks[BLOCK_64X64] }, #else { 0, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8], 0, wedge_masks[BLOCK_8X8] }, { 0, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X16], 0, wedge_masks[BLOCK_8X16] }, { 0, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X8], 0, wedge_masks[BLOCK_16X8] }, { 0, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_16X16], 0, wedge_masks[BLOCK_16X16] }, { 0, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X32], 0, wedge_masks[BLOCK_16X32] }, { 0, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X16], 0, wedge_masks[BLOCK_32X16] }, { 0, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32], 0, wedge_masks[BLOCK_32X32] }, { 0, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_32X64], 0, wedge_masks[BLOCK_32X64] }, { 0, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_64X32], 0, wedge_masks[BLOCK_64X32] }, { 0, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_64X64], 0, wedge_masks[BLOCK_64X64] }, #endif // CONFIG_WEDGE #if CONFIG_EXT_PARTITION { 0, NULL, NULL, 0, NULL }, { 0, NULL, NULL, 0, NULL }, { 0, NULL, NULL, 0, NULL }, #endif // CONFIG_EXT_PARTITION }; #else static const wedge_code_type wedge_codebook_32_hgtw[32] = { { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 }, { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 }, { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 }, { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 }, { WEDGE_OBLIQUE27, 4, 1 }, { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 3 }, { WEDGE_OBLIQUE27, 4, 5 }, { WEDGE_OBLIQUE27, 4, 6 }, { WEDGE_OBLIQUE27, 4, 7 }, { WEDGE_OBLIQUE153, 4, 1 }, { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 3 }, { WEDGE_OBLIQUE153, 4, 5 }, { WEDGE_OBLIQUE153, 4, 6 }, { WEDGE_OBLIQUE153, 4, 7 }, { WEDGE_OBLIQUE63, 1, 4 }, { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 3, 4 }, { WEDGE_OBLIQUE63, 5, 4 }, { WEDGE_OBLIQUE63, 6, 4 }, { WEDGE_OBLIQUE63, 7, 4 }, { WEDGE_OBLIQUE117, 1, 4 }, { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 3, 4 }, { WEDGE_OBLIQUE117, 5, 4 }, { WEDGE_OBLIQUE117, 6, 4 }, { WEDGE_OBLIQUE117, 7, 4 }, }; static const wedge_code_type wedge_codebook_32_hltw[32] = { { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 }, { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 }, { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 4, 4 }, { WEDGE_VERTICAL, 6, 4 }, { WEDGE_HORIZONTAL, 4, 4 }, { WEDGE_OBLIQUE27, 4, 1 }, { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 3 }, { WEDGE_OBLIQUE27, 4, 5 }, { WEDGE_OBLIQUE27, 4, 6 }, { WEDGE_OBLIQUE27, 4, 7 }, { WEDGE_OBLIQUE153, 4, 1 }, { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 3 }, { WEDGE_OBLIQUE153, 4, 5 }, { WEDGE_OBLIQUE153, 4, 6 }, { WEDGE_OBLIQUE153, 4, 7 }, { WEDGE_OBLIQUE63, 1, 4 }, { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 3, 4 }, { WEDGE_OBLIQUE63, 5, 4 }, { WEDGE_OBLIQUE63, 6, 4 }, { WEDGE_OBLIQUE63, 7, 4 }, { WEDGE_OBLIQUE117, 1, 4 }, { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 3, 4 }, { WEDGE_OBLIQUE117, 5, 4 }, { WEDGE_OBLIQUE117, 6, 4 }, { WEDGE_OBLIQUE117, 7, 4 }, }; static const wedge_code_type wedge_codebook_32_heqw[32] = { { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 }, { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 }, { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 6, 4 }, { WEDGE_OBLIQUE27, 4, 1 }, { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 3 }, { WEDGE_OBLIQUE27, 4, 5 }, { WEDGE_OBLIQUE27, 4, 6 }, { WEDGE_OBLIQUE27, 4, 7 }, { WEDGE_OBLIQUE153, 4, 1 }, { 
WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 3 }, { WEDGE_OBLIQUE153, 4, 5 }, { WEDGE_OBLIQUE153, 4, 6 }, { WEDGE_OBLIQUE153, 4, 7 }, { WEDGE_OBLIQUE63, 1, 4 }, { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 3, 4 }, { WEDGE_OBLIQUE63, 5, 4 }, { WEDGE_OBLIQUE63, 6, 4 }, { WEDGE_OBLIQUE63, 7, 4 }, { WEDGE_OBLIQUE117, 1, 4 }, { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 3, 4 }, { WEDGE_OBLIQUE117, 5, 4 }, { WEDGE_OBLIQUE117, 6, 4 }, { WEDGE_OBLIQUE117, 7, 4 }, }; const wedge_params_type wedge_params_lookup[BLOCK_SIZES] = { #if CONFIG_CB4X4 { 0, NULL, NULL, 0, NULL }, { 0, NULL, NULL, 0, NULL }, { 0, NULL, NULL, 0, NULL }, #endif { 0, NULL, NULL, 0, NULL }, { 0, NULL, NULL, 0, NULL }, { 0, NULL, NULL, 0, NULL }, #if CONFIG_WEDGE { 5, wedge_codebook_32_heqw, wedge_signflip_lookup[BLOCK_8X8], 0, wedge_masks[BLOCK_8X8] }, { 5, wedge_codebook_32_hgtw, wedge_signflip_lookup[BLOCK_8X16], 0, wedge_masks[BLOCK_8X16] }, { 5, wedge_codebook_32_hltw, wedge_signflip_lookup[BLOCK_16X8], 0, wedge_masks[BLOCK_16X8] }, { 5, wedge_codebook_32_heqw, wedge_signflip_lookup[BLOCK_16X16], 0, wedge_masks[BLOCK_16X16] }, { 5, wedge_codebook_32_hgtw, wedge_signflip_lookup[BLOCK_16X32], 0, wedge_masks[BLOCK_16X32] }, { 5, wedge_codebook_32_hltw, wedge_signflip_lookup[BLOCK_32X16], 0, wedge_masks[BLOCK_32X16] }, { 5, wedge_codebook_32_heqw, wedge_signflip_lookup[BLOCK_32X32], 0, wedge_masks[BLOCK_32X32] }, { 0, wedge_codebook_32_hgtw, wedge_signflip_lookup[BLOCK_32X64], 0, wedge_masks[BLOCK_32X64] }, { 0, wedge_codebook_32_hltw, wedge_signflip_lookup[BLOCK_64X32], 0, wedge_masks[BLOCK_64X32] }, { 0, wedge_codebook_32_heqw, wedge_signflip_lookup[BLOCK_64X64], 0, wedge_masks[BLOCK_64X64] }, #else { 0, wedge_codebook_32_heqw, wedge_signflip_lookup[BLOCK_8X8], 0, wedge_masks[BLOCK_8X8] }, { 0, wedge_codebook_32_hgtw, wedge_signflip_lookup[BLOCK_8X16], 0, wedge_masks[BLOCK_8X16] }, { 0, wedge_codebook_32_hltw, wedge_signflip_lookup[BLOCK_16X8], 0, wedge_masks[BLOCK_16X8] }, { 0, wedge_codebook_32_heqw, wedge_signflip_lookup[BLOCK_16X16], 0, wedge_masks[BLOCK_16X16] }, { 0, wedge_codebook_32_hgtw, wedge_signflip_lookup[BLOCK_16X32], 0, wedge_masks[BLOCK_16X32] }, { 0, wedge_codebook_32_hltw, wedge_signflip_lookup[BLOCK_32X16], 0, wedge_masks[BLOCK_32X16] }, { 0, wedge_codebook_32_heqw, wedge_signflip_lookup[BLOCK_32X32], 0, wedge_masks[BLOCK_32X32] }, { 0, wedge_codebook_32_hgtw, wedge_signflip_lookup[BLOCK_32X64], 0, wedge_masks[BLOCK_32X64] }, { 0, wedge_codebook_32_hltw, wedge_signflip_lookup[BLOCK_64X32], 0, wedge_masks[BLOCK_64X32] }, { 0, wedge_codebook_32_heqw, wedge_signflip_lookup[BLOCK_64X64], 0, wedge_masks[BLOCK_64X64] }, #endif // CONFIG_WEDGE #if CONFIG_EXT_PARTITION { 0, NULL, NULL, 0, NULL }, { 0, NULL, NULL, 0, NULL }, { 0, NULL, NULL, 0, NULL }, #endif // CONFIG_EXT_PARTITION }; #endif // USE_LARGE_WEDGE_CODEBOOK static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg, BLOCK_SIZE sb_type) { const uint8_t *master; const int bh = block_size_high[sb_type]; const int bw = block_size_wide[sb_type]; const wedge_code_type *a = wedge_params_lookup[sb_type].codebook + wedge_index; const int smoother = wedge_params_lookup[sb_type].smoother; int woff, hoff; const uint8_t wsignflip = wedge_params_lookup[sb_type].signflip[wedge_index]; assert(wedge_index >= 0 && wedge_index < (1 << get_wedge_bits_lookup(sb_type))); woff = (a->x_offset * bw) >> 3; hoff = (a->y_offset * bh) >> 3; master = wedge_mask_obl[smoother][neg ^ wsignflip][a->direction] + MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) + 
MASK_MASTER_SIZE / 2 - woff; return master; } const uint8_t *av1_get_soft_mask(int wedge_index, int wedge_sign, BLOCK_SIZE sb_type, int offset_x, int offset_y) { const uint8_t *mask = get_wedge_mask_inplace(wedge_index, wedge_sign, sb_type); if (mask) mask -= (offset_x + offset_y * MASK_MASTER_STRIDE); return mask; } #if CONFIG_COMPOUND_SEGMENT static uint8_t *invert_mask(uint8_t *mask_inv_buffer, const uint8_t *const mask, int h, int w, int stride) { int i, j; for (i = 0; i < h; ++i) for (j = 0; j < w; ++j) { mask_inv_buffer[i * stride + j] = AOM_BLEND_A64_MAX_ALPHA - mask[i * stride + j]; } return mask_inv_buffer; } #endif // CONFIG_COMPOUND_SEGMENT const uint8_t *av1_get_compound_type_mask_inverse( const INTERINTER_COMPOUND_DATA *const comp_data, #if CONFIG_COMPOUND_SEGMENT uint8_t *mask_buffer, int h, int w, int stride, #endif BLOCK_SIZE sb_type) { assert(is_masked_compound_type(comp_data->type)); switch (comp_data->type) { case COMPOUND_WEDGE: return av1_get_contiguous_soft_mask(comp_data->wedge_index, !comp_data->wedge_sign, sb_type); #if CONFIG_COMPOUND_SEGMENT case COMPOUND_SEG: return invert_mask(mask_buffer, comp_data->seg_mask, h, w, stride); #endif // CONFIG_COMPOUND_SEGMENT default: assert(0); return NULL; } } const uint8_t *av1_get_compound_type_mask( const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type) { assert(is_masked_compound_type(comp_data->type)); switch (comp_data->type) { case COMPOUND_WEDGE: return av1_get_contiguous_soft_mask(comp_data->wedge_index, comp_data->wedge_sign, sb_type); #if CONFIG_COMPOUND_SEGMENT case COMPOUND_SEG: return comp_data->seg_mask; #endif // CONFIG_COMPOUND_SEGMENT default: assert(0); return NULL; } } #if CONFIG_COMPOUND_SEGMENT #if COMPOUND_SEGMENT_TYPE == 0 static void uniform_mask(uint8_t *mask, int which_inverse, BLOCK_SIZE sb_type, int h, int w, int mask_val) { int i, j; int block_stride = block_size_wide[sb_type]; for (i = 0; i < h; ++i) for (j = 0; j < w; ++j) { mask[i * block_stride + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - mask_val : mask_val; } } void build_compound_seg_mask(uint8_t *mask, SEG_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, BLOCK_SIZE sb_type, int h, int w) { (void)src0; (void)src1; (void)src0_stride; (void)src1_stride; switch (mask_type) { case UNIFORM_45: uniform_mask(mask, 0, sb_type, h, w, 45); break; case UNIFORM_45_INV: uniform_mask(mask, 1, sb_type, h, w, 45); break; default: assert(0); } } #if CONFIG_AOM_HIGHBITDEPTH void build_compound_seg_mask_highbd(uint8_t *mask, SEG_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, BLOCK_SIZE sb_type, int h, int w, int bd) { (void)src0; (void)src1; (void)src0_stride; (void)src1_stride; (void)bd; switch (mask_type) { case UNIFORM_45: uniform_mask(mask, 0, sb_type, h, w, 45); break; case UNIFORM_45_INV: uniform_mask(mask, 1, sb_type, h, w, 45); break; default: assert(0); } } #endif // CONFIG_AOM_HIGHBITDEPTH #elif COMPOUND_SEGMENT_TYPE == 1 #define DIFF_FACTOR 16 static void diffwtd_mask(uint8_t *mask, int which_inverse, int mask_base, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, BLOCK_SIZE sb_type, int h, int w) { int i, j, m, diff; int block_stride = block_size_wide[sb_type]; for (i = 0; i < h; ++i) { for (j = 0; j < w; ++j) { diff = abs((int)src0[i * src0_stride + j] - (int)src1[i * src1_stride + j]); m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA); mask[i * block_stride + j] = which_inverse ? 
AOM_BLEND_A64_MAX_ALPHA - m : m; } } } void build_compound_seg_mask(uint8_t *mask, SEG_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, BLOCK_SIZE sb_type, int h, int w) { switch (mask_type) { case DIFFWTD_42: diffwtd_mask(mask, 0, 42, src0, src0_stride, src1, src1_stride, sb_type, h, w); break; case DIFFWTD_42_INV: diffwtd_mask(mask, 1, 42, src0, src0_stride, src1, src1_stride, sb_type, h, w); break; default: assert(0); } } #if CONFIG_AOM_HIGHBITDEPTH static void diffwtd_mask_highbd(uint8_t *mask, int which_inverse, int mask_base, const uint16_t *src0, int src0_stride, const uint16_t *src1, int src1_stride, BLOCK_SIZE sb_type, int h, int w, int bd) { int i, j, m, diff; int block_stride = block_size_wide[sb_type]; for (i = 0; i < h; ++i) { for (j = 0; j < w; ++j) { diff = abs((int)src0[i * src0_stride + j] - (int)src1[i * src1_stride + j]) >> (bd - 8); m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA); mask[i * block_stride + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m; } } } void build_compound_seg_mask_highbd(uint8_t *mask, SEG_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, BLOCK_SIZE sb_type, int h, int w, int bd) { switch (mask_type) { case DIFFWTD_42: diffwtd_mask_highbd(mask, 0, 42, CONVERT_TO_SHORTPTR(src0), src0_stride, CONVERT_TO_SHORTPTR(src1), src1_stride, sb_type, h, w, bd); break; case DIFFWTD_42_INV: diffwtd_mask_highbd(mask, 1, 42, CONVERT_TO_SHORTPTR(src0), src0_stride, CONVERT_TO_SHORTPTR(src1), src1_stride, sb_type, h, w, bd); break; default: assert(0); } } #endif // CONFIG_AOM_HIGHBITDEPTH #endif // COMPOUND_SEGMENT_TYPE #endif // CONFIG_COMPOUND_SEGMENT static void init_wedge_master_masks() { int i, j, s; const int w = MASK_MASTER_SIZE; const int h = MASK_MASTER_SIZE; const int stride = MASK_MASTER_STRIDE; const int a[2] = { 2, 1 }; const double asqrt = sqrt(a[0] * a[0] + a[1] * a[1]); for (s = 0; s < NSMOOTHERS; s++) { for (i = 0; i < h; ++i) for (j = 0; j < w; ++j) { int x = (2 * j + 1 - w); int y = (2 * i + 1 - h); int m = (int)rint((a[0] * x + a[1] * y) / asqrt); wedge_mask_obl[s][1][WEDGE_OBLIQUE63][i * stride + j] = wedge_mask_obl[s][1][WEDGE_OBLIQUE27][j * stride + i] = get_masked_weight(m, s); wedge_mask_obl[s][1][WEDGE_OBLIQUE117][i * stride + w - 1 - j] = wedge_mask_obl[s][1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] = (1 << WEDGE_WEIGHT_BITS) - get_masked_weight(m, s); wedge_mask_obl[s][0][WEDGE_OBLIQUE63][i * stride + j] = wedge_mask_obl[s][0][WEDGE_OBLIQUE27][j * stride + i] = (1 << WEDGE_WEIGHT_BITS) - get_masked_weight(m, s); wedge_mask_obl[s][0][WEDGE_OBLIQUE117][i * stride + w - 1 - j] = wedge_mask_obl[s][0][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] = get_masked_weight(m, s); wedge_mask_obl[s][1][WEDGE_VERTICAL][i * stride + j] = wedge_mask_obl[s][1][WEDGE_HORIZONTAL][j * stride + i] = get_masked_weight(x, s); wedge_mask_obl[s][0][WEDGE_VERTICAL][i * stride + j] = wedge_mask_obl[s][0][WEDGE_HORIZONTAL][j * stride + i] = (1 << WEDGE_WEIGHT_BITS) - get_masked_weight(x, s); } } } // If the signs for the wedges for various blocksizes are // inconsistent flip the sign flag. Do it only once for every // wedge codebook. 
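// A wedge's sign is treated as flipped when the rounded average of the mask
// values along the block's top row and left column falls below 32, i.e.
// below half of (1 << WEDGE_WEIGHT_BITS).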
static void init_wedge_signs() { BLOCK_SIZE sb_type; memset(wedge_signflip_lookup, 0, sizeof(wedge_signflip_lookup)); for (sb_type = BLOCK_4X4; sb_type < BLOCK_SIZES; ++sb_type) { const int bw = block_size_wide[sb_type]; const int bh = block_size_high[sb_type]; const wedge_params_type wedge_params = wedge_params_lookup[sb_type]; const int wbits = wedge_params.bits; const int wtypes = 1 << wbits; int i, w; if (wbits == 0) continue; for (w = 0; w < wtypes; ++w) { const uint8_t *mask = get_wedge_mask_inplace(w, 0, sb_type); int sum = 0; for (i = 0; i < bw; ++i) sum += mask[i]; for (i = 0; i < bh; ++i) sum += mask[i * MASK_MASTER_STRIDE]; sum = (sum + (bw + bh) / 2) / (bw + bh); wedge_params.signflip[w] = (sum < 32); } } } static void init_wedge_masks() { uint8_t *dst = wedge_mask_buf; BLOCK_SIZE bsize; memset(wedge_masks, 0, sizeof(wedge_masks)); for (bsize = BLOCK_4X4; bsize < BLOCK_SIZES; ++bsize) { const uint8_t *mask; const int bw = block_size_wide[bsize]; const int bh = block_size_high[bsize]; const wedge_params_type *wedge_params = &wedge_params_lookup[bsize]; const int wbits = wedge_params->bits; const int wtypes = 1 << wbits; int w; if (wbits == 0) continue; for (w = 0; w < wtypes; ++w) { mask = get_wedge_mask_inplace(w, 0, bsize); aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw, NULL, 0, NULL, 0, bw, bh); wedge_params->masks[0][w] = dst; dst += bw * bh; mask = get_wedge_mask_inplace(w, 1, bsize); aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw, NULL, 0, NULL, 0, bw, bh); wedge_params->masks[1][w] = dst; dst += bw * bh; } assert(sizeof(wedge_mask_buf) >= (size_t)(dst - wedge_mask_buf)); } } // Equation of line: f(x, y) = a[0]*(x - a[2]*w/8) + a[1]*(y - a[3]*h/8) = 0 void av1_init_wedge_masks() { init_wedge_master_masks(); init_wedge_signs(); init_wedge_masks(); } #if CONFIG_SUPERTX static void build_masked_compound_wedge_extend( uint8_t *dst, int dst_stride, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int wedge_offset_x, int wedge_offset_y, int h, int w) { const int subh = (2 << b_height_log2_lookup[sb_type]) == h; const int subw = (2 << b_width_log2_lookup[sb_type]) == w; const uint8_t *mask; size_t mask_stride; switch (comp_data->type) { case COMPOUND_WEDGE: mask = av1_get_soft_mask(comp_data->wedge_index, comp_data->wedge_sign, sb_type, wedge_offset_x, wedge_offset_y); mask_stride = MASK_MASTER_STRIDE; break; #if CONFIG_COMPOUND_SEGMENT case COMPOUND_SEG: mask = comp_data->seg_mask; mask_stride = block_size_wide[sb_type]; break; #endif default: assert(0); return; } aom_blend_a64_mask(dst, dst_stride, src0, src0_stride, src1, src1_stride, mask, mask_stride, h, w, subh, subw); } #if CONFIG_AOM_HIGHBITDEPTH static void build_masked_compound_wedge_extend_highbd( uint8_t *dst_8, int dst_stride, const uint8_t *src0_8, int src0_stride, const uint8_t *src1_8, int src1_stride, const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int wedge_offset_x, int wedge_offset_y, int h, int w, int bd) { const int subh = (2 << b_height_log2_lookup[sb_type]) == h; const int subw = (2 << b_width_log2_lookup[sb_type]) == w; const uint8_t *mask; size_t mask_stride; switch (comp_data->type) { case COMPOUND_WEDGE: mask = av1_get_soft_mask(comp_data->wedge_index, comp_data->wedge_sign, sb_type, wedge_offset_x, wedge_offset_y); mask_stride = MASK_MASTER_STRIDE; break; #if CONFIG_COMPOUND_SEGMENT case COMPOUND_SEG: mask = comp_data->seg_mask; mask_stride = block_size_wide[sb_type]; break; 
#endif default: assert(0); return; } aom_highbd_blend_a64_mask(dst_8, dst_stride, src0_8, src0_stride, src1_8, src1_stride, mask, mask_stride, h, w, subh, subw, bd); } #endif // CONFIG_AOM_HIGHBITDEPTH #else static void build_masked_compound( uint8_t *dst, int dst_stride, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h, int w) { // Derive subsampling from h and w passed in. May be refactored to // pass in subsampling factors directly. const int subh = (2 << b_height_log2_lookup[sb_type]) == h; const int subw = (2 << b_width_log2_lookup[sb_type]) == w; const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type); aom_blend_a64_mask(dst, dst_stride, src0, src0_stride, src1, src1_stride, mask, block_size_wide[sb_type], h, w, subh, subw); } #if CONFIG_AOM_HIGHBITDEPTH static void build_masked_compound_highbd( uint8_t *dst_8, int dst_stride, const uint8_t *src0_8, int src0_stride, const uint8_t *src1_8, int src1_stride, const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h, int w, int bd) { // Derive subsampling from h and w passed in. May be refactored to // pass in subsampling factors directly. const int subh = (2 << b_height_log2_lookup[sb_type]) == h; const int subw = (2 << b_width_log2_lookup[sb_type]) == w; const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type); // const uint8_t *mask = // av1_get_contiguous_soft_mask(wedge_index, wedge_sign, sb_type); aom_highbd_blend_a64_mask(dst_8, dst_stride, src0_8, src0_stride, src1_8, src1_stride, mask, block_size_wide[sb_type], h, w, subh, subw, bd); } #endif // CONFIG_AOM_HIGHBITDEPTH #endif // CONFIG_SUPERTX void av1_make_masked_inter_predictor(const uint8_t *pre, int pre_stride, uint8_t *dst, int dst_stride, const int subpel_x, const int subpel_y, const struct scale_factors *sf, int w, int h, #if CONFIG_DUAL_FILTER const InterpFilter *interp_filter, #else const InterpFilter interp_filter, #endif int xs, int ys, #if CONFIG_SUPERTX int wedge_offset_x, int wedge_offset_y, #endif // CONFIG_SUPERTX int plane, #if CONFIG_GLOBAL_MOTION int is_global, int p_col, int p_row, int ref, #endif // CONFIG_GLOBAL_MOTION MACROBLOCKD *xd) { MODE_INFO *mi = xd->mi[0]; INTERINTER_COMPOUND_DATA *comp_data = &mi->mbmi.interinter_compound_data; // The prediction filter types used here should be those for // the second reference block. #if CONFIG_DUAL_FILTER InterpFilter tmp_ipf[4] = { interp_filter[2], interp_filter[3], interp_filter[2], interp_filter[3], }; #else InterpFilter tmp_ipf = interp_filter; #endif // CONFIG_DUAL_FILTER ConvolveParams conv_params = get_conv_params(0, plane); #if CONFIG_AOM_HIGHBITDEPTH DECLARE_ALIGNED(16, uint8_t, tmp_dst_[2 * MAX_SB_SQUARE]); uint8_t *tmp_dst = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 
CONVERT_TO_BYTEPTR(tmp_dst_) : tmp_dst_; av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE, subpel_x, subpel_y, sf, w, h, &conv_params, tmp_ipf, #if CONFIG_GLOBAL_MOTION is_global, p_col, p_row, plane, ref, #if CONFIG_MOTION_VAR 0, 0, #endif #endif // CONFIG_GLOBAL_MOTION xs, ys, xd); #if CONFIG_COMPOUND_SEGMENT if (!plane && comp_data->type == COMPOUND_SEG) { if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) build_compound_seg_mask_highbd(comp_data->seg_mask, comp_data->mask_type, dst, dst_stride, tmp_dst, MAX_SB_SIZE, mi->mbmi.sb_type, h, w, xd->bd); else build_compound_seg_mask(comp_data->seg_mask, comp_data->mask_type, dst, dst_stride, tmp_dst, MAX_SB_SIZE, mi->mbmi.sb_type, h, w); } #endif // CONFIG_COMPOUND_SEGMENT #if CONFIG_SUPERTX if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) build_masked_compound_wedge_extend_highbd( dst, dst_stride, dst, dst_stride, tmp_dst, MAX_SB_SIZE, comp_data, mi->mbmi.sb_type, wedge_offset_x, wedge_offset_y, h, w, xd->bd); else build_masked_compound_wedge_extend( dst, dst_stride, dst, dst_stride, tmp_dst, MAX_SB_SIZE, comp_data, mi->mbmi.sb_type, wedge_offset_x, wedge_offset_y, h, w); #else if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) build_masked_compound_highbd(dst, dst_stride, dst, dst_stride, tmp_dst, MAX_SB_SIZE, comp_data, mi->mbmi.sb_type, h, w, xd->bd); else build_masked_compound(dst, dst_stride, dst, dst_stride, tmp_dst, MAX_SB_SIZE, comp_data, mi->mbmi.sb_type, h, w); #endif // CONFIG_SUPERTX #else // CONFIG_AOM_HIGHBITDEPTH DECLARE_ALIGNED(16, uint8_t, tmp_dst[MAX_SB_SQUARE]); av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE, subpel_x, subpel_y, sf, w, h, &conv_params, tmp_ipf, #if CONFIG_GLOBAL_MOTION is_global, p_col, p_row, plane, ref, #if CONFIG_MOTION_VAR 0, 0, #endif #endif // CONFIG_GLOBAL_MOTION xs, ys, xd); #if CONFIG_COMPOUND_SEGMENT if (!plane && comp_data->type == COMPOUND_SEG) build_compound_seg_mask(comp_data->seg_mask, comp_data->mask_type, dst, dst_stride, tmp_dst, MAX_SB_SIZE, mi->mbmi.sb_type, h, w); #endif // CONFIG_COMPOUND_SEGMENT #if CONFIG_SUPERTX build_masked_compound_wedge_extend(dst, dst_stride, dst, dst_stride, tmp_dst, MAX_SB_SIZE, comp_data, mi->mbmi.sb_type, wedge_offset_x, wedge_offset_y, h, w); #else build_masked_compound(dst, dst_stride, dst, dst_stride, tmp_dst, MAX_SB_SIZE, comp_data, mi->mbmi.sb_type, h, w); #endif // CONFIG_SUPERTX #endif // CONFIG_AOM_HIGHBITDEPTH #if CONFIG_COMPOUND_SEGMENT (void)plane; #endif // CONFIG_COMPOUND_SEGMENT } #endif // CONFIG_EXT_INTER // TODO(sarahparker) av1_highbd_build_inter_predictor and // av1_build_inter_predictor should be combined with // av1_make_inter_predictor #if CONFIG_AOM_HIGHBITDEPTH void av1_highbd_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const MV *src_mv, const struct scale_factors *sf, int w, int h, int ref, #if CONFIG_DUAL_FILTER const InterpFilter *interp_filter, #else const InterpFilter interp_filter, #endif #if CONFIG_GLOBAL_MOTION int is_global, int p_col, int p_row, #endif // CONFIG_GLOBAL_MOTION int plane, enum mv_precision precision, int x, int y, const MACROBLOCKD *xd) { const int is_q4 = precision == MV_PRECISION_Q4; const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2, is_q4 ? 
src_mv->col : src_mv->col * 2 }; MV32 mv = av1_scale_mv(&mv_q4, x, y, sf); const int subpel_x = mv.col & SUBPEL_MASK; const int subpel_y = mv.row & SUBPEL_MASK; ConvolveParams conv_params = get_conv_params(ref, plane); src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS); av1_make_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, sf, w, h, &conv_params, interp_filter, #if CONFIG_GLOBAL_MOTION is_global, p_col, p_row, plane, ref, #if CONFIG_MOTION_VAR 0, 0, #endif #endif // CONFIG_GLOBAL_MOTION sf->x_step_q4, sf->y_step_q4, xd); } #endif // CONFIG_AOM_HIGHBITDEPTH void av1_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const MV *src_mv, const struct scale_factors *sf, int w, int h, ConvolveParams *conv_params, #if CONFIG_DUAL_FILTER const InterpFilter *interp_filter, #else const InterpFilter interp_filter, #endif #if CONFIG_GLOBAL_MOTION int is_global, int p_col, int p_row, int plane, int ref, #endif // CONFIG_GLOBAL_MOTION enum mv_precision precision, int x, int y, const MACROBLOCKD *xd) { const int is_q4 = precision == MV_PRECISION_Q4; const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2, is_q4 ? src_mv->col : src_mv->col * 2 }; MV32 mv = av1_scale_mv(&mv_q4, x, y, sf); const int subpel_x = mv.col & SUBPEL_MASK; const int subpel_y = mv.row & SUBPEL_MASK; src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS); av1_make_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, sf, w, h, conv_params, interp_filter, #if CONFIG_GLOBAL_MOTION is_global, p_col, p_row, plane, ref, #if CONFIG_MOTION_VAR 0, 0, #endif #endif // CONFIG_GLOBAL_MOTION sf->x_step_q4, sf->y_step_q4, xd); } typedef struct SubpelParams { int xs; int ys; int subpel_x; int subpel_y; } SubpelParams; void build_inter_predictors(MACROBLOCKD *xd, int plane, #if CONFIG_MOTION_VAR int mi_col_offset, int mi_row_offset, #endif // CONFIG_MOTION_VAR int block, int bw, int bh, int x, int y, int w, int h, #if CONFIG_SUPERTX && CONFIG_EXT_INTER int wedge_offset_x, int wedge_offset_y, #endif // CONFIG_SUPERTX && CONFIG_EXT_INTER int mi_x, int mi_y) { struct macroblockd_plane *const pd = &xd->plane[plane]; #if CONFIG_MOTION_VAR const MODE_INFO *mi = xd->mi[mi_col_offset + xd->mi_stride * mi_row_offset]; #if !CONFIG_CB4X4 const int build_for_obmc = !(mi_col_offset == 0 && mi_row_offset == 0); #endif // !CONFIG_CB4X4 #else const MODE_INFO *mi = xd->mi[0]; #endif // CONFIG_MOTION_VAR const int is_compound = has_second_ref(&mi->mbmi); int ref; #if CONFIG_GLOBAL_MOTION int is_global[2]; for (ref = 0; ref < 1 + is_compound; ++ref) { WarpedMotionParams *const wm = &xd->global_motion[mi->mbmi.ref_frame[ref]]; is_global[ref] = is_global_mv_block(mi, block, wm->wmtype); } #endif // CONFIG_GLOBAL_MOTION #if CONFIG_CB4X4 (void)block; #endif #if CONFIG_SUB8X8_MC #if CONFIG_MOTION_VAR if (mi->mbmi.sb_type < BLOCK_8X8 && plane > 0 && !build_for_obmc) { #else if (mi->mbmi.sb_type < BLOCK_8X8 && plane > 0) { #endif // CONFIG_MOTION_VAR // block size in log2 const int b4_wl = b_width_log2_lookup[mi->mbmi.sb_type]; const int b4_hl = b_height_log2_lookup[mi->mbmi.sb_type]; const int b8_sl = b_width_log2_lookup[BLOCK_8X8]; // block size const int b4_w = 1 << b4_wl; const int b4_h = 1 << b4_hl; const int b8_s = 1 << b8_sl; int idx, idy; const int x_base = x; const int y_base = y; // processing unit size const int x_step = w >> (b8_sl - b4_wl); const int y_step = h >> (b8_sl - b4_hl); for (idy = 0; idy < b8_s; idy += b4_h) { for (idx = 0; idx < b8_s; idx 
+= b4_w) { const int chr_idx = (idy * 2) + idx; for (ref = 0; ref < 1 + is_compound; ++ref) { const struct scale_factors *const sf = &xd->block_refs[ref]->sf; struct buf_2d *const pre_buf = &pd->pre[ref]; struct buf_2d *const dst_buf = &pd->dst; uint8_t *dst = dst_buf->buf; const MV mv = mi->bmi[chr_idx].as_mv[ref].as_mv; const MV mv_q4 = clamp_mv_to_umv_border_sb( xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y); uint8_t *pre; MV32 scaled_mv; int xs, ys, subpel_x, subpel_y; const int is_scaled = av1_is_scaled(sf); ConvolveParams conv_params = get_conv_params(ref, plane); x = x_base + idx * x_step; y = y_base + idy * y_step; dst += dst_buf->stride * y + x; if (is_scaled) { pre = pre_buf->buf + scaled_buffer_offset(x, y, pre_buf->stride, sf); scaled_mv = av1_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf); xs = sf->x_step_q4; ys = sf->y_step_q4; } else { pre = pre_buf->buf + y * pre_buf->stride + x; scaled_mv.row = mv_q4.row; scaled_mv.col = mv_q4.col; xs = ys = 16; } subpel_x = scaled_mv.col & SUBPEL_MASK; subpel_y = scaled_mv.row & SUBPEL_MASK; pre += (scaled_mv.row >> SUBPEL_BITS) * pre_buf->stride + (scaled_mv.col >> SUBPEL_BITS); #if CONFIG_EXT_INTER if (ref && is_masked_compound_type(mi->mbmi.interinter_compound_data.type)) av1_make_masked_inter_predictor( pre, pre_buf->stride, dst, dst_buf->stride, subpel_x, subpel_y, sf, w, h, mi->mbmi.interp_filter, xs, ys, #if CONFIG_SUPERTX wedge_offset_x, wedge_offset_y, #endif // CONFIG_SUPERTX plane, #if CONFIG_GLOBAL_MOTION is_global[ref], (mi_x >> pd->subsampling_x) + x, (mi_y >> pd->subsampling_y) + y, ref, #endif // CONFIG_GLOBAL_MOTION xd); else #endif // CONFIG_EXT_INTER av1_make_inter_predictor( pre, pre_buf->stride, dst, dst_buf->stride, subpel_x, subpel_y, sf, x_step, y_step, &conv_params, mi->mbmi.interp_filter, #if CONFIG_GLOBAL_MOTION is_global[ref], (mi_x >> pd->subsampling_x) + x, (mi_y >> pd->subsampling_y) + y, plane, ref, #if CONFIG_MOTION_VAR mi_col_offset, mi_row_offset, #endif #endif // CONFIG_GLOBAL_MOTION xs, ys, xd); } } } return; } #endif { struct buf_2d *const dst_buf = &pd->dst; uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x; uint8_t *pre[2]; MV32 scaled_mv[2]; SubpelParams subpel_params[2]; #if CONFIG_CONVOLVE_ROUND DECLARE_ALIGNED(16, int32_t, tmp_dst[MAX_SB_SIZE * MAX_SB_SIZE]); av1_zero(tmp_dst); #endif // CONFIG_CONVOLVE_ROUND for (ref = 0; ref < 1 + is_compound; ++ref) { const struct scale_factors *const sf = &xd->block_refs[ref]->sf; struct buf_2d *const pre_buf = &pd->pre[ref]; #if CONFIG_CB4X4 const MV mv = mi->mbmi.mv[ref].as_mv; #else const MV mv = #if CONFIG_MOTION_VAR (mi->mbmi.sb_type < BLOCK_8X8 && !build_for_obmc) ? #else mi->mbmi.sb_type < BLOCK_8X8 ? #endif average_split_mvs(pd, mi, ref, block) : mi->mbmi.mv[ref].as_mv; #endif // TODO(jkoleszar): This clamping is done in the incorrect place for the // scaling case. It needs to be done on the scaled MV, not the pre-scaling // MV. Note however that it performs the subsampling aware scaling so // that the result is always q4. // mv_precision precision is MV_PRECISION_Q4. 
const MV mv_q4 = clamp_mv_to_umv_border_sb( xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y); const int is_scaled = av1_is_scaled(sf); if (is_scaled) { pre[ref] = pre_buf->buf + scaled_buffer_offset(x, y, pre_buf->stride, sf); scaled_mv[ref] = av1_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf); subpel_params[ref].xs = sf->x_step_q4; subpel_params[ref].ys = sf->y_step_q4; } else { pre[ref] = pre_buf->buf + (y * pre_buf->stride + x); scaled_mv[ref].row = mv_q4.row; scaled_mv[ref].col = mv_q4.col; subpel_params[ref].xs = 16; subpel_params[ref].ys = 16; } subpel_params[ref].subpel_x = scaled_mv[ref].col & SUBPEL_MASK; subpel_params[ref].subpel_y = scaled_mv[ref].row & SUBPEL_MASK; pre[ref] += (scaled_mv[ref].row >> SUBPEL_BITS) * pre_buf->stride + (scaled_mv[ref].col >> SUBPEL_BITS); } #if CONFIG_CONVOLVE_ROUND ConvolveParams conv_params = get_conv_params_no_round(ref, plane, tmp_dst, MAX_SB_SIZE); #else ConvolveParams conv_params = get_conv_params(ref, plane); #endif // CONFIG_CONVOLVE_ROUND for (ref = 0; ref < 1 + is_compound; ++ref) { const struct scale_factors *const sf = &xd->block_refs[ref]->sf; struct buf_2d *const pre_buf = &pd->pre[ref]; conv_params.ref = ref; #if CONFIG_EXT_INTER if (ref && is_masked_compound_type(mi->mbmi.interinter_compound_data.type)) av1_make_masked_inter_predictor( pre[ref], pre_buf->stride, dst, dst_buf->stride, subpel_params[ref].subpel_x, subpel_params[ref].subpel_y, sf, w, h, mi->mbmi.interp_filter, subpel_params[ref].xs, subpel_params[ref].ys, #if CONFIG_SUPERTX wedge_offset_x, wedge_offset_y, #endif // CONFIG_SUPERTX plane, #if CONFIG_GLOBAL_MOTION is_global[ref], (mi_x >> pd->subsampling_x) + x, (mi_y >> pd->subsampling_y) + y, ref, #endif // CONFIG_GLOBAL_MOTION xd); else #endif // CONFIG_EXT_INTER av1_make_inter_predictor( pre[ref], pre_buf->stride, dst, dst_buf->stride, subpel_params[ref].subpel_x, subpel_params[ref].subpel_y, sf, w, h, &conv_params, mi->mbmi.interp_filter, #if CONFIG_GLOBAL_MOTION is_global[ref], (mi_x >> pd->subsampling_x) + x, (mi_y >> pd->subsampling_y) + y, plane, ref, #if CONFIG_MOTION_VAR mi_col_offset, mi_row_offset, #endif #endif // CONFIG_GLOBAL_MOTION subpel_params[ref].xs, subpel_params[ref].ys, xd); } #if CONFIG_CONVOLVE_ROUND // TODO(angiebird): This part needs optimization #if CONFIG_AOM_HIGHBITDEPTH if (!(xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)) #endif // CONFIG_AOM_HIGHBITDEPTH av1_convolve_rounding(tmp_dst, MAX_SB_SIZE, dst, dst_buf->stride, w, h, FILTER_BITS * 2 + is_compound - conv_params.round_0 - conv_params.round_1); #endif // CONFIG_CONVOLVE_ROUND } } void av1_build_inter_predictor_sub8x8(MACROBLOCKD *xd, int plane, int i, int ir, int ic, int mi_row, int mi_col) { struct macroblockd_plane *const pd = &xd->plane[plane]; MODE_INFO *const mi = xd->mi[0]; const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd); const int width = block_size_wide[plane_bsize]; const int height = block_size_high[plane_bsize]; uint8_t *const dst = &pd->dst.buf[(ir * pd->dst.stride + ic) << 2]; int ref; const int is_compound = has_second_ref(&mi->mbmi); #if CONFIG_GLOBAL_MOTION const int p_col = ((mi_col * MI_SIZE) >> pd->subsampling_x) + 4 * ic; const int p_row = ((mi_row * MI_SIZE) >> pd->subsampling_y) + 4 * ir; int is_global[2]; for (ref = 0; ref < 1 + is_compound; ++ref) { WarpedMotionParams *const wm = &xd->global_motion[mi->mbmi.ref_frame[ref]]; is_global[ref] = is_global_mv_block(mi, i, wm->wmtype); } #endif // CONFIG_GLOBAL_MOTION for (ref = 0; ref < 1 + is_compound; ++ref) { ConvolveParams conv_params = 
get_conv_params(ref, plane); const uint8_t *pre = &pd->pre[ref].buf[(ir * pd->pre[ref].stride + ic) << 2]; #if CONFIG_AOM_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) av1_highbd_build_inter_predictor( pre, pd->pre[ref].stride, dst, pd->dst.stride, &mi->bmi[i].as_mv[ref].as_mv, &xd->block_refs[ref]->sf, width, height, ref, mi->mbmi.interp_filter, #if CONFIG_GLOBAL_MOTION is_global[ref], p_col, p_row, #endif // CONFIG_GLOBAL_MOTION plane, MV_PRECISION_Q3, mi_col * MI_SIZE + 4 * ic, mi_row * MI_SIZE + 4 * ir, xd); else #endif // CONFIG_AOM_HIGHBITDEPTH av1_build_inter_predictor(pre, pd->pre[ref].stride, dst, pd->dst.stride, &mi->bmi[i].as_mv[ref].as_mv, &xd->block_refs[ref]->sf, width, height, &conv_params, mi->mbmi.interp_filter, #if CONFIG_GLOBAL_MOTION is_global[ref], p_col, p_row, plane, ref, #endif // CONFIG_GLOBAL_MOTION MV_PRECISION_Q3, mi_col * MI_SIZE + 4 * ic, mi_row * MI_SIZE + 4 * ir, xd); } } static void build_inter_predictors_for_planes(MACROBLOCKD *xd, BLOCK_SIZE bsize, int mi_row, int mi_col, int plane_from, int plane_to) { int plane; const int mi_x = mi_col * MI_SIZE; const int mi_y = mi_row * MI_SIZE; #if CONFIG_CB4X4 const int unify_bsize = 1; #else const int unify_bsize = 0; #endif for (plane = plane_from; plane <= plane_to; ++plane) { const struct macroblockd_plane *pd = &xd->plane[plane]; const int bw = pd->width; const int bh = pd->height; #if CONFIG_CB4X4 if (bsize < BLOCK_8X8 && plane && !is_chroma_reference(mi_row, mi_col)) continue; #endif if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8 && !unify_bsize) { const PARTITION_TYPE bp = bsize - xd->mi[0]->mbmi.sb_type; const int have_vsplit = bp != PARTITION_HORZ; const int have_hsplit = bp != PARTITION_VERT; const int num_4x4_w = 2 >> ((!have_vsplit) | pd->subsampling_x); const int num_4x4_h = 2 >> ((!have_hsplit) | pd->subsampling_y); const int pw = 8 >> (have_vsplit | pd->subsampling_x); const int ph = 8 >> (have_hsplit | pd->subsampling_y); int x, y; assert(bp != PARTITION_NONE && bp < PARTITION_TYPES); assert(bsize == BLOCK_8X8); assert(pw * num_4x4_w == bw && ph * num_4x4_h == bh); for (y = 0; y < num_4x4_h; ++y) for (x = 0; x < num_4x4_w; ++x) build_inter_predictors(xd, plane, #if CONFIG_MOTION_VAR 0, 0, #endif // CONFIG_MOTION_VAR y * 2 + x, bw, bh, 4 * x, 4 * y, pw, ph, #if CONFIG_SUPERTX && CONFIG_EXT_INTER 0, 0, #endif // CONFIG_SUPERTX && CONFIG_EXT_INTER mi_x, mi_y); } else { build_inter_predictors(xd, plane, #if CONFIG_MOTION_VAR 0, 0, #endif // CONFIG_MOTION_VAR 0, bw, bh, 0, 0, bw, bh, #if CONFIG_SUPERTX && CONFIG_EXT_INTER 0, 0, #endif // CONFIG_SUPERTX && CONFIG_EXT_INTER mi_x, mi_y); } } } void av1_build_inter_predictors_sby(MACROBLOCKD *xd, int mi_row, int mi_col, BUFFER_SET *ctx, BLOCK_SIZE bsize) { build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 0, 0); #if CONFIG_EXT_INTER if (is_interintra_pred(&xd->mi[0]->mbmi)) { BUFFER_SET default_ctx = { { xd->plane[0].dst.buf, NULL, NULL }, { xd->plane[0].dst.stride, 0, 0 } }; if (!ctx) ctx = &default_ctx; av1_build_interintra_predictors_sby(xd, xd->plane[0].dst.buf, xd->plane[0].dst.stride, ctx, bsize); } #else (void)ctx; #endif // CONFIG_EXT_INTER } void av1_build_inter_predictors_sbuv(MACROBLOCKD *xd, int mi_row, int mi_col, BUFFER_SET *ctx, BLOCK_SIZE bsize) { build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 1, MAX_MB_PLANE - 1); #if CONFIG_EXT_INTER if (is_interintra_pred(&xd->mi[0]->mbmi)) { BUFFER_SET default_ctx = { { NULL, xd->plane[1].dst.buf, xd->plane[2].dst.buf }, { 0, xd->plane[1].dst.stride, 
xd->plane[2].dst.stride } }; if (!ctx) ctx = &default_ctx; av1_build_interintra_predictors_sbuv( xd, xd->plane[1].dst.buf, xd->plane[2].dst.buf, xd->plane[1].dst.stride, xd->plane[2].dst.stride, ctx, bsize); } #else (void)ctx; #endif // CONFIG_EXT_INTER } // TODO(afergs): Check if ctx can be made constant void av1_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, BUFFER_SET *ctx, BLOCK_SIZE bsize) { build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 0, MAX_MB_PLANE - 1); #if CONFIG_EXT_INTER if (is_interintra_pred(&xd->mi[0]->mbmi)) { BUFFER_SET default_ctx = { { xd->plane[0].dst.buf, xd->plane[1].dst.buf, xd->plane[2].dst.buf }, { xd->plane[0].dst.stride, xd->plane[1].dst.stride, xd->plane[2].dst.stride } }; if (!ctx) ctx = &default_ctx; av1_build_interintra_predictors( xd, xd->plane[0].dst.buf, xd->plane[1].dst.buf, xd->plane[2].dst.buf, xd->plane[0].dst.stride, xd->plane[1].dst.stride, xd->plane[2].dst.stride, ctx, bsize); } #else (void)ctx; #endif // CONFIG_EXT_INTER } void av1_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE], const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col) { uint8_t *const buffers[MAX_MB_PLANE] = { src->y_buffer, src->u_buffer, src->v_buffer }; const int widths[MAX_MB_PLANE] = { src->y_crop_width, src->uv_crop_width, src->uv_crop_width }; const int heights[MAX_MB_PLANE] = { src->y_crop_height, src->uv_crop_height, src->uv_crop_height }; const int strides[MAX_MB_PLANE] = { src->y_stride, src->uv_stride, src->uv_stride }; int i; for (i = 0; i < MAX_MB_PLANE; ++i) { struct macroblockd_plane *const pd = &planes[i]; setup_pred_plane(&pd->dst, buffers[i], widths[i], heights[i], strides[i], mi_row, mi_col, NULL, pd->subsampling_x, pd->subsampling_y); } } void av1_setup_pre_planes(MACROBLOCKD *xd, int idx, const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col, const struct scale_factors *sf) { if (src != NULL) { int i; uint8_t *const buffers[MAX_MB_PLANE] = { src->y_buffer, src->u_buffer, src->v_buffer }; const int widths[MAX_MB_PLANE] = { src->y_crop_width, src->uv_crop_width, src->uv_crop_width }; const int heights[MAX_MB_PLANE] = { src->y_crop_height, src->uv_crop_height, src->uv_crop_height }; const int strides[MAX_MB_PLANE] = { src->y_stride, src->uv_stride, src->uv_stride }; for (i = 0; i < MAX_MB_PLANE; ++i) { struct macroblockd_plane *const pd = &xd->plane[i]; setup_pred_plane(&pd->pre[idx], buffers[i], widths[i], heights[i], strides[i], mi_row, mi_col, sf, pd->subsampling_x, pd->subsampling_y); } } } #if CONFIG_SUPERTX #if CONFIG_CB4X4 static const uint8_t mask_4[4] = { 64, 52, 12, 0 }; static const uint8_t mask_4_uv[4] = { 64, 52, 12, 0 }; #endif // CONFIG_CB4X4 static const uint8_t mask_8[8] = { 64, 64, 62, 52, 12, 2, 0, 0 }; static const uint8_t mask_16[16] = { 63, 62, 60, 58, 55, 50, 43, 36, 28, 21, 14, 9, 6, 4, 2, 1 }; static const uint8_t mask_32[32] = { 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 63, 61, 57, 52, 45, 36, 28, 19, 12, 7, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; static const uint8_t mask_8_uv[8] = { 64, 64, 62, 52, 12, 2, 0, 0 }; static const uint8_t mask_16_uv[16] = { 64, 64, 64, 64, 61, 53, 45, 36, 28, 19, 11, 3, 0, 0, 0, 0 }; static const uint8_t mask_32_uv[32] = { 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 60, 54, 46, 36, 28, 18, 10, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; static const uint8_t *get_supertx_mask(int length, int plane) { switch (length) { #if CONFIG_CB4X4 case 4: return plane ? mask_4_uv : mask_4; #endif // CONFIG_CB4X4 case 8: return plane ? 
mask_8_uv : mask_8; case 16: return plane ? mask_16_uv : mask_16; case 32: return plane ? mask_32_uv : mask_32; default: assert(0); } return NULL; } void av1_build_masked_inter_predictor_complex( MACROBLOCKD *xd, uint8_t *dst, int dst_stride, const uint8_t *pre, int pre_stride, int mi_row, int mi_col, int mi_row_ori, int mi_col_ori, BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, PARTITION_TYPE partition, int plane) { const struct macroblockd_plane *pd = &xd->plane[plane]; const int ssx = pd->subsampling_x; const int ssy = pd->subsampling_y; const int top_w = block_size_wide[top_bsize] >> ssx; const int top_h = block_size_high[top_bsize] >> ssy; const int w = block_size_wide[bsize] >> ssx; const int h = block_size_high[bsize] >> ssy; const int w_offset = ((mi_col - mi_col_ori) * MI_SIZE) >> ssx; const int h_offset = ((mi_row - mi_row_ori) * MI_SIZE) >> ssy; int w_remain, h_remain; #if CONFIG_AOM_HIGHBITDEPTH const int is_hdb = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0; #endif // CONFIG_AOM_HIGHBITDEPTH assert(bsize <= BLOCK_32X32); assert(IMPLIES(plane == 0, ssx == 0)); assert(IMPLIES(plane == 0, ssy == 0)); switch (partition) { case PARTITION_HORZ: { const uint8_t *const mask = get_supertx_mask(h, ssy); w_remain = top_w; h_remain = top_h - h_offset - h; dst += h_offset * dst_stride; pre += h_offset * pre_stride; #if CONFIG_AOM_HIGHBITDEPTH if (is_hdb) aom_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, pre, pre_stride, mask, h, top_w, xd->bd); else #endif // CONFIG_AOM_HIGHBITDEPTH aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, pre, pre_stride, mask, h, top_w); dst += h * dst_stride; pre += h * pre_stride; break; } case PARTITION_VERT: { const uint8_t *const mask = get_supertx_mask(w, ssx); w_remain = top_w - w_offset - w; h_remain = top_h; dst += w_offset; pre += w_offset; #if CONFIG_AOM_HIGHBITDEPTH if (is_hdb) aom_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, pre, pre_stride, mask, top_h, w, xd->bd); else #endif // CONFIG_AOM_HIGHBITDEPTH aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, pre, pre_stride, mask, top_h, w); dst += w; pre += w; break; } default: { assert(0); return; } } if (w_remain == 0 || h_remain == 0) { return; } #if CONFIG_AOM_HIGHBITDEPTH if (is_hdb) { dst = (uint8_t *)CONVERT_TO_SHORTPTR(dst); pre = (const uint8_t *)CONVERT_TO_SHORTPTR(pre); dst_stride *= 2; pre_stride *= 2; w_remain *= 2; } #endif // CONFIG_AOM_HIGHBITDEPTH do { memcpy(dst, pre, w_remain * sizeof(uint8_t)); dst += dst_stride; pre += pre_stride; } while (--h_remain); } void av1_build_inter_predictors_sb_sub8x8_extend(MACROBLOCKD *xd, #if CONFIG_EXT_INTER int mi_row_ori, int mi_col_ori, #endif // CONFIG_EXT_INTER int mi_row, int mi_col, BLOCK_SIZE bsize, int block) { // Prediction function used in supertx: // Use the mv at current block (which is less than 8x8) // to get prediction of a block located at (mi_row, mi_col) at size of bsize // bsize can be larger than 8x8. // block (0-3): the sub8x8 location of current block int plane; const int mi_x = mi_col * MI_SIZE; const int mi_y = mi_row * MI_SIZE; #if CONFIG_EXT_INTER const int wedge_offset_x = (mi_col_ori - mi_col) * MI_SIZE; const int wedge_offset_y = (mi_row_ori - mi_row) * MI_SIZE; #endif // CONFIG_EXT_INTER // For sub8x8 uv: // Skip uv prediction in supertx except the first block (block = 0) int max_plane = block ? 
1 : MAX_MB_PLANE; for (plane = 0; plane < max_plane; plane++) { const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, &xd->plane[plane]); const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; const int bw = 4 * num_4x4_w; const int bh = 4 * num_4x4_h; build_inter_predictors(xd, plane, #if CONFIG_MOTION_VAR 0, 0, #endif // CONFIG_MOTION_VAR block, bw, bh, 0, 0, bw, bh, #if CONFIG_EXT_INTER wedge_offset_x, wedge_offset_y, #endif // CONFIG_EXT_INTER mi_x, mi_y); } #if CONFIG_EXT_INTER if (is_interintra_pred(&xd->mi[0]->mbmi)) { BUFFER_SET ctx = { { xd->plane[0].dst.buf, xd->plane[1].dst.buf, xd->plane[2].dst.buf }, { xd->plane[0].dst.stride, xd->plane[1].dst.stride, xd->plane[2].dst.stride } }; av1_build_interintra_predictors( xd, xd->plane[0].dst.buf, xd->plane[1].dst.buf, xd->plane[2].dst.buf, xd->plane[0].dst.stride, xd->plane[1].dst.stride, xd->plane[2].dst.stride, &ctx, bsize); } #endif // CONFIG_EXT_INTER } void av1_build_inter_predictors_sb_extend(MACROBLOCKD *xd, #if CONFIG_EXT_INTER int mi_row_ori, int mi_col_ori, #endif // CONFIG_EXT_INTER int mi_row, int mi_col, BLOCK_SIZE bsize) { int plane; const int mi_x = mi_col * MI_SIZE; const int mi_y = mi_row * MI_SIZE; #if CONFIG_EXT_INTER const int wedge_offset_x = (mi_col_ori - mi_col) * MI_SIZE; const int wedge_offset_y = (mi_row_ori - mi_row) * MI_SIZE; #endif // CONFIG_EXT_INTER for (plane = 0; plane < MAX_MB_PLANE; ++plane) { const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, &xd->plane[plane]); const int bw = block_size_wide[plane_bsize]; const int bh = block_size_high[plane_bsize]; build_inter_predictors(xd, plane, #if CONFIG_MOTION_VAR 0, 0, #endif // CONFIG_MOTION_VAR 0, bw, bh, 0, 0, bw, bh, #if CONFIG_EXT_INTER wedge_offset_x, wedge_offset_y, #endif // CONFIG_EXT_INTER mi_x, mi_y); } } #endif // CONFIG_SUPERTX #if CONFIG_MOTION_VAR // obmc_mask_N[overlap_position] static const uint8_t obmc_mask_1[1] = { 64 }; static const uint8_t obmc_mask_2[2] = { 45, 64 }; static const uint8_t obmc_mask_4[4] = { 39, 50, 59, 64 }; static const uint8_t obmc_mask_8[8] = { 36, 42, 48, 53, 57, 61, 64, 64 }; static const uint8_t obmc_mask_16[16] = { 34, 37, 40, 43, 46, 49, 52, 54, 56, 58, 60, 61, 64, 64, 64, 64 }; static const uint8_t obmc_mask_32[32] = { 33, 35, 36, 38, 40, 41, 43, 44, 45, 47, 48, 50, 51, 52, 53, 55, 56, 57, 58, 59, 60, 60, 61, 62, 64, 64, 64, 64, 64, 64, 64, 64 }; #if CONFIG_EXT_PARTITION static const uint8_t obmc_mask_64[64] = { 33, 34, 35, 35, 36, 37, 38, 39, 40, 40, 41, 42, 43, 44, 44, 44, 45, 46, 47, 47, 48, 49, 50, 51, 51, 51, 52, 52, 53, 54, 55, 56, 56, 56, 57, 57, 58, 58, 59, 60, 60, 60, 60, 60, 61, 62, 62, 62, 62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, }; #endif // CONFIG_EXT_PARTITION const uint8_t *av1_get_obmc_mask(int length) { switch (length) { case 1: return obmc_mask_1; case 2: return obmc_mask_2; case 4: return obmc_mask_4; case 8: return obmc_mask_8; case 16: return obmc_mask_16; case 32: return obmc_mask_32; #if CONFIG_EXT_PARTITION case 64: return obmc_mask_64; #endif // CONFIG_EXT_PARTITION default: assert(0); return NULL; } } #if CONFIG_NCOBMC // obmc_mask_flipN[overlap_position] static const uint8_t obmc_mask_flip1[1] = { 55 }; static const uint8_t obmc_mask_flip2[2] = { 62, 45 }; static const uint8_t obmc_mask_flip4[4] = { 64, 59, 50, 39 }; static const uint8_t obmc_mask_flip8[8] = { 64, 63, 61, 57, 53, 48, 42, 36 }; static const uint8_t obmc_mask_flip16[16] = { 64, 64, 64, 63, 61, 60, 58, 56, 54, 52, 
49, 46, 43, 40, 37, 34 }; static const uint8_t obmc_mask_flip32[32] = { 64, 64, 64, 64, 64, 63, 63, 62, 62, 61, 60, 60, 59, 58, 57, 56, 55, 53, 52, 51, 50, 48, 47, 45, 44, 43, 41, 40, 38, 36, 35, 33 }; #if CONFIG_EXT_PARTITION static const uint8_t obmc_mask_flip64[64] = { 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 62, 62, 62, 62, 62, 61, 60, 60, 60, 60, 60, 59, 58, 58, 57, 57, 56, 56, 56, 55, 54, 53, 52, 52, 51, 51, 51, 50, 49, 48, 47, 47, 46, 45, 44, 44, 44, 43, 42, 41, 40, 40, 39, 38, 37, 36, 35, 35, 34, 33, }; #endif // CONFIG_EXT_PARTITION const uint8_t *av1_get_obmc_mask_flipped(int length) { switch (length) { case 1: return obmc_mask_flip1; case 2: return obmc_mask_flip2; case 4: return obmc_mask_flip4; case 8: return obmc_mask_flip8; case 16: return obmc_mask_flip16; case 32: return obmc_mask_flip32; #if CONFIG_EXT_PARTITION case 64: return obmc_mask_flip64; #endif // CONFIG_EXT_PARTITION default: assert(0); return NULL; } } #endif // CONFIG_NCOBMC void av1_count_overlappable_neighbors(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col) { int i, mi_step; xd->mi[0]->mbmi.overlappable_neighbors[0] = 0; xd->mi[0]->mbmi.overlappable_neighbors[1] = 0; if (xd->up_available) { const int ilimit = AOMMIN(xd->n8_w, cm->mi_cols - mi_col); for (i = 0; i < ilimit; i += mi_step) { int mi_row_offset = -1; int mi_col_offset = i; MODE_INFO *above_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]; MB_MODE_INFO *above_mbmi = &above_mi->mbmi; mi_step = AOMMIN(xd->n8_w, mi_size_wide[above_mbmi->sb_type]); if (is_neighbor_overlappable(above_mbmi)) xd->mi[0]->mbmi.overlappable_neighbors[0]++; } } if (xd->left_available) { const int ilimit = AOMMIN(xd->n8_h, cm->mi_rows - mi_row); for (i = 0; i < ilimit; i += mi_step) { int mi_row_offset = i; int mi_col_offset = -1; MODE_INFO *left_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]; MB_MODE_INFO *left_mbmi = &left_mi->mbmi; mi_step = AOMMIN(xd->n8_h, mi_size_high[left_mbmi->sb_type]); if (is_neighbor_overlappable(left_mbmi)) xd->mi[0]->mbmi.overlappable_neighbors[1]++; } } } // This function combines motion compensated predictions that is generated by // top/left neighboring blocks' inter predictors with the regular inter // prediction. We assume the original prediction (bmc) is stored in // xd->plane[].dst.buf void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col, uint8_t *above[MAX_MB_PLANE], int above_stride[MAX_MB_PLANE], uint8_t *left[MAX_MB_PLANE], int left_stride[MAX_MB_PLANE]) { const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; int plane, i; #if CONFIG_AOM_HIGHBITDEPTH const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 
1 : 0; #endif // CONFIG_AOM_HIGHBITDEPTH // handle above row if (xd->up_available) { const int overlap = num_4x4_blocks_high_lookup[bsize] * 2; const int miw = AOMMIN(xd->n8_w, cm->mi_cols - mi_col); const int mi_row_offset = -1; const int neighbor_limit = max_neighbor_obmc[b_width_log2_lookup[bsize]]; int neighbor_count = 0; assert(miw > 0); i = 0; do { // for each mi in the above row const int mi_col_offset = i; const MB_MODE_INFO *const above_mbmi = &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi; const BLOCK_SIZE a_bsize = above_mbmi->sb_type; const int mi_step = AOMMIN(xd->n8_w, mi_size_wide[a_bsize]); if (is_neighbor_overlappable(above_mbmi)) { neighbor_count++; if (neighbor_count > neighbor_limit) break; for (plane = 0; plane < MAX_MB_PLANE; ++plane) { const struct macroblockd_plane *pd = &xd->plane[plane]; const int bw = (mi_step * MI_SIZE) >> pd->subsampling_x; const int bh = overlap >> pd->subsampling_y; const int dst_stride = pd->dst.stride; uint8_t *const dst = &pd->dst.buf[(i * MI_SIZE) >> pd->subsampling_x]; const int tmp_stride = above_stride[plane]; const uint8_t *const tmp = &above[plane][(i * MI_SIZE) >> pd->subsampling_x]; const uint8_t *const mask = av1_get_obmc_mask(bh); #if CONFIG_AOM_HIGHBITDEPTH if (is_hbd) aom_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask, bh, bw, xd->bd); else #endif // CONFIG_AOM_HIGHBITDEPTH aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask, bh, bw); } } i += mi_step; } while (i < miw); } // handle left column if (xd->left_available) { const int overlap = num_4x4_blocks_wide_lookup[bsize] * 2; const int mih = AOMMIN(xd->n8_h, cm->mi_rows - mi_row); const int mi_col_offset = -1; const int neighbor_limit = max_neighbor_obmc[b_height_log2_lookup[bsize]]; int neighbor_count = 0; assert(mih > 0); i = 0; do { // for each mi in the left column const int mi_row_offset = i; const MB_MODE_INFO *const left_mbmi = &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi; const BLOCK_SIZE l_bsize = left_mbmi->sb_type; const int mi_step = AOMMIN(xd->n8_h, mi_size_high[l_bsize]); if (is_neighbor_overlappable(left_mbmi)) { neighbor_count++; if (neighbor_count > neighbor_limit) break; for (plane = 0; plane < MAX_MB_PLANE; ++plane) { const struct macroblockd_plane *pd = &xd->plane[plane]; const int bw = overlap >> pd->subsampling_x; const int bh = (mi_step * MI_SIZE) >> pd->subsampling_y; const int dst_stride = pd->dst.stride; uint8_t *const dst = &pd->dst.buf[(i * MI_SIZE * dst_stride) >> pd->subsampling_y]; const int tmp_stride = left_stride[plane]; const uint8_t *const tmp = &left[plane][(i * MI_SIZE * tmp_stride) >> pd->subsampling_y]; const uint8_t *const mask = av1_get_obmc_mask(bw); #if CONFIG_AOM_HIGHBITDEPTH if (is_hbd) aom_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask, bh, bw, xd->bd); else #endif // CONFIG_AOM_HIGHBITDEPTH aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask, bh, bw); } } i += mi_step; } while (i < mih); } } void modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) { #if CONFIG_EXT_INTER if (is_interintra_pred(mbmi)) { mbmi->ref_frame[1] = NONE_FRAME; } else if (has_second_ref(mbmi) && is_masked_compound_type(mbmi->interinter_compound_data.type)) { mbmi->interinter_compound_data.type = COMPOUND_AVERAGE; mbmi->ref_frame[1] = NONE_FRAME; } #endif // CONFIG_EXT_INTER if (has_second_ref(mbmi)) mbmi->ref_frame[1] = NONE_FRAME; return; } void av1_build_prediction_by_above_preds(const AV1_COMMON *cm, 
MACROBLOCKD *xd, int mi_row, int mi_col, uint8_t *tmp_buf[MAX_MB_PLANE], int tmp_width[MAX_MB_PLANE], int tmp_height[MAX_MB_PLANE], int tmp_stride[MAX_MB_PLANE]) { const TileInfo *const tile = &xd->tile; BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; int i, j, mi_step, ref; const int ilimit = AOMMIN(xd->n8_w, cm->mi_cols - mi_col); int mb_to_right_edge_base = xd->mb_to_right_edge; const int neighbor_limit = max_neighbor_obmc[b_width_log2_lookup[bsize]]; int neighbor_count = 0; if (mi_row <= tile->mi_row_start) return; xd->mb_to_bottom_edge += xd->n8_h * 32; for (i = 0; i < ilimit; i += mi_step) { int mi_row_offset = -1; int mi_col_offset = i; int mi_x, mi_y, bw, bh; MODE_INFO *above_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]; MB_MODE_INFO *above_mbmi = &above_mi->mbmi; const BLOCK_SIZE a_bsize = above_mbmi->sb_type; MB_MODE_INFO backup_mbmi; mi_step = AOMMIN(xd->n8_w, mi_size_wide[a_bsize]); if (!is_neighbor_overlappable(above_mbmi)) continue; neighbor_count++; if (neighbor_count > neighbor_limit) break; backup_mbmi = *above_mbmi; modify_neighbor_predictor_for_obmc(above_mbmi); for (j = 0; j < MAX_MB_PLANE; ++j) { struct macroblockd_plane *const pd = &xd->plane[j]; setup_pred_plane(&pd->dst, tmp_buf[j], tmp_width[j], tmp_height[j], tmp_stride[j], 0, i, NULL, pd->subsampling_x, pd->subsampling_y); } for (ref = 0; ref < 1 + has_second_ref(above_mbmi); ++ref) { const MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref]; const RefBuffer *const ref_buf = &cm->frame_refs[frame - LAST_FRAME]; xd->block_refs[ref] = ref_buf; if ((!av1_is_valid_scale(&ref_buf->sf))) aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM, "Reference frame has invalid dimensions"); av1_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col + i, &ref_buf->sf); } xd->mb_to_left_edge = -(((mi_col + i) * MI_SIZE) * 8); xd->mb_to_right_edge = mb_to_right_edge_base + (xd->n8_w - i - mi_step) * 64; mi_x = (mi_col + i) << MI_SIZE_LOG2; mi_y = mi_row << MI_SIZE_LOG2; for (j = 0; j < MAX_MB_PLANE; ++j) { const struct macroblockd_plane *pd = &xd->plane[j]; bw = (mi_step * MI_SIZE) >> pd->subsampling_x; bh = AOMMAX((num_4x4_blocks_high_lookup[bsize] * 2) >> pd->subsampling_y, 4); #if CONFIG_WARPED_MOTION if (above_mbmi->motion_mode == WARPED_CAUSAL) { assert_motion_mode_valid(WARPED_CAUSAL, #if CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION 0, cm->global_motion, #endif // CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION above_mi); av1_warp_plane(&above_mbmi->wm_params[0], #if CONFIG_AOM_HIGHBITDEPTH xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd, #endif // CONFIG_AOM_HIGHBITDEPTH pd->pre[0].buf0, pd->pre[0].width, pd->pre[0].height, pd->pre[0].stride, pd->dst.buf, (((mi_col + i) * MI_SIZE) >> pd->subsampling_x), ((mi_row * MI_SIZE) >> pd->subsampling_y), bw, bh, pd->dst.stride, pd->subsampling_x, pd->subsampling_y, 16, 16, 0); } else { #endif // CONFIG_WARPED_MOTION build_inter_predictors(xd, j, mi_col_offset, mi_row_offset, 0, bw, bh, 0, 0, bw, bh, #if CONFIG_SUPERTX && CONFIG_EXT_INTER 0, 0, #endif // CONFIG_SUPERTX && CONFIG_EXT_INTER mi_x, mi_y); #if CONFIG_WARPED_MOTION } #endif // CONFIG_WARPED_MOTION } *above_mbmi = backup_mbmi; } xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8); xd->mb_to_right_edge = mb_to_right_edge_base; xd->mb_to_bottom_edge -= xd->n8_h * 32; } void av1_build_prediction_by_left_preds(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col, uint8_t *tmp_buf[MAX_MB_PLANE], int tmp_width[MAX_MB_PLANE], int tmp_height[MAX_MB_PLANE], int tmp_stride[MAX_MB_PLANE]) { const 
TileInfo *const tile = &xd->tile; BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; int i, j, mi_step, ref; const int ilimit = AOMMIN(xd->n8_h, cm->mi_rows - mi_row); int mb_to_bottom_edge_base = xd->mb_to_bottom_edge; const int neighbor_limit = max_neighbor_obmc[b_height_log2_lookup[bsize]]; int neighbor_count = 0; if (mi_col == 0 || (mi_col - 1 < tile->mi_col_start)) return; xd->mb_to_right_edge += xd->n8_w * 32; for (i = 0; i < ilimit; i += mi_step) { int mi_row_offset = i; int mi_col_offset = -1; int mi_x, mi_y, bw, bh; MODE_INFO *left_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]; MB_MODE_INFO *left_mbmi = &left_mi->mbmi; const BLOCK_SIZE l_bsize = left_mbmi->sb_type; MB_MODE_INFO backup_mbmi; mi_step = AOMMIN(xd->n8_h, mi_size_high[l_bsize]); if (!is_neighbor_overlappable(left_mbmi)) continue; neighbor_count++; if (neighbor_count > neighbor_limit) break; backup_mbmi = *left_mbmi; modify_neighbor_predictor_for_obmc(left_mbmi); for (j = 0; j < MAX_MB_PLANE; ++j) { struct macroblockd_plane *const pd = &xd->plane[j]; setup_pred_plane(&pd->dst, tmp_buf[j], tmp_width[j], tmp_height[j], tmp_stride[j], i, 0, NULL, pd->subsampling_x, pd->subsampling_y); } for (ref = 0; ref < 1 + has_second_ref(left_mbmi); ++ref) { const MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref]; const RefBuffer *const ref_buf = &cm->frame_refs[frame - LAST_FRAME]; xd->block_refs[ref] = ref_buf; if ((!av1_is_valid_scale(&ref_buf->sf))) aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM, "Reference frame has invalid dimensions"); av1_setup_pre_planes(xd, ref, ref_buf->buf, mi_row + i, mi_col, &ref_buf->sf); } xd->mb_to_top_edge = -(((mi_row + i) * MI_SIZE) * 8); xd->mb_to_bottom_edge = mb_to_bottom_edge_base + (xd->n8_h - i - mi_step) * 64; mi_x = mi_col << MI_SIZE_LOG2; mi_y = (mi_row + i) << MI_SIZE_LOG2; for (j = 0; j < MAX_MB_PLANE; ++j) { const struct macroblockd_plane *pd = &xd->plane[j]; bw = AOMMAX((num_4x4_blocks_wide_lookup[bsize] * 2) >> pd->subsampling_x, 4); bh = (mi_step << MI_SIZE_LOG2) >> pd->subsampling_y; #if CONFIG_WARPED_MOTION if (left_mbmi->motion_mode == WARPED_CAUSAL) { assert_motion_mode_valid(WARPED_CAUSAL, #if CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION 0, cm->global_motion, #endif // CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION left_mi); av1_warp_plane(&left_mbmi->wm_params[0], #if CONFIG_AOM_HIGHBITDEPTH xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd, #endif // CONFIG_AOM_HIGHBITDEPTH pd->pre[0].buf0, pd->pre[0].width, pd->pre[0].height, pd->pre[0].stride, pd->dst.buf, ((mi_col * MI_SIZE) >> pd->subsampling_x), (((mi_row + i) * MI_SIZE) >> pd->subsampling_y), bw, bh, pd->dst.stride, pd->subsampling_x, pd->subsampling_y, 16, 16, 0); } else { #endif // CONFIG_WARPED_MOTION build_inter_predictors(xd, j, mi_col_offset, mi_row_offset, 0, bw, bh, 0, 0, bw, bh, #if CONFIG_SUPERTX && CONFIG_EXT_INTER 0, 0, #endif // CONFIG_SUPERTX && CONFIG_EXT_INTER mi_x, mi_y); #if CONFIG_WARPED_MOTION } #endif // CONFIG_WARPED_MOTION } *left_mbmi = backup_mbmi; } xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8); xd->mb_to_bottom_edge = mb_to_bottom_edge_base; xd->mb_to_right_edge -= xd->n8_w * 32; } void av1_build_obmc_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col) { #if CONFIG_AOM_HIGHBITDEPTH DECLARE_ALIGNED(16, uint8_t, tmp_buf1[2 * MAX_MB_PLANE * MAX_SB_SQUARE]); DECLARE_ALIGNED(16, uint8_t, tmp_buf2[2 * MAX_MB_PLANE * MAX_SB_SQUARE]); #else DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * MAX_SB_SQUARE]); DECLARE_ALIGNED(16, uint8_t, 
tmp_buf2[MAX_MB_PLANE * MAX_SB_SQUARE]); #endif // CONFIG_AOM_HIGHBITDEPTH uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE]; int dst_stride1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }; int dst_stride2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }; int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }; int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }; int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }; int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }; #if CONFIG_AOM_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { int len = sizeof(uint16_t); dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1); dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * len); dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * 2 * len); dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2); dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * len); dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * 2 * len); } else { #endif // CONFIG_AOM_HIGHBITDEPTH dst_buf1[0] = tmp_buf1; dst_buf1[1] = tmp_buf1 + MAX_SB_SQUARE; dst_buf1[2] = tmp_buf1 + MAX_SB_SQUARE * 2; dst_buf2[0] = tmp_buf2; dst_buf2[1] = tmp_buf2 + MAX_SB_SQUARE; dst_buf2[2] = tmp_buf2 + MAX_SB_SQUARE * 2; #if CONFIG_AOM_HIGHBITDEPTH } #endif // CONFIG_AOM_HIGHBITDEPTH av1_build_prediction_by_above_preds(cm, xd, mi_row, mi_col, dst_buf1, dst_width1, dst_height1, dst_stride1); av1_build_prediction_by_left_preds(cm, xd, mi_row, mi_col, dst_buf2, dst_width2, dst_height2, dst_stride2); av1_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col); av1_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, dst_buf1, dst_stride1, dst_buf2, dst_stride2); } #if CONFIG_NCOBMC void av1_build_prediction_by_bottom_preds(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col, uint8_t *tmp_buf[MAX_MB_PLANE], int tmp_width[MAX_MB_PLANE], int tmp_height[MAX_MB_PLANE], int tmp_stride[MAX_MB_PLANE]) { const TileInfo *const tile = &xd->tile; BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; int i, j, mi_step, ref; const int ilimit = AOMMIN(xd->n8_w, cm->mi_cols - mi_col); int mb_to_right_edge_base = xd->mb_to_right_edge; if (mi_row + xd->n8_h >= tile->mi_row_end || (mi_row + xd->n8_h) % MI_SIZE == 0 || (mi_row + xd->n8_h) >= cm->mi_rows) return; assert(bsize >= BLOCK_8X8); xd->mb_to_top_edge -= xd->n8_h * 32; for (i = 0; i < ilimit; i += mi_step) { int mi_row_offset = xd->n8_h; int mi_col_offset = i; int mi_x, mi_y, bw, bh; MODE_INFO *mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]; MB_MODE_INFO *mbmi = &mi->mbmi; #if CONFIG_EXT_INTER MB_MODE_INFO backup_mbmi; #endif // CONFIG_EXT_INTER mi_step = AOMMIN(xd->n8_w, mi_size_wide[mbmi->sb_type]); if (!is_neighbor_overlappable(mbmi)) continue; #if CONFIG_EXT_INTER backup_mbmi = *mbmi; modify_neighbor_predictor_for_obmc(mbmi); #endif // CONFIG_EXT_INTER for (j = 0; j < MAX_MB_PLANE; ++j) { struct macroblockd_plane *const pd = &xd->plane[j]; setup_pred_plane(&pd->dst, tmp_buf[j], tmp_width[j], tmp_height[j], tmp_stride[j], (xd->n8_h >> 1), i, NULL, pd->subsampling_x, pd->subsampling_y); } for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) { const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref]; const RefBuffer *const ref_buf = &cm->frame_refs[frame - LAST_FRAME]; xd->block_refs[ref] = ref_buf; if ((!av1_is_valid_scale(&ref_buf->sf))) aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM, "Reference frame has invalid dimensions"); av1_setup_pre_planes(xd, 
ref, ref_buf->buf, mi_row + (xd->n8_h >> 1), mi_col + i, &ref_buf->sf); } xd->mb_to_left_edge = -(((mi_col + i) * MI_SIZE) * 8); xd->mb_to_right_edge = mb_to_right_edge_base + (xd->n8_w - i - mi_step) * 64; mi_x = (mi_col + i) << MI_SIZE_LOG2; mi_y = (mi_row << MI_SIZE_LOG2) + xd->n8_h * 4; for (j = 0; j < MAX_MB_PLANE; ++j) { const struct macroblockd_plane *pd = &xd->plane[j]; bw = (mi_step << MI_SIZE_LOG2) >> pd->subsampling_x; bh = (num_4x4_blocks_high_lookup[bsize] << 1) >> pd->subsampling_y; if (mbmi->sb_type < BLOCK_8X8 && !CONFIG_CB4X4) { const PARTITION_TYPE bp = BLOCK_8X8 - mbmi->sb_type; const int have_vsplit = bp != PARTITION_HORZ; const int have_hsplit = bp != PARTITION_VERT; const int num_4x4_w = 2 >> (!have_vsplit); const int num_4x4_h = 2 >> (!have_hsplit); const int pw = 8 >> (have_vsplit + pd->subsampling_x); int x, y; for (y = 0; y < num_4x4_h; ++y) for (x = 0; x < num_4x4_w; ++x) { if ((bp == PARTITION_HORZ || bp == PARTITION_SPLIT) && y != 0) continue; build_inter_predictors( xd, j, mi_col_offset, mi_row_offset, y * 2 + x, bw, bh, (4 * x) >> pd->subsampling_x, xd->n8_h == 1 ? (4 >> pd->subsampling_y) : 0, pw, bh, #if CONFIG_SUPERTX && CONFIG_EXT_INTER 0, 0, #endif // CONFIG_SUPERTX && CONFIG_EXT_INTER mi_x, mi_y); } } else { #if CONFIG_WARPED_MOTION if (mbmi->motion_mode == WARPED_CAUSAL) { assert_motion_mode_valid(WARPED_CAUSAL, #if CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION 0, cm->global_motion, #endif // CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION mi); av1_warp_plane(&mbmi->wm_params[0], #if CONFIG_AOM_HIGHBITDEPTH xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd, #endif // CONFIG_AOM_HIGHBITDEPTH pd->pre[0].buf0, pd->pre[0].width, pd->pre[0].height, pd->pre[0].stride, pd->dst.buf, (((mi_col + i) * MI_SIZE) >> pd->subsampling_x), ((mi_row * MI_SIZE) >> pd->subsampling_y), bw, bh, pd->dst.stride, pd->subsampling_x, pd->subsampling_y, 16, 16, 0); } else { #endif // CONFIG_WARPED_MOTION build_inter_predictors( xd, j, mi_col_offset, mi_row_offset, 0, bw, bh, 0, xd->n8_h == 1 ? 
(4 >> pd->subsampling_y) : 0, bw, bh, #if CONFIG_SUPERTX && CONFIG_EXT_INTER 0, 0, #endif // CONFIG_SUPERTX && CONFIG_EXT_INTER mi_x, mi_y); #if CONFIG_WARPED_MOTION } #endif // CONFIG_WARPED_MOTION } } #if CONFIG_EXT_INTER *mbmi = backup_mbmi; #endif // CONFIG_EXT_INTER } xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8); xd->mb_to_right_edge = mb_to_right_edge_base; xd->mb_to_top_edge += xd->n8_h * 32; } void av1_build_prediction_by_right_preds(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col, uint8_t *tmp_buf[MAX_MB_PLANE], int tmp_width[MAX_MB_PLANE], int tmp_height[MAX_MB_PLANE], const int tmp_stride[MAX_MB_PLANE]) { const TileInfo *const tile = &xd->tile; BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; int i, j, mi_step, ref; const int ilimit = AOMMIN(xd->n8_h, cm->mi_rows - mi_row); int mb_to_bottom_edge_base = xd->mb_to_bottom_edge; if (mi_col + xd->n8_w >= tile->mi_col_end || (mi_col + xd->n8_w) % MI_SIZE == 0 || (mi_col + xd->n8_w) >= cm->mi_cols) return; xd->mb_to_left_edge -= xd->n8_w * 32; for (i = 0; i < ilimit; i += mi_step) { int mi_row_offset = i; int mi_col_offset = xd->n8_w; int mi_x, mi_y, bw, bh; MODE_INFO *mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]; MB_MODE_INFO *mbmi = &mi->mbmi; #if CONFIG_EXT_INTER MB_MODE_INFO backup_mbmi; #endif // CONFIG_EXT_INTER mi_step = AOMMIN(xd->n8_h, mi_size_high[mbmi->sb_type]); if (!is_neighbor_overlappable(mbmi)) continue; #if CONFIG_EXT_INTER backup_mbmi = *mbmi; modify_neighbor_predictor_for_obmc(mbmi); #endif // CONFIG_EXT_INTER for (j = 0; j < MAX_MB_PLANE; ++j) { struct macroblockd_plane *const pd = &xd->plane[j]; setup_pred_plane(&pd->dst, tmp_buf[j], tmp_width[j], tmp_height[j], tmp_stride[j], i, xd->n8_w >> 1, NULL, pd->subsampling_x, pd->subsampling_y); } for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) { const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref]; const RefBuffer *const ref_buf = &cm->frame_refs[frame - LAST_FRAME]; xd->block_refs[ref] = ref_buf; if ((!av1_is_valid_scale(&ref_buf->sf))) aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM, "Reference frame has invalid dimensions"); av1_setup_pre_planes(xd, ref, ref_buf->buf, mi_row + i, mi_col + (xd->n8_w >> 1), &ref_buf->sf); } xd->mb_to_top_edge = -(((mi_row + i) * MI_SIZE) * 8); xd->mb_to_bottom_edge = mb_to_bottom_edge_base + (xd->n8_h - i - mi_step) * 64; mi_x = (mi_col << MI_SIZE_LOG2) + xd->n8_w * 4; mi_y = (mi_row + i) << MI_SIZE_LOG2; for (j = 0; j < MAX_MB_PLANE; ++j) { const struct macroblockd_plane *pd = &xd->plane[j]; bw = (num_4x4_blocks_wide_lookup[bsize] << 1) >> pd->subsampling_x; bh = (mi_step << MI_SIZE_LOG2) >> pd->subsampling_y; if (mbmi->sb_type < BLOCK_8X8 && !CONFIG_CB4X4) { const PARTITION_TYPE bp = BLOCK_8X8 - mbmi->sb_type; const int have_vsplit = bp != PARTITION_HORZ; const int have_hsplit = bp != PARTITION_VERT; const int num_4x4_w = 2 >> (!have_vsplit); const int num_4x4_h = 2 >> (!have_hsplit); const int ph = 8 >> (have_hsplit + pd->subsampling_y); int x, y; for (y = 0; y < num_4x4_h; ++y) for (x = 0; x < num_4x4_w; ++x) { if ((bp == PARTITION_VERT || bp == PARTITION_SPLIT) && x != 0) continue; build_inter_predictors(xd, j, mi_col_offset, mi_row_offset, y * 2 + x, bw, bh, xd->n8_w == 1 ? 
4 >> pd->subsampling_x : 0, (4 * y) >> pd->subsampling_y, bw, ph, #if CONFIG_SUPERTX && CONFIG_EXT_INTER 0, 0, #endif // CONFIG_SUPERTX && CONFIG_EXT_INTER mi_x, mi_y); } } else { #if CONFIG_WARPED_MOTION if (mbmi->motion_mode == WARPED_CAUSAL) { assert_motion_mode_valid(WARPED_CAUSAL, #if CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION 0, cm->global_motion, #endif // CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION mi); av1_warp_plane(&mbmi->wm_params[0], #if CONFIG_AOM_HIGHBITDEPTH xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd, #endif // CONFIG_AOM_HIGHBITDEPTH pd->pre[0].buf0, pd->pre[0].width, pd->pre[0].height, pd->pre[0].stride, pd->dst.buf, ((mi_col * MI_SIZE) >> pd->subsampling_x), (((mi_row + i) * MI_SIZE) >> pd->subsampling_y), bw, bh, pd->dst.stride, pd->subsampling_x, pd->subsampling_y, 16, 16, 0); } else { #endif // CONFIG_WARPED_MOTION build_inter_predictors(xd, j, mi_col_offset, mi_row_offset, 0, bw, bh, xd->n8_w == 1 ? 4 >> pd->subsampling_x : 0, 0, bw, bh, #if CONFIG_SUPERTX && CONFIG_EXT_INTER 0, 0, #endif // CONFIG_SUPERTX && CONFIG_EXT_INTER mi_x, mi_y); #if CONFIG_WARPED_MOTION } #endif // CONFIG_WARPED_MOTION } } #if CONFIG_EXT_INTER *mbmi = backup_mbmi; #endif // CONFIG_EXT_INTER } xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8); xd->mb_to_bottom_edge = mb_to_bottom_edge_base; xd->mb_to_left_edge += xd->n8_w * 32; } // This function combines motion compensated predictions that is generated by // bottom/right neighboring blocks' inter predictors with prediction in dst // buffer. void av1_merge_dst_bottom_right_preds(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col, uint8_t *bottom[MAX_MB_PLANE], const int bottom_stride[MAX_MB_PLANE], uint8_t *right[MAX_MB_PLANE], const int right_stride[MAX_MB_PLANE]) { const TileInfo *const tile = &xd->tile; BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; int plane, i, mi_step; const int bottom_available = mi_row + xd->n8_h < tile->mi_row_end && (mi_row + xd->n8_h) % MI_SIZE != 0 && (mi_row + xd->n8_h) < cm->mi_rows; #if CONFIG_AOM_HIGHBITDEPTH int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 
1 : 0; #endif // CONFIG_AOM_HIGHBITDEPTH // handle bottom row for (i = 0; bottom_available && i < AOMMIN(xd->n8_w, cm->mi_cols - mi_col); i += mi_step) { int mi_row_offset = xd->n8_h; int mi_col_offset = i; MODE_INFO *mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]; MB_MODE_INFO *mbmi = &mi->mbmi; int overlap; mi_step = AOMMIN(xd->n8_w, mi_size_wide[mbmi->sb_type]); if (!is_neighbor_overlappable(mbmi)) continue; overlap = num_4x4_blocks_high_lookup[bsize] << 1; for (plane = 0; plane < MAX_MB_PLANE; ++plane) { const struct macroblockd_plane *pd = &xd->plane[plane]; const int bw = (mi_step * MI_SIZE) >> pd->subsampling_x; const int bh = overlap >> pd->subsampling_y; const int dst_stride = pd->dst.stride; uint8_t *dst = &pd->dst.buf[((i * MI_SIZE) >> pd->subsampling_x) + (((xd->n8_h * MI_SIZE - overlap) * dst_stride) >> pd->subsampling_y)]; const int tmp_stride = bottom_stride[plane]; const uint8_t *const tmp = &bottom[plane][((i * MI_SIZE) >> pd->subsampling_x) + (((xd->n8_h * MI_SIZE - overlap) * tmp_stride) >> pd->subsampling_y)]; const uint8_t *const mask = av1_get_obmc_mask_flipped(bh); #if CONFIG_AOM_HIGHBITDEPTH if (is_hbd) aom_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask, bh, bw, xd->bd); else #endif // CONFIG_AOM_HIGHBITDEPTH aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask, bh, bw); } } // each mi in the bottom row // handle right column if (mi_col + xd->n8_w >= tile->mi_col_end || (mi_col + xd->n8_w) % MI_SIZE == 0 || (mi_col + xd->n8_w) >= cm->mi_cols) return; for (i = 0; i < AOMMIN(xd->n8_h, cm->mi_rows - mi_row); i += mi_step) { int mi_row_offset = i; int mi_col_offset = xd->n8_w; int overlap; MODE_INFO *mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]; MB_MODE_INFO *mbmi = &mi->mbmi; mi_step = AOMMIN(xd->n8_h, mi_size_high[mbmi->sb_type]); if (!is_neighbor_overlappable(mbmi)) continue; overlap = num_4x4_blocks_wide_lookup[bsize] << 1; for (plane = 0; plane < MAX_MB_PLANE; ++plane) { const struct macroblockd_plane *pd = &xd->plane[plane]; const int bw = overlap >> pd->subsampling_x; const int bh = (mi_step * MI_SIZE) >> pd->subsampling_y; const int dst_stride = pd->dst.stride; uint8_t *dst = &pd->dst.buf[((i * MI_SIZE * dst_stride) >> pd->subsampling_y) + ((xd->n8_w * MI_SIZE - overlap) >> pd->subsampling_x)]; const int tmp_stride = right_stride[plane]; const uint8_t *const tmp = &right[plane][((i * MI_SIZE * tmp_stride) >> pd->subsampling_y) + ((xd->n8_w * MI_SIZE - overlap) >> pd->subsampling_x)]; const uint8_t *const mask = av1_get_obmc_mask_flipped(bw); #if CONFIG_AOM_HIGHBITDEPTH if (is_hbd) aom_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask, bh, bw, xd->bd); else #endif // CONFIG_AOM_HIGHBITDEPTH aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask, bh, bw); } } // each mi in the right column } // This function generates 4 sided obmc. (1) Prediction blocks generated by // bottom and right motion vectors are calculated. (2) Combine them with the // original prediction block (which should be pre-stored in xd->plane[].dst.buf // before calling this function). The results is updated in xd->plane[].dst.buf // (3) Call causal obmc prediction function, which will generate left and above // preds, and then merge them and xd->plane[].dst.buf. 
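//
// Illustrative sketch (not part of the build), assuming the AOM_BLEND_A64
// semantics from aom_dsp/blend.h (6-bit weights, maximum 64): each
// aom_blend_a64_vmask()/aom_blend_a64_hmask() call in
// av1_merge_dst_bottom_right_preds() reduces to the per-pixel average
//
//   dst[r][c] = (m * dst[r][c] + (64 - m) * tmp[r][c] + 32) >> 6
//
// where the weight m is read from av1_get_obmc_mask_flipped(), varying per
// row for the bottom overlap (vmask) and per column for the right overlap
// (hmask), so the existing prediction in dst loses weight as the sample
// approaches the edge shared with the neighboring block.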
void av1_build_ncobmc_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col) { #if CONFIG_AOM_HIGHBITDEPTH DECLARE_ALIGNED(16, uint8_t, tmp_buf1[2 * MAX_MB_PLANE * MAX_SB_SQUARE]); DECLARE_ALIGNED(16, uint8_t, tmp_buf2[2 * MAX_MB_PLANE * MAX_SB_SQUARE]); #else DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * MAX_SB_SQUARE]); DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * MAX_SB_SQUARE]); #endif // CONFIG_AOM_HIGHBITDEPTH uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE]; int dst_stride1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }; int dst_stride2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }; int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }; int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }; int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }; int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }; #if CONFIG_AOM_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { int len = sizeof(uint16_t); dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1); dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * len); dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * 2 * len); dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2); dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * len); dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * 2 * len); } else { #endif // CONFIG_AOM_HIGHBITDEPTH dst_buf1[0] = tmp_buf1; dst_buf1[1] = tmp_buf1 + MAX_SB_SQUARE; dst_buf1[2] = tmp_buf1 + MAX_SB_SQUARE * 2; dst_buf2[0] = tmp_buf2; dst_buf2[1] = tmp_buf2 + MAX_SB_SQUARE; dst_buf2[2] = tmp_buf2 + MAX_SB_SQUARE * 2; #if CONFIG_AOM_HIGHBITDEPTH } #endif // CONFIG_AOM_HIGHBITDEPTH av1_build_prediction_by_bottom_preds(cm, xd, mi_row, mi_col, dst_buf1, dst_width1, dst_height1, dst_stride1); av1_build_prediction_by_right_preds(cm, xd, mi_row, mi_col, dst_buf2, dst_width2, dst_height2, dst_stride2); av1_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col); av1_merge_dst_bottom_right_preds(cm, xd, mi_row, mi_col, dst_buf1, dst_stride1, dst_buf2, dst_stride2); av1_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col); av1_build_obmc_inter_predictors_sb(cm, xd, mi_row, mi_col); av1_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col); } #endif // CONFIG_NCOBMC #endif // CONFIG_MOTION_VAR #if CONFIG_EXT_INTER /* clang-format off */ #if CONFIG_EXT_PARTITION static const int ii_weights1d[MAX_SB_SIZE] = { 102, 100, 97, 95, 92, 90, 88, 86, 84, 82, 80, 78, 76, 74, 73, 71, 69, 68, 67, 65, 64, 62, 61, 60, 59, 58, 57, 55, 54, 53, 52, 52, 51, 50, 49, 48, 47, 47, 46, 45, 45, 44, 43, 43, 42, 41, 41, 40, 40, 39, 39, 38, 38, 38, 37, 37, 36, 36, 36, 35, 35, 35, 34, 34, 34, 33, 33, 33, 33, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 30, 30, 29, 29, 29, 29, 29, 29, 29, 29, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, }; static int ii_size_scales[BLOCK_SIZES] = { #if CONFIG_CB4X4 32, 32, 32, #endif 32, 16, 16, 16, 8, 8, 8, 4, 4, 4, 2, 2, 2, 1, 1, 1, }; #else static const int ii_weights1d[MAX_SB_SIZE] = { 102, 100, 97, 95, 92, 90, 88, 86, 84, 82, 80, 78, 76, 74, 73, 71, 69, 68, 67, 65, 64, 62, 61, 60, 59, 58, 57, 55, 54, 53, 52, 52, 51, 50, 49, 48, 47, 47, 46, 45, 45, 44, 43, 43, 42, 41, 41, 40, 40, 39, 39, 38, 38, 38, 37, 37, 36, 36, 36, 35, 35, 35, 34, 34, }; static int ii_size_scales[BLOCK_SIZES] = { #if 
CONFIG_CB4X4 16, 16, 16, #endif 16, 8, 8, 8, 4, 4, 4, 2, 2, 2, 1, 1, 1, }; /* clang-format on */ #endif // CONFIG_EXT_PARTITION static void combine_interintra(INTERINTRA_MODE mode, int use_wedge_interintra, int wedge_index, int wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize, uint8_t *comppred, int compstride, const uint8_t *interpred, int interstride, const uint8_t *intrapred, int intrastride) { const int bw = block_size_wide[plane_bsize]; const int bh = block_size_high[plane_bsize]; const int size_scale = ii_size_scales[plane_bsize]; int i, j; if (use_wedge_interintra) { if (is_interintra_wedge_used(bsize)) { const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize); const int subw = 2 * num_4x4_blocks_wide_lookup[bsize] == bw; const int subh = 2 * num_4x4_blocks_high_lookup[bsize] == bh; aom_blend_a64_mask(comppred, compstride, intrapred, intrastride, interpred, interstride, mask, block_size_wide[bsize], bh, bw, subh, subw); } return; } switch (mode) { case II_V_PRED: for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { int scale = ii_weights1d[i * size_scale]; comppred[i * compstride + j] = AOM_BLEND_A256(scale, intrapred[i * intrastride + j], interpred[i * interstride + j]); } } break; case II_H_PRED: for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { int scale = ii_weights1d[j * size_scale]; comppred[i * compstride + j] = AOM_BLEND_A256(scale, intrapred[i * intrastride + j], interpred[i * interstride + j]); } } break; case II_D63_PRED: case II_D117_PRED: for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { int scale = (ii_weights1d[i * size_scale] * 3 + ii_weights1d[j * size_scale]) >> 2; comppred[i * compstride + j] = AOM_BLEND_A256(scale, intrapred[i * intrastride + j], interpred[i * interstride + j]); } } break; case II_D207_PRED: case II_D153_PRED: for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { int scale = (ii_weights1d[j * size_scale] * 3 + ii_weights1d[i * size_scale]) >> 2; comppred[i * compstride + j] = AOM_BLEND_A256(scale, intrapred[i * intrastride + j], interpred[i * interstride + j]); } } break; case II_D135_PRED: for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { int scale = ii_weights1d[(i < j ? 
i : j) * size_scale]; comppred[i * compstride + j] = AOM_BLEND_A256(scale, intrapred[i * intrastride + j], interpred[i * interstride + j]); } } break; case II_D45_PRED: for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { int scale = (ii_weights1d[i * size_scale] + ii_weights1d[j * size_scale]) >> 1; comppred[i * compstride + j] = AOM_BLEND_A256(scale, intrapred[i * intrastride + j], interpred[i * interstride + j]); } } break; case II_TM_PRED: case II_DC_PRED: default: for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { comppred[i * compstride + j] = AOM_BLEND_AVG( intrapred[i * intrastride + j], interpred[i * interstride + j]); } } break; } } #if CONFIG_AOM_HIGHBITDEPTH static void combine_interintra_highbd( INTERINTRA_MODE mode, int use_wedge_interintra, int wedge_index, int wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize, uint8_t *comppred8, int compstride, const uint8_t *interpred8, int interstride, const uint8_t *intrapred8, int intrastride, int bd) { const int bw = block_size_wide[plane_bsize]; const int bh = block_size_high[plane_bsize]; const int size_scale = ii_size_scales[plane_bsize]; int i, j; uint16_t *comppred = CONVERT_TO_SHORTPTR(comppred8); const uint16_t *interpred = CONVERT_TO_SHORTPTR(interpred8); const uint16_t *intrapred = CONVERT_TO_SHORTPTR(intrapred8); if (use_wedge_interintra) { if (is_interintra_wedge_used(bsize)) { const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize); const int subh = 2 * num_4x4_blocks_high_lookup[bsize] == bh; const int subw = 2 * num_4x4_blocks_wide_lookup[bsize] == bw; aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride, interpred8, interstride, mask, bw, bh, bw, subh, subw, bd); } return; } switch (mode) { case II_V_PRED: for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { int scale = ii_weights1d[i * size_scale]; comppred[i * compstride + j] = AOM_BLEND_A256(scale, intrapred[i * intrastride + j], interpred[i * interstride + j]); } } break; case II_H_PRED: for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { int scale = ii_weights1d[j * size_scale]; comppred[i * compstride + j] = AOM_BLEND_A256(scale, intrapred[i * intrastride + j], interpred[i * interstride + j]); } } break; case II_D63_PRED: case II_D117_PRED: for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { int scale = (ii_weights1d[i * size_scale] * 3 + ii_weights1d[j * size_scale]) >> 2; comppred[i * compstride + j] = AOM_BLEND_A256(scale, intrapred[i * intrastride + j], interpred[i * interstride + j]); } } break; case II_D207_PRED: case II_D153_PRED: for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { int scale = (ii_weights1d[j * size_scale] * 3 + ii_weights1d[i * size_scale]) >> 2; comppred[i * compstride + j] = AOM_BLEND_A256(scale, intrapred[i * intrastride + j], interpred[i * interstride + j]); } } break; case II_D135_PRED: for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { int scale = ii_weights1d[(i < j ? 
i : j) * size_scale]; comppred[i * compstride + j] = AOM_BLEND_A256(scale, intrapred[i * intrastride + j], interpred[i * interstride + j]); } } break; case II_D45_PRED: for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { int scale = (ii_weights1d[i * size_scale] + ii_weights1d[j * size_scale]) >> 1; comppred[i * compstride + j] = AOM_BLEND_A256(scale, intrapred[i * intrastride + j], interpred[i * interstride + j]); } } break; case II_TM_PRED: case II_DC_PRED: default: for (i = 0; i < bh; ++i) { for (j = 0; j < bw; ++j) { comppred[i * compstride + j] = AOM_BLEND_AVG( interpred[i * interstride + j], intrapred[i * intrastride + j]); } } break; } } #endif // CONFIG_AOM_HIGHBITDEPTH void av1_build_intra_predictors_for_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane, BUFFER_SET *ctx, uint8_t *dst, int dst_stride) { struct macroblockd_plane *const pd = &xd->plane[plane]; BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, &xd->plane[plane]); PREDICTION_MODE mode = interintra_to_intra_mode[xd->mi[0]->mbmi.interintra_mode]; av1_predict_intra_block(xd, pd->width, pd->height, plane_bsize, mode, ctx->plane[plane], ctx->stride[plane], dst, dst_stride, 0, 0, plane); } void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane, const uint8_t *inter_pred, int inter_stride, const uint8_t *intra_pred, int intra_stride) { const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, &xd->plane[plane]); #if CONFIG_AOM_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { combine_interintra_highbd( xd->mi[0]->mbmi.interintra_mode, xd->mi[0]->mbmi.use_wedge_interintra, xd->mi[0]->mbmi.interintra_wedge_index, xd->mi[0]->mbmi.interintra_wedge_sign, bsize, plane_bsize, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride, inter_pred, inter_stride, intra_pred, intra_stride, xd->bd); return; } #endif // CONFIG_AOM_HIGHBITDEPTH combine_interintra(xd->mi[0]->mbmi.interintra_mode, xd->mi[0]->mbmi.use_wedge_interintra, xd->mi[0]->mbmi.interintra_wedge_index, xd->mi[0]->mbmi.interintra_wedge_sign, bsize, plane_bsize, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride, inter_pred, inter_stride, intra_pred, intra_stride); } void av1_build_interintra_predictors_sby(MACROBLOCKD *xd, uint8_t *ypred, int ystride, BUFFER_SET *ctx, BLOCK_SIZE bsize) { #if CONFIG_AOM_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { DECLARE_ALIGNED(16, uint16_t, intrapredictor[MAX_SB_SQUARE]); av1_build_intra_predictors_for_interintra( xd, bsize, 0, ctx, CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE); av1_combine_interintra(xd, bsize, 0, ypred, ystride, CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE); return; } #endif // CONFIG_AOM_HIGHBITDEPTH { DECLARE_ALIGNED(16, uint8_t, intrapredictor[MAX_SB_SQUARE]); av1_build_intra_predictors_for_interintra(xd, bsize, 0, ctx, intrapredictor, MAX_SB_SIZE); av1_combine_interintra(xd, bsize, 0, ypred, ystride, intrapredictor, MAX_SB_SIZE); } } void av1_build_interintra_predictors_sbc(MACROBLOCKD *xd, uint8_t *upred, int ustride, BUFFER_SET *ctx, int plane, BLOCK_SIZE bsize) { #if CONFIG_AOM_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { DECLARE_ALIGNED(16, uint16_t, uintrapredictor[MAX_SB_SQUARE]); av1_build_intra_predictors_for_interintra( xd, bsize, plane, ctx, CONVERT_TO_BYTEPTR(uintrapredictor), MAX_SB_SIZE); av1_combine_interintra(xd, bsize, plane, upred, ustride, CONVERT_TO_BYTEPTR(uintrapredictor), MAX_SB_SIZE); return; } #endif // CONFIG_AOM_HIGHBITDEPTH { DECLARE_ALIGNED(16, uint8_t, uintrapredictor[MAX_SB_SQUARE]); 
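// Note (derived from the two calls below): the intra predictor is written
// into the stack buffer uintrapredictor first, and av1_combine_interintra()
// then blends it with the inter prediction in upred, storing the result in
// the plane's dst buffer.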
av1_build_intra_predictors_for_interintra(xd, bsize, plane, ctx, uintrapredictor, MAX_SB_SIZE); av1_combine_interintra(xd, bsize, plane, upred, ustride, uintrapredictor, MAX_SB_SIZE); } } void av1_build_interintra_predictors_sbuv(MACROBLOCKD *xd, uint8_t *upred, uint8_t *vpred, int ustride, int vstride, BUFFER_SET *ctx, BLOCK_SIZE bsize) { av1_build_interintra_predictors_sbc(xd, upred, ustride, ctx, 1, bsize); av1_build_interintra_predictors_sbc(xd, vpred, vstride, ctx, 2, bsize); } void av1_build_interintra_predictors(MACROBLOCKD *xd, uint8_t *ypred, uint8_t *upred, uint8_t *vpred, int ystride, int ustride, int vstride, BUFFER_SET *ctx, BLOCK_SIZE bsize) { av1_build_interintra_predictors_sby(xd, ypred, ystride, ctx, bsize); av1_build_interintra_predictors_sbuv(xd, upred, vpred, ustride, vstride, ctx, bsize); } // Builds the inter-predictor for the single ref case // for use in the encoder to search the wedges efficiently. static void build_inter_predictors_single_buf(MACROBLOCKD *xd, int plane, int block, int bw, int bh, int x, int y, int w, int h, int mi_x, int mi_y, int ref, uint8_t *const ext_dst, int ext_dst_stride) { struct macroblockd_plane *const pd = &xd->plane[plane]; const MODE_INFO *mi = xd->mi[0]; const struct scale_factors *const sf = &xd->block_refs[ref]->sf; struct buf_2d *const pre_buf = &pd->pre[ref]; #if CONFIG_AOM_HIGHBITDEPTH uint8_t *const dst = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH ? CONVERT_TO_BYTEPTR(ext_dst) : ext_dst) + ext_dst_stride * y + x; #else uint8_t *const dst = ext_dst + ext_dst_stride * y + x; #endif const MV mv = mi->mbmi.sb_type < BLOCK_8X8 ? average_split_mvs(pd, mi, ref, block) : mi->mbmi.mv[ref].as_mv; // TODO(jkoleszar): This clamping is done in the incorrect place for the // scaling case. It needs to be done on the scaled MV, not the pre-scaling // MV. Note however that it performs the subsampling aware scaling so // that the result is always q4. // mv_precision precision is MV_PRECISION_Q4. 
const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y); uint8_t *pre; MV32 scaled_mv; int xs, ys, subpel_x, subpel_y; const int is_scaled = av1_is_scaled(sf); ConvolveParams conv_params = get_conv_params(0, plane); #if CONFIG_GLOBAL_MOTION WarpedMotionParams *const wm = &xd->global_motion[mi->mbmi.ref_frame[ref]]; const int is_global = is_global_mv_block(mi, block, wm->wmtype); #endif // CONFIG_GLOBAL_MOTION if (is_scaled) { pre = pre_buf->buf + scaled_buffer_offset(x, y, pre_buf->stride, sf); scaled_mv = av1_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf); xs = sf->x_step_q4; ys = sf->y_step_q4; } else { pre = pre_buf->buf + (y * pre_buf->stride + x); scaled_mv.row = mv_q4.row; scaled_mv.col = mv_q4.col; xs = ys = 16; } subpel_x = scaled_mv.col & SUBPEL_MASK; subpel_y = scaled_mv.row & SUBPEL_MASK; pre += (scaled_mv.row >> SUBPEL_BITS) * pre_buf->stride + (scaled_mv.col >> SUBPEL_BITS); av1_make_inter_predictor(pre, pre_buf->stride, dst, ext_dst_stride, subpel_x, subpel_y, sf, w, h, &conv_params, mi->mbmi.interp_filter, #if CONFIG_GLOBAL_MOTION is_global, (mi_x >> pd->subsampling_x) + x, (mi_y >> pd->subsampling_y) + y, plane, ref, #if CONFIG_MOTION_VAR 0, 0, #endif #endif // CONFIG_GLOBAL_MOTION xs, ys, xd); } void av1_build_inter_predictors_for_planes_single_buf( MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane_from, int plane_to, int mi_row, int mi_col, int ref, uint8_t *ext_dst[3], int ext_dst_stride[3]) { int plane; const int mi_x = mi_col * MI_SIZE; const int mi_y = mi_row * MI_SIZE; for (plane = plane_from; plane <= plane_to; ++plane) { const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, &xd->plane[plane]); const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; const int bw = block_size_wide[plane_bsize]; const int bh = block_size_high[plane_bsize]; if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8 && !CONFIG_CB4X4) { int x, y; assert(bsize == BLOCK_8X8); for (y = 0; y < num_4x4_h; ++y) for (x = 0; x < num_4x4_w; ++x) build_inter_predictors_single_buf( xd, plane, y * 2 + x, bw, bh, 4 * x, 4 * y, 4, 4, mi_x, mi_y, ref, ext_dst[plane], ext_dst_stride[plane]); } else { build_inter_predictors_single_buf(xd, plane, 0, bw, bh, 0, 0, bw, bh, mi_x, mi_y, ref, ext_dst[plane], ext_dst_stride[plane]); } } } static void build_wedge_inter_predictor_from_buf( MACROBLOCKD *xd, int plane, int x, int y, int w, int h, #if CONFIG_SUPERTX int wedge_offset_x, int wedge_offset_y, #endif // CONFIG_SUPERTX uint8_t *ext_dst0, int ext_dst_stride0, uint8_t *ext_dst1, int ext_dst_stride1) { MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const int is_compound = has_second_ref(mbmi); MACROBLOCKD_PLANE *const pd = &xd->plane[plane]; struct buf_2d *const dst_buf = &pd->dst; uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x; INTERINTER_COMPOUND_DATA *comp_data = &mbmi->interinter_compound_data; if (is_compound && is_masked_compound_type(mbmi->interinter_compound_data.type)) { #if CONFIG_COMPOUND_SEGMENT #if CONFIG_AOM_HIGHBITDEPTH if (!plane && comp_data->type == COMPOUND_SEG) { if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) build_compound_seg_mask_highbd( comp_data->seg_mask, comp_data->mask_type, CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0, CONVERT_TO_BYTEPTR(ext_dst1), ext_dst_stride1, mbmi->sb_type, h, w, xd->bd); else build_compound_seg_mask(comp_data->seg_mask, comp_data->mask_type, ext_dst0, ext_dst_stride0, ext_dst1, ext_dst_stride1, mbmi->sb_type, h, w); } #else if (!plane && comp_data->type == 
COMPOUND_SEG) build_compound_seg_mask(comp_data->seg_mask, comp_data->mask_type, ext_dst0, ext_dst_stride0, ext_dst1, ext_dst_stride1, mbmi->sb_type, h, w); #endif // CONFIG_AOM_HIGHBITDEPTH #endif // CONFIG_COMPOUND_SEGMENT #if CONFIG_SUPERTX #if CONFIG_AOM_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) build_masked_compound_wedge_extend_highbd( dst, dst_buf->stride, CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0, CONVERT_TO_BYTEPTR(ext_dst1), ext_dst_stride1, comp_data, mbmi->sb_type, wedge_offset_x, wedge_offset_y, h, w, xd->bd); else #endif // CONFIG_AOM_HIGHBITDEPTH build_masked_compound_wedge_extend( dst, dst_buf->stride, ext_dst0, ext_dst_stride0, ext_dst1, ext_dst_stride1, comp_data, mbmi->sb_type, wedge_offset_x, wedge_offset_y, h, w); #else #if CONFIG_AOM_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) build_masked_compound_highbd( dst, dst_buf->stride, CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0, CONVERT_TO_BYTEPTR(ext_dst1), ext_dst_stride1, comp_data, mbmi->sb_type, h, w, xd->bd); else #endif // CONFIG_AOM_HIGHBITDEPTH build_masked_compound(dst, dst_buf->stride, ext_dst0, ext_dst_stride0, ext_dst1, ext_dst_stride1, comp_data, mbmi->sb_type, h, w); #endif // CONFIG_SUPERTX } else { #if CONFIG_AOM_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) aom_highbd_convolve_copy(CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0, dst, dst_buf->stride, NULL, 0, NULL, 0, w, h, xd->bd); else #endif // CONFIG_AOM_HIGHBITDEPTH aom_convolve_copy(ext_dst0, ext_dst_stride0, dst, dst_buf->stride, NULL, 0, NULL, 0, w, h); } } void av1_build_wedge_inter_predictor_from_buf( MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane_from, int plane_to, #if CONFIG_SUPERTX int wedge_offset_x, int wedge_offset_y, #endif // CONFIG_SUPERTX uint8_t *ext_dst0[3], int ext_dst_stride0[3], uint8_t *ext_dst1[3], int ext_dst_stride1[3]) { int plane; for (plane = plane_from; plane <= plane_to; ++plane) { const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, &xd->plane[plane]); const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8 && !CONFIG_CB4X4) { int x, y; assert(bsize == BLOCK_8X8); for (y = 0; y < num_4x4_h; ++y) for (x = 0; x < num_4x4_w; ++x) build_wedge_inter_predictor_from_buf( xd, plane, 4 * x, 4 * y, 4, 4, #if CONFIG_SUPERTX wedge_offset_x, wedge_offset_y, #endif // CONFIG_SUPERTX ext_dst0[plane], ext_dst_stride0[plane], ext_dst1[plane], ext_dst_stride1[plane]); } else { const int bw = block_size_wide[plane_bsize]; const int bh = block_size_high[plane_bsize]; build_wedge_inter_predictor_from_buf( xd, plane, 0, 0, bw, bh, #if CONFIG_SUPERTX wedge_offset_x, wedge_offset_y, #endif // CONFIG_SUPERTX ext_dst0[plane], ext_dst_stride0[plane], ext_dst1[plane], ext_dst_stride1[plane]); } } } #endif // CONFIG_EXT_INTER