From f2f3156f9f5dbb77fcee8585ed08da72aac08871 Mon Sep 17 00:00:00 2001 From: David Michael Barr Date: Tue, 31 Oct 2017 22:58:15 +0900 Subject: [PATCH] [CFL] Reorder alpha search for early termination This prepares the way for a heuristic to truncate the search. For a given joint sign value, each plane's alpha is coded independently and so the alpha choice is also independent. Rather than iterating over all possible pairs, first choose for each plane separately and then merge the choices. Merge with RD collection and continuously update the bound. On subset1, then encoder output is identical for most sequences: PSNR | PSNR Cb | PSNR Cr | PSNR HVS | SSIM | MS SSIM | CIEDE 2000 0.0002 | -0.0058 | -0.0016 | 0.0020 | 0.0025 | 0.0040 | -0.0022 Change-Id: Ie7d1e1622d46eaa8cf42f50c9a6bec6e59bdbc39 --- av1/encoder/rdopt.c | 147 +++++++++++++++++++++++--------------------- 1 file changed, 78 insertions(+), 69 deletions(-) diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c index afa230144..6ba22b0e3 100644 --- a/av1/encoder/rdopt.c +++ b/av1/encoder/rdopt.c @@ -5414,18 +5414,8 @@ static int rd_pick_intra_angle_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x, #endif // CONFIG_EXT_INTRA #if CONFIG_CFL -static void txfm_rd_in_plane_once(MACROBLOCK *const x, - const AV1_COMP *const cpi, BLOCK_SIZE bsize, - TX_SIZE tx_size, int plane, int64_t best_rd, - int64_t *dist, int *rate) { - RD_STATS rd_stats; - av1_init_rd_stats(&rd_stats); - txfm_rd_in_plane(x, cpi, &rd_stats, best_rd, plane, bsize, tx_size, - cpi->sf.use_fast_coef_costing); - *dist = rd_stats.dist; - *rate = rd_stats.rate; -} - +#define PLANE_SIGN_TO_JOINT_SIGN(plane, a, b) \ + (plane == CFL_PRED_U ? a * CFL_SIGNS + b - 1 : b * CFL_SIGNS + a - 1) static int cfl_rd_pick_alpha(MACROBLOCK *const x, const AV1_COMP *const cpi, TX_SIZE tx_size, int64_t best_rd) { MACROBLOCKD *const xd = &x->e_mbd; @@ -5441,77 +5431,96 @@ static int cfl_rd_pick_alpha(MACROBLOCK *const x, const AV1_COMP *const cpi, assert(block_size_high[plane_bsize] == tx_size_high[tx_size]); #endif - int rates[CFL_PRED_PLANES][CFL_MAGS_SIZE]; - int64_t dists[CFL_PRED_PLANES][CFL_MAGS_SIZE]; - mbmi->cfl_alpha_idx = 0; - mbmi->cfl_alpha_signs = CFL_SIGN_ZERO * CFL_SIGNS + CFL_SIGN_POS - 1; - txfm_rd_in_plane_once(x, cpi, bsize, tx_size, AOM_PLANE_U, best_rd, - &dists[CFL_PRED_U][0], &rates[CFL_PRED_U][0]); - mbmi->cfl_alpha_signs = CFL_SIGN_POS * CFL_SIGNS + CFL_SIGN_ZERO - 1; - txfm_rd_in_plane_once(x, cpi, bsize, tx_size, AOM_PLANE_V, best_rd, - &dists[CFL_PRED_V][0], &rates[CFL_PRED_V][0]); + const int64_t mode_rd = + RDCOST(x->rdmult, x->intra_uv_mode_cost[mbmi->mode][UV_CFL_PRED], 0); + int64_t best_rd_uv[CFL_JOINT_SIGNS][CFL_PRED_PLANES]; + int best_c[CFL_JOINT_SIGNS][CFL_PRED_PLANES]; +#if CONFIG_DEBUG + int best_rate_uv[CFL_JOINT_SIGNS][CFL_PRED_PLANES]; +#endif // CONFIG_DEBUG - for (int c = 0; c < CFL_ALPHABET_SIZE; c++) { - mbmi->cfl_alpha_idx = (c << CFL_ALPHABET_SIZE_LOG2) + c; - for (int sign = CFL_SIGN_NEG; sign < CFL_SIGNS; sign++) { - const int m = c * 2 + 1 + (sign == CFL_SIGN_NEG); - mbmi->cfl_alpha_signs = sign * CFL_SIGNS + sign - 1; - txfm_rd_in_plane_once(x, cpi, bsize, tx_size, AOM_PLANE_U, best_rd, - &dists[CFL_PRED_U][m], &rates[CFL_PRED_U][m]); - txfm_rd_in_plane_once(x, cpi, bsize, tx_size, AOM_PLANE_V, best_rd, - &dists[CFL_PRED_V][m], &rates[CFL_PRED_V][m]); + for (int plane = 0; plane < CFL_PRED_PLANES; plane++) { + RD_STATS rd_stats; + av1_init_rd_stats(&rd_stats); + for (int joint_sign = 0; joint_sign < CFL_JOINT_SIGNS; joint_sign++) { + best_rd_uv[joint_sign][plane] = INT64_MAX; + best_c[joint_sign][plane] = 0; + } + // Collect RD stats for an alpha value of zero in this plane. + // Skip i == CFL_SIGN_ZERO as (0, 0) is invalid. + for (int i = CFL_SIGN_NEG; i < CFL_SIGNS; i++) { + const int joint_sign = PLANE_SIGN_TO_JOINT_SIGN(plane, CFL_SIGN_ZERO, i); + if (i == CFL_SIGN_NEG) { + mbmi->cfl_alpha_idx = 0; + mbmi->cfl_alpha_signs = joint_sign; + txfm_rd_in_plane(x, cpi, &rd_stats, best_rd, plane + 1, bsize, tx_size, + cpi->sf.use_fast_coef_costing); + if (rd_stats.rate == INT_MAX) break; + } + const int alpha_rate = x->cfl_cost[joint_sign][plane][0]; + best_rd_uv[joint_sign][plane] = + RDCOST(x->rdmult, rd_stats.rate + alpha_rate, rd_stats.dist); +#if CONFIG_DEBUG + best_rate_uv[joint_sign][plane] = rd_stats.rate; +#endif // CONFIG_DEBUG } } - int64_t dist; - int64_t cost; - int64_t best_cost = INT64_MAX; - int best_rate_overhead = INT_MAX; -#if CONFIG_DEBUG - int best_rate = INT_MAX; -#endif // CONFIG_DEBUG + int best_joint_sign = -1; - int ind = 0; - int signs = 0; - - for (int joint_sign = 0; joint_sign < CFL_JOINT_SIGNS; joint_sign++) { - const int sign_u = CFL_SIGN_U(joint_sign); - const int sign_v = CFL_SIGN_V(joint_sign); - const int size_u = (sign_u == CFL_SIGN_ZERO) ? 1 : CFL_ALPHABET_SIZE; - const int size_v = (sign_v == CFL_SIGN_ZERO) ? 1 : CFL_ALPHABET_SIZE; - for (int u = 0; u < size_u; u++) { - const int idx_u = ((sign_u == CFL_SIGN_ZERO) ? 0 : u * 2 + 1) + - (sign_u == CFL_SIGN_NEG); - if (rates[CFL_PRED_U][idx_u] == INT_MAX) continue; - for (int v = 0; v < size_v; v++) { - const int idx_v = ((sign_v == CFL_SIGN_ZERO) ? 0 : v * 2 + 1) + - (sign_v == CFL_SIGN_NEG); - if (rates[CFL_PRED_V][idx_v] == INT_MAX) continue; - dist = dists[CFL_PRED_U][idx_u] + dists[CFL_PRED_V][idx_v]; - int rate_overhead = x->cfl_cost[joint_sign][CFL_PRED_U][u] + - x->cfl_cost[joint_sign][CFL_PRED_V][v]; - int rate = x->intra_uv_mode_cost[mbmi->mode][UV_CFL_PRED] + - rate_overhead + rates[CFL_PRED_U][idx_u] + - rates[CFL_PRED_V][idx_v]; - cost = RDCOST(x->rdmult, rate, dist); - if (cost < best_cost) { - best_cost = cost; - best_rate_overhead = rate_overhead; - ind = (u << CFL_ALPHABET_SIZE_LOG2) + v; - signs = joint_sign; + for (int plane = 0; plane < CFL_PRED_PLANES; plane++) { + for (int pn_sign = CFL_SIGN_NEG; pn_sign < CFL_SIGNS; pn_sign++) { + for (int c = 0; c < CFL_ALPHABET_SIZE; c++) { + RD_STATS rd_stats; + av1_init_rd_stats(&rd_stats); + for (int i = 0; i < CFL_SIGNS; i++) { + const int joint_sign = PLANE_SIGN_TO_JOINT_SIGN(plane, pn_sign, i); + if (i == 0) { + mbmi->cfl_alpha_idx = (c << CFL_ALPHABET_SIZE_LOG2) + c; + mbmi->cfl_alpha_signs = joint_sign; + txfm_rd_in_plane(x, cpi, &rd_stats, best_rd, plane + 1, bsize, + tx_size, cpi->sf.use_fast_coef_costing); + if (rd_stats.rate == INT_MAX) break; + } + const int alpha_rate = x->cfl_cost[joint_sign][plane][c]; + int64_t this_rd = + RDCOST(x->rdmult, rd_stats.rate + alpha_rate, rd_stats.dist); + if (this_rd >= best_rd_uv[joint_sign][plane]) continue; + best_rd_uv[joint_sign][plane] = this_rd; + best_c[joint_sign][plane] = c; #if CONFIG_DEBUG - best_rate = rate; + best_rate_uv[joint_sign][plane] = rd_stats.rate; #endif // CONFIG_DEBUG + if (best_rd_uv[joint_sign][!plane] == INT64_MAX) continue; + this_rd += mode_rd + best_rd_uv[joint_sign][!plane]; + if (this_rd >= best_rd) continue; + best_rd = this_rd; + best_joint_sign = joint_sign; } } } } - mbmi->cfl_alpha_idx = ind; - mbmi->cfl_alpha_signs = signs; + int best_rate_overhead = INT_MAX; + int ind = 0; + if (best_joint_sign >= 0) { + const int u = best_c[best_joint_sign][CFL_PRED_U]; + const int v = best_c[best_joint_sign][CFL_PRED_V]; + ind = (u << CFL_ALPHABET_SIZE_LOG2) + v; + best_rate_overhead = x->cfl_cost[best_joint_sign][CFL_PRED_U][u] + + x->cfl_cost[best_joint_sign][CFL_PRED_V][v]; #if CONFIG_DEBUG - xd->cfl.rate = best_rate; + xd->cfl.rate = x->intra_uv_mode_cost[mbmi->mode][UV_CFL_PRED] + + best_rate_overhead + + best_rate_uv[best_joint_sign][CFL_PRED_U] + + best_rate_uv[best_joint_sign][CFL_PRED_V]; #endif // CONFIG_DEBUG + } else { + best_joint_sign = 0; + } + + mbmi->cfl_alpha_idx = ind; + mbmi->cfl_alpha_signs = best_joint_sign; return best_rate_overhead; } #endif // CONFIG_CFL -- GitLab