hash_based_trellis speed feature

Add speed feature that uses hash tables to
reuse previously found optimized coefficients
in av1_optimize_txb. This skips some expensive
optimize_txb calls.

Currently shows no significant quality
degradation or speed improvement, and set to off
by default. Requires hash_me, lv_map and
lv_map_multi. Adding to speed features required
changing AV1_COMMON *cm to AV1_COMP *cpi in a
chain of functions.

Variations that have been tried:
-varying the maximum eob on which the feature
activates: 16, 32, 64. 16 currently used. 64
has best hit rate but longer execution time.
-varying the data hashed and the length of hashes
(first hash is 16 bit and based on context data,
while second hash is 16 bit and based only on
pre-optimized qcoeff values.)
-softening the data used for the hashes: ideally
this would raise the number of hits, without
compromising quality too much.

Change-Id: I94f22be82f3a46637c0489d512f2e334a307575f
parent 37d88736
......@@ -4453,8 +4453,7 @@ static void encode_superblock(const AV1_COMP *const cpi, TileDataEnc *tile_data,
#endif // CONFIG_CFL
mbmi->skip = 1;
for (int plane = 0; plane < num_planes; ++plane) {
av1_encode_intra_block_plane((AV1_COMMON *)cm, x, bsize, plane, 1, mi_row,
mi_col);
av1_encode_intra_block_plane(cpi, x, bsize, plane, 1, mi_row, mi_col);
}
#if CONFIG_CFL
xd->cfl.store_y = 0;
......@@ -4519,7 +4518,7 @@ static void encode_superblock(const AV1_COMP *const cpi, TileDataEnc *tile_data,
}
#endif
av1_encode_sb((AV1_COMMON *)cm, x, bsize, mi_row, mi_col, dry_run);
av1_encode_sb(cpi, x, bsize, mi_row, mi_col, dry_run);
if (mbmi->skip) mbmi->min_tx_size = mbmi->tx_size;
av1_tokenize_sb_vartx(cpi, td, t, dry_run, mi_row, mi_col, bsize, rate,
tile_data->allow_update_cdf);
......
......@@ -419,8 +419,8 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
}
#endif // !CONFIG_LV_MAP
int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int blk_row,
int blk_col, int block, BLOCK_SIZE plane_bsize,
int av1_optimize_b(const AV1_COMP *const cpi, MACROBLOCK *mb, int plane,
int blk_row, int blk_col, int block, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
const ENTROPY_CONTEXT *l, int fast_mode) {
MACROBLOCKD *const xd = &mb->e_mbd;
......@@ -434,12 +434,13 @@ int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int blk_row,
(void)blk_row;
(void)blk_col;
int ctx = get_entropy_context(tx_size, a, l);
const AV1_COMMON *const cm = &cpi->common;
return optimize_b_greedy(cm, mb, plane, blk_row, blk_col, block, tx_size, ctx,
fast_mode);
#else // !CONFIG_LV_MAP
TXB_CTX txb_ctx;
get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx);
return av1_optimize_txb(cm, mb, plane, blk_row, blk_col, block, tx_size,
return av1_optimize_txb(cpi, mb, plane, blk_row, blk_col, block, tx_size,
&txb_ctx, fast_mode);
#endif // !CONFIG_LV_MAP
}
......@@ -556,7 +557,7 @@ static void encode_block(int plane, int block, int blk_row, int blk_col,
(void)mi_col;
(void)dry_run;
struct encode_b_args *const args = arg;
AV1_COMMON *cm = args->cm;
const AV1_COMMON *const cm = &args->cpi->common;
MACROBLOCK *const x = args->x;
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblock_plane *const p = &x->plane[plane];
......@@ -587,8 +588,8 @@ static void encode_block(int plane, int block, int blk_row, int blk_col,
p->eobs[block] = 0;
}
av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize, tx_size, a,
l, CONFIG_LV_MAP);
av1_optimize_b(args->cpi, x, plane, blk_row, blk_col, block, plane_bsize,
tx_size, a, l, CONFIG_LV_MAP);
av1_set_txb_context(x, plane, block, tx_size, a, l);
......@@ -738,13 +739,13 @@ void av1_encode_sby_pass1(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize) {
encode_block_pass1, &args);
}
void av1_encode_sb(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row,
int mi_col, RUN_TYPE dry_run) {
void av1_encode_sb(const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
int mi_row, int mi_col, RUN_TYPE dry_run) {
(void)dry_run;
MACROBLOCKD *const xd = &x->e_mbd;
struct optimize_ctx ctx;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
struct encode_b_args arg = { cm, x, &ctx, &mbmi->skip, NULL, NULL, 1 };
struct encode_b_args arg = { cpi, x, &ctx, &mbmi->skip, NULL, NULL, 1 };
int plane;
mbmi->skip = 1;
......@@ -844,7 +845,7 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
void *arg) {
struct encode_b_args *const args = arg;
AV1_COMMON *cm = args->cm;
const AV1_COMMON *const cm = &args->cpi->common;
MACROBLOCK *const x = args->x;
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblock_plane *const p = &x->plane[plane];
......@@ -885,8 +886,8 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
if (args->enable_optimize_b) {
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
AV1_XFORM_QUANT_FP);
av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize, tx_size,
a, l, CONFIG_LV_MAP);
av1_optimize_b(args->cpi, x, plane, blk_row, blk_col, block, plane_bsize,
tx_size, a, l, CONFIG_LV_MAP);
#if CONFIG_TXK_SEL
if (plane == 0 && p->eobs[block] == 0) {
......@@ -913,7 +914,7 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
#endif // CONFIG_CFL
}
void av1_encode_intra_block_plane(AV1_COMMON *cm, MACROBLOCK *x,
void av1_encode_intra_block_plane(const AV1_COMP *const cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, int plane,
int enable_optimize_b, int mi_row,
int mi_col) {
......@@ -922,7 +923,7 @@ void av1_encode_intra_block_plane(AV1_COMMON *cm, MACROBLOCK *x,
ENTROPY_CONTEXT tl[2 * MAX_MIB_SIZE] = { 0 };
struct encode_b_args arg = {
cm, x, NULL, &xd->mi[0]->mbmi.skip, ta, tl, enable_optimize_b
cpi, x, NULL, &xd->mi[0]->mbmi.skip, ta, tl, enable_optimize_b
};
if (!is_chroma_reference(mi_row, mi_col, bsize,
......
......@@ -26,7 +26,7 @@ struct optimize_ctx {
};
struct encode_b_args {
AV1_COMMON *cm;
const struct AV1_COMP *cpi;
MACROBLOCK *x;
struct optimize_ctx *ctx;
int8_t *skip;
......@@ -43,15 +43,15 @@ typedef enum AV1_XFORM_QUANT {
AV1_XFORM_QUANT_TYPES,
} AV1_XFORM_QUANT;
void av1_encode_sb(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row,
int mi_col, RUN_TYPE dry_run);
void av1_encode_sb(const struct AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
int mi_row, int mi_col, RUN_TYPE dry_run);
void av1_encode_sby_pass1(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize);
void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
int blk_row, int blk_col, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, AV1_XFORM_QUANT xform_quant_idx);
int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int blk_row,
int blk_col, int block, BLOCK_SIZE plane_bsize,
int av1_optimize_b(const struct AV1_COMP *cpi, MACROBLOCK *mb, int plane,
int blk_row, int blk_col, int block, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
const ENTROPY_CONTEXT *l, int fast_mode);
......@@ -66,7 +66,7 @@ void av1_set_txb_context(MACROBLOCK *x, int plane, int block, TX_SIZE tx_size,
void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg);
void av1_encode_intra_block_plane(AV1_COMMON *cm, MACROBLOCK *x,
void av1_encode_intra_block_plane(const struct AV1_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, int plane,
int enable_optimize_b, int mi_row,
int mi_col);
......
This diff is collapsed.
......@@ -107,9 +107,10 @@ int64_t av1_search_txk_type(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l,
int use_fast_coef_costing, RD_STATS *rd_stats);
#endif
int av1_optimize_txb(const AV1_COMMON *cm, MACROBLOCK *x, int plane,
int blk_row, int blk_col, int block, TX_SIZE tx_size,
TXB_CTX *txb_ctx, int fast_mode);
int av1_optimize_txb(const AV1_COMP *cpi, MACROBLOCK *x, int plane, int blk_row,
int blk_col, int block, TX_SIZE tx_size, TXB_CTX *txb_ctx,
int fast_mode);
#ifdef __cplusplus
}
#endif
......
......@@ -641,7 +641,7 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
xd->mi[0]->mbmi.mode = DC_PRED;
xd->mi[0]->mbmi.tx_size =
use_dc_pred ? (bsize >= BLOCK_16X16 ? TX_16X16 : TX_8X8) : TX_4X4;
av1_encode_intra_block_plane(cm, x, bsize, 0, 0, mb_row * 2, mb_col * 2);
av1_encode_intra_block_plane(cpi, x, bsize, 0, 0, mb_row * 2, mb_col * 2);
this_error = aom_get_mb_ss(x->plane[0].src_diff);
// Keep a record of blocks that have almost no intra error residual
......
......@@ -1900,7 +1900,7 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
AV1_XFORM_QUANT_FP);
// TX-domain results need to shift down to Q2/D10 to match pixel
/// TX-domain results need to shift down to Q2/D10 to match pixel
// domain distortion values which are in Q2^2
#if CONFIG_DAALA_TX
const int shift = (TX_COEFF_DEPTH - 10) * 2;
......@@ -1929,7 +1929,7 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
disable_early_skip ||
#endif
RDCOST(x->rdmult, 0, tmp_dist) + args->this_rd < args->best_rd) {
av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize,
av1_optimize_b(cpi, x, plane, blk_row, blk_col, block, plane_bsize,
tx_size, a, l, CONFIG_LV_MAP);
} else {
args->exit_early = 1;
......@@ -3581,7 +3581,7 @@ void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
disable_early_skip ||
#endif
RDCOST(x->rdmult, 0, tmp_dist) < rd_stats->ref_rdcost) {
av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize,
av1_optimize_b(cpi, x, plane, blk_row, blk_col, block, plane_bsize,
tx_size, a, l, fast);
} else {
rd_stats->rate += rd_stats->zero_rate;
......@@ -8704,8 +8704,8 @@ void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
// during luma RDO, so we can store reconstructed luma values
memcpy(x->blk_skip[0], ctx->blk_skip[0],
sizeof(uint8_t) * ctx->num_4x4_blk);
av1_encode_intra_block_plane((AV1_COMMON *)cm, x, bsize, AOM_PLANE_Y, 1,
mi_row, mi_col);
av1_encode_intra_block_plane(cpi, x, bsize, AOM_PLANE_Y, 1, mi_row,
mi_col);
xd->cfl.store_y = 0;
}
#endif // CONFIG_CFL
......
......@@ -149,6 +149,10 @@ static void set_good_speed_features_framesize_independent(AV1_COMP *cpi,
#if CONFIG_DUAL_FILTER
sf->use_fast_interpolation_filter_search = 1;
#endif // CONFIG_DUAL_FILTER
#if 0 // CONFIG_HASH_ME && CONFIG_LV_MAP && CONFIG_LV_MAP_MULTI
// TODO(mfo): Activate feature once it gives positive results.
sf->use_hash_based_trellis = 1;
#endif // CONFIG_HASH_ME && CONFIG_LV_MAP && CONFIG_LV_MAP_MULTI
}
if (speed >= 2) {
......@@ -515,6 +519,7 @@ void av1_set_speed_features_framesize_independent(AV1_COMP *cpi) {
sf->use_transform_domain_distortion = 0;
sf->gm_search_type = GM_FULL_SEARCH;
sf->use_fast_interpolation_filter_search = 0;
sf->use_hash_based_trellis = 0;
set_dev_sf(cpi, sf, oxcf->dev_sf);
......
......@@ -520,6 +520,10 @@ typedef struct SPEED_FEATURES {
// usually includes EIGHTTAP_REGULAR.
int use_fast_interpolation_filter_search;
// Use a hash table to store previously computed optimized qcoeffs from
// expensive calls to optimize_txb.
int use_hash_based_trellis;
// flag to drop some ref frames in compound motion search
int drop_ref;
} SPEED_FEATURES;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment