hash_based_trellis speed feature

Add speed feature that uses hash tables to
reuse previously found optimized coefficients
in av1_optimize_txb. This skips some expensive
optimize_txb calls.

Currently shows no significant quality
degradation or speed improvement, and is set to
off by default. Requires hash_me, lv_map and
lv_map_multi. Adding it to the speed features
required changing AV1_COMMON *cm to AV1_COMP *cpi
in a chain of functions.

Variations that have been tried:
-varying the maximum eob on which the feature
activates: 16, 32, 64. 16 currently used. 64
has best hit rate but longer execution time.
-varying the data hashed and the length of hashes
(first hash is 16 bit and based on context data,
while second hash is 16 bit and based only on
pre-optimized qcoeff values.)
-softening the data used for the hashes: ideally
this would raise the number of hits, without
compromising quality too much.

Change-Id: I94f22be82f3a46637c0489d512f2e334a307575f
parent 37d88736
......@@ -4453,8 +4453,7 @@ static void encode_superblock(const AV1_COMP *const cpi, TileDataEnc *tile_data,
#endif // CONFIG_CFL
mbmi->skip = 1;
for (int plane = 0; plane < num_planes; ++plane) {
av1_encode_intra_block_plane((AV1_COMMON *)cm, x, bsize, plane, 1, mi_row,
mi_col);
av1_encode_intra_block_plane(cpi, x, bsize, plane, 1, mi_row, mi_col);
}
#if CONFIG_CFL
xd->cfl.store_y = 0;
......@@ -4519,7 +4518,7 @@ static void encode_superblock(const AV1_COMP *const cpi, TileDataEnc *tile_data,
}
#endif
av1_encode_sb((AV1_COMMON *)cm, x, bsize, mi_row, mi_col, dry_run);
av1_encode_sb(cpi, x, bsize, mi_row, mi_col, dry_run);
if (mbmi->skip) mbmi->min_tx_size = mbmi->tx_size;
av1_tokenize_sb_vartx(cpi, td, t, dry_run, mi_row, mi_col, bsize, rate,
tile_data->allow_update_cdf);
......
......@@ -419,8 +419,8 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
}
#endif // !CONFIG_LV_MAP
int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int blk_row,
int blk_col, int block, BLOCK_SIZE plane_bsize,
int av1_optimize_b(const AV1_COMP *const cpi, MACROBLOCK *mb, int plane,
int blk_row, int blk_col, int block, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
const ENTROPY_CONTEXT *l, int fast_mode) {
MACROBLOCKD *const xd = &mb->e_mbd;
......@@ -434,12 +434,13 @@ int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int blk_row,
(void)blk_row;
(void)blk_col;
int ctx = get_entropy_context(tx_size, a, l);
const AV1_COMMON *const cm = &cpi->common;
return optimize_b_greedy(cm, mb, plane, blk_row, blk_col, block, tx_size, ctx,
fast_mode);
#else // !CONFIG_LV_MAP
TXB_CTX txb_ctx;
get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx);
return av1_optimize_txb(cm, mb, plane, blk_row, blk_col, block, tx_size,
return av1_optimize_txb(cpi, mb, plane, blk_row, blk_col, block, tx_size,
&txb_ctx, fast_mode);
#endif // !CONFIG_LV_MAP
}
......@@ -556,7 +557,7 @@ static void encode_block(int plane, int block, int blk_row, int blk_col,
(void)mi_col;
(void)dry_run;
struct encode_b_args *const args = arg;
AV1_COMMON *cm = args->cm;
const AV1_COMMON *const cm = &args->cpi->common;
MACROBLOCK *const x = args->x;
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblock_plane *const p = &x->plane[plane];
......@@ -587,8 +588,8 @@ static void encode_block(int plane, int block, int blk_row, int blk_col,
p->eobs[block] = 0;
}
av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize, tx_size, a,
l, CONFIG_LV_MAP);
av1_optimize_b(args->cpi, x, plane, blk_row, blk_col, block, plane_bsize,
tx_size, a, l, CONFIG_LV_MAP);
av1_set_txb_context(x, plane, block, tx_size, a, l);
......@@ -738,13 +739,13 @@ void av1_encode_sby_pass1(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize) {
encode_block_pass1, &args);
}
void av1_encode_sb(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row,
int mi_col, RUN_TYPE dry_run) {
void av1_encode_sb(const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
int mi_row, int mi_col, RUN_TYPE dry_run) {
(void)dry_run;
MACROBLOCKD *const xd = &x->e_mbd;
struct optimize_ctx ctx;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
struct encode_b_args arg = { cm, x, &ctx, &mbmi->skip, NULL, NULL, 1 };
struct encode_b_args arg = { cpi, x, &ctx, &mbmi->skip, NULL, NULL, 1 };
int plane;
mbmi->skip = 1;
......@@ -844,7 +845,7 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
void *arg) {
struct encode_b_args *const args = arg;
AV1_COMMON *cm = args->cm;
const AV1_COMMON *const cm = &args->cpi->common;
MACROBLOCK *const x = args->x;
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblock_plane *const p = &x->plane[plane];
......@@ -885,8 +886,8 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
if (args->enable_optimize_b) {
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
AV1_XFORM_QUANT_FP);
av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize, tx_size,
a, l, CONFIG_LV_MAP);
av1_optimize_b(args->cpi, x, plane, blk_row, blk_col, block, plane_bsize,
tx_size, a, l, CONFIG_LV_MAP);
#if CONFIG_TXK_SEL
if (plane == 0 && p->eobs[block] == 0) {
......@@ -913,7 +914,7 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
#endif // CONFIG_CFL
}
void av1_encode_intra_block_plane(AV1_COMMON *cm, MACROBLOCK *x,
void av1_encode_intra_block_plane(const AV1_COMP *const cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, int plane,
int enable_optimize_b, int mi_row,
int mi_col) {
......@@ -922,7 +923,7 @@ void av1_encode_intra_block_plane(AV1_COMMON *cm, MACROBLOCK *x,
ENTROPY_CONTEXT tl[2 * MAX_MIB_SIZE] = { 0 };
struct encode_b_args arg = {
cm, x, NULL, &xd->mi[0]->mbmi.skip, ta, tl, enable_optimize_b
cpi, x, NULL, &xd->mi[0]->mbmi.skip, ta, tl, enable_optimize_b
};
if (!is_chroma_reference(mi_row, mi_col, bsize,
......
......@@ -26,7 +26,7 @@ struct optimize_ctx {
};
struct encode_b_args {
AV1_COMMON *cm;
const struct AV1_COMP *cpi;
MACROBLOCK *x;
struct optimize_ctx *ctx;
int8_t *skip;
......@@ -43,15 +43,15 @@ typedef enum AV1_XFORM_QUANT {
AV1_XFORM_QUANT_TYPES,
} AV1_XFORM_QUANT;
void av1_encode_sb(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row,
int mi_col, RUN_TYPE dry_run);
void av1_encode_sb(const struct AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
int mi_row, int mi_col, RUN_TYPE dry_run);
void av1_encode_sby_pass1(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize);
void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
int blk_row, int blk_col, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, AV1_XFORM_QUANT xform_quant_idx);
int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int blk_row,
int blk_col, int block, BLOCK_SIZE plane_bsize,
int av1_optimize_b(const struct AV1_COMP *cpi, MACROBLOCK *mb, int plane,
int blk_row, int blk_col, int block, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
const ENTROPY_CONTEXT *l, int fast_mode);
......@@ -66,7 +66,7 @@ void av1_set_txb_context(MACROBLOCK *x, int plane, int block, TX_SIZE tx_size,
void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg);
void av1_encode_intra_block_plane(AV1_COMMON *cm, MACROBLOCK *x,
void av1_encode_intra_block_plane(const struct AV1_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, int plane,
int enable_optimize_b, int mi_row,
int mi_col);
......
......@@ -18,11 +18,25 @@
#include "av1/encoder/encodeframe.h"
#include "av1/encoder/cost.h"
#include "av1/encoder/encodetxb.h"
#include "av1/encoder/hash.h"
#include "av1/encoder/rdopt.h"
#include "av1/encoder/tokenize.h"
#define TEST_OPTIMIZE_TXB 0
// State for the hash based trellis (hbt) speed feature.

// Non-zero until hbt_hash_init() has run; checked on every call to
// hash_based_trellis_mode() to trigger lazy initialization.
static int hbt_hash_needs_init = 1;
// CRC calculator used for the context hash (the table index).
static CRC_CALCULATOR crc_calculator;
// CRC calculator used for the qcoeff hash (the value matched within a bucket).
static CRC_CALCULATOR crc_calculator2;
// Largest eob for which the feature is used by av1_optimize_txb().
static const int HBT_HASH_EOB = 16; // also the length in opt_qcoeff
// One cached result of optimize_txb.
typedef struct OptTxbQcoeff {
// Hash of the pre-optimization qcoeffs this entry was created for.
uint32_t hbt_hash_match;
// Usage score: set to 1.0 on insertion, increased by 1.0 on each match and
// scaled by 31/32 on every lookup of the owning bucket. The lowest score in
// a bucket is the candidate for replacement.
double hits;
// Optimized coefficients in scan order, zero padded after the stored eob.
tran_low_t opt_qcoeff[16];
} OptTxbQcoeff;
// Cache indexed by [16 bit context hash][slot]; each bucket has 16 slots.
// NOTE(review): the table and hbt_hash_needs_init are plain globals with no
// synchronization visible here -- confirm this is safe if the encoder calls
// av1_optimize_txb from several threads.
OptTxbQcoeff hbt_hash_table[65536][16];
typedef struct LevelDownStats {
int update;
tran_low_t low_qc;
......@@ -291,6 +305,16 @@ static void get_dist_cost_stats(LevelDownStats *const stats, const int scan_idx,
stats->update = 0;
stats->rd_low = 0;
stats->rd = 0;
// TODO(mfo): explore if there's a better way to prevent compiler init
// warnings
#if CONFIG_LV_MAP_MULTI
stats->nz_rd = 0;
#else
stats->nz_rate = 0;
#endif
stats->dist_low = 0;
stats->rate_low = 0;
stats->low_qc = 0;
const tran_low_t tqc = txb_info->tcoeff[coeff_idx];
const int dqv = txb_info->dequant[coeff_idx != 0];
......@@ -2196,9 +2220,215 @@ static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
{ 17, 13 }, { 16, 10 },
};
int av1_optimize_txb(const AV1_COMMON *cm, MACROBLOCK *x, int plane,
// One-time setup for the hash based trellis feature: configures both CRC
// calculators, clears every entry of hbt_hash_table and marks the feature as
// initialized so hash_based_trellis_mode() does not call this again.
void hbt_hash_init(void) {
  av1_crc_calculator_init(&crc_calculator, 16, 0x5D6DCB);   // ctx 16 bit hash
  av1_crc_calculator_init(&crc_calculator2, 16, 0x5D6DCB);  // qc 16 bit hash
  // Size taken from the array itself so it cannot drift from the declaration.
  memset(hbt_hash_table, 0, sizeof(hbt_hash_table));
  hbt_hash_needs_init = 0;
}
// Cache-miss path of the hash based trellis feature.
//
// Runs the regular optimize_txb() on txb_info. If that reports an update, the
// optimized coefficients are stored (in scan order, zero padded up to
// HBT_HASH_EOB) in slot found_index of bucket hbt_hash_index, tagged with
// hbt_hash_match and a fresh hit score of 1.0, and the eob / entropy context
// of `block` in plane `p` are refreshed. If nothing was updated the table and
// `p` are left untouched.
//
// Returns the eob of txb_info after optimization.
int hbt_hash_miss(int found_index, uint16_t hbt_hash_index,
                  uint32_t hbt_hash_match, TxbInfo *txb_info,
                  const LV_MAP_COEFF_COST *txb_costs,
#if CONFIG_LV_MAP_MULTI
                  const LV_MAP_EOB_COST *txb_eob_costs,
#endif
                  const struct macroblock_plane *p, int block, int fast_mode) {
  av1_txb_init_levels(txb_info->qcoeff, txb_info->width, txb_info->height,
                      txb_info->levels);

  // The hash_based_trellis speed feature requires lv_map_multi, so always true.
  const int update = optimize_txb(txb_info, txb_costs,
#if CONFIG_LV_MAP_MULTI
                                  txb_eob_costs,
#endif
                                  NULL, 0, fast_mode);
  if (!update) return txb_info->eob;

  // Replace the slot chosen by the caller (its least-used entry).
  const int16_t *const scan = txb_info->scan_order->scan;
  OptTxbQcoeff *const slot = &hbt_hash_table[hbt_hash_index][found_index];
  slot->hbt_hash_match = hbt_hash_match;
  slot->hits = 1.0;
  for (int pos = 0; pos < HBT_HASH_EOB; pos++) {
    slot->opt_qcoeff[pos] =
        (pos < txb_info->eob) ? txb_info->qcoeff[scan[pos]] : 0;
  }

  p->eobs[block] = txb_info->eob;
  p->txb_entropy_ctx[block] = av1_get_txb_entropy_context(
      txb_info->qcoeff, txb_info->scan_order, txb_info->eob);
  return txb_info->eob;
}
// Cache-hit path of the hash based trellis feature.
//
// Copies the cached coefficients of slot found_index in bucket hbt_hash_index
// over the first txb_info->eob scan positions of txb_info->qcoeff, calling
// update_coeff() for every position whose value changes. If anything changed,
// the eob is recomputed as one past the last non-zero position and the
// eob / entropy context of `block` in plane `p` are refreshed.
//
// Returns the (possibly reduced) eob of txb_info.
int hbt_hash_hit(uint16_t hbt_hash_index, int found_index, TxbInfo *txb_info,
                 const struct macroblock_plane *p, int block) {
  const int16_t *const scan = txb_info->scan_order->scan;
  const OptTxbQcoeff *const cached =
      &hbt_hash_table[hbt_hash_index][found_index];
  int changed = 0;
  int last_nonzero_end = 0;

  for (int pos = 0; pos < txb_info->eob; pos++) {
    const int coeff_idx = scan[pos];
    const tran_low_t cached_qc = cached->opt_qcoeff[pos];
    if (txb_info->qcoeff[coeff_idx] != cached_qc) {
      txb_info->qcoeff[coeff_idx] = cached_qc;
      changed = 1;
      update_coeff(coeff_idx, txb_info->qcoeff[coeff_idx], txb_info);
    }
    if (txb_info->qcoeff[coeff_idx]) last_nonzero_end = pos + 1;
  }

  if (!changed) return txb_info->eob;

  txb_info->eob = last_nonzero_end;
  p->eobs[block] = txb_info->eob;
  p->txb_entropy_ctx[block] = av1_get_txb_entropy_context(
      txb_info->qcoeff, txb_info->scan_order, txb_info->eob);
  return txb_info->eob;
}
// Looks up hbt_hash_match in bucket hbt_hash_index of hbt_hash_table.
//
// Every lookup first scales the hit score of all slots in the bucket by 31/32
// and records the slot with the lowest score. If an occupied slot carries the
// requested hash, its score is increased by 1.0 and its cached coefficients
// are applied through hbt_hash_hit(). Otherwise hbt_hash_miss() runs the full
// optimization and may store the result in the lowest-scoring slot.
//
// Returns the eob reported by the hit or miss handler.
int search_hbt_hash_match(uint16_t hbt_hash_index, uint32_t hbt_hash_match,
                          TxbInfo *txb_info, const LV_MAP_COEFF_COST *txb_costs,
#if CONFIG_LV_MAP_MULTI
                          const LV_MAP_EOB_COST *txb_eob_costs,
#endif
                          const struct macroblock_plane *p, int block,
                          int fast_mode) {
  OptTxbQcoeff *const bucket = hbt_hash_table[hbt_hash_index];
  const int bucket_size =
      (int)(sizeof(hbt_hash_table[0]) / sizeof(hbt_hash_table[0][0]));

  // Decay all hits and track the least-used slot. The minimum is seeded from
  // slot 0 rather than from a fixed 1.0, so the true minimum is found even
  // when every slot in the bucket has a score of 1.0 or more.
  int lowest_index = 0;
  for (int i = 0; i < bucket_size; i++) {
    bucket[i].hits *= 31.0;
    bucket[i].hits /= 32.0;
    if (bucket[i].hits < bucket[lowest_index].hits) lowest_index = i;
  }

  // Search the bucket for a qcoeff hash match. Slots that were never written
  // are all zero (hash 0, score 0); requiring a positive score keeps a real
  // hash value of 0 from matching such a slot and zeroing the coefficients.
  int found_index = -1;
  for (int i = 0; i < bucket_size; i++) {
    if (bucket[i].hits > 0.0 && bucket[i].hbt_hash_match == hbt_hash_match) {
      found_index = i;
      bucket[i].hits += 1.0;
      break;  // Found a match and it's at found_index
    }
  }

  if (found_index == -1) {  // Add new OptTxbQcoeff into array.
    return hbt_hash_miss(lowest_index, hbt_hash_index, hbt_hash_match, txb_info,
                         txb_costs,
#if CONFIG_LV_MAP_MULTI
                         txb_eob_costs,
#endif
                         p, block, fast_mode);
  }
  // Retrieve data from array.
  return hbt_hash_hit(hbt_hash_index, found_index, txb_info, p, block);
}
// Entry point of the hash based trellis feature for one transform block.
//
// Builds two CRC hashes from the block and hands them to
// search_hbt_hash_match():
//  - the qc hash (matched inside a bucket) covers the low two bytes of each
//    pre-optimization qcoeff, in scan order, for the first eob positions;
//  - the ctx hash (the table index) covers the low byte of tcoeff - dqcoeff
//    for the same positions, the txb skip and dc sign contexts, and both
//    dequant values.
// The table and CRC calculators are initialized on first use.
//
// Returns the eob produced by the lookup (hit) or by optimize_txb (miss).
int hash_based_trellis_mode(TxbInfo *txb_info,
                            const LV_MAP_COEFF_COST *txb_costs,
#if CONFIG_LV_MAP_MULTI
                            const LV_MAP_EOB_COST *txb_eob_costs,
#endif
                            const struct macroblock_plane *p, int block,
                            int fast_mode, TXB_CTX *txb_ctx) {
  // Initialize hash table if needed.
  if (hbt_hash_needs_init) hbt_hash_init();

  // TODO(mfo): use exact length once input finalized
  uint8_t hash_input[256];
  const int16_t *const scan = txb_info->scan_order->scan;
  int len = 0;

  // ---- qc hash: pre-optimization qcoeff values ----
  // Data softening experiments (currently disabled, values are hashed as-is):
  // leave -3..3 alone, mask values with magnitude below 16 with 0xfc and
  // larger ones with 0xf0, i.e. put 'large' data into buckets. Consider
  // bucketing less than 16 down to 4 instead of 0.
  for (int pos = 0; pos < txb_info->eob; pos++) {
    const tran_low_t qc = txb_info->qcoeff[scan[pos]];
    hash_input[len++] = (uint8_t)(qc & 0xff);
    hash_input[len++] = (uint8_t)((qc & 0xff00) >> 8);
  }
  assert(len <= 256);
  // 16 bit qc_hash: matched in array
  const uint32_t hbt_hash_match =
      av1_get_crc_value(&crc_calculator2, hash_input, len);

  // ---- ctx hash: context data ----
  len = 0;
  // tcoeff: low byte of the difference to the dequantized coefficient.
  for (int pos = 0; pos < txb_info->eob; pos++) {
    const int coeff_idx = scan[pos];
    hash_input[len++] = (uint8_t)(
        (txb_info->tcoeff[coeff_idx] - txb_info->dqcoeff[coeff_idx]) & 0xff);
  }
  // txb_ctx
  hash_input[len++] = (uint8_t)(txb_ctx->txb_skip_ctx & 0xff);
  hash_input[len++] = (uint8_t)(txb_ctx->dc_sign_ctx & 0xff);
  // dequant: both entries, low byte first.
  for (int k = 0; k < 2; k++) {
    const int dqv = txb_info->dequant[k];
    hash_input[len++] = (uint8_t)(dqv & 0xff);
    hash_input[len++] = (uint8_t)((dqv & 0xff00) >> 8);
  }
  // Also tried and currently disabled: appending the high byte of every
  // txb_costs->txb_skip_cost[ctx][0..1] and of base_eob_cost[ctx][1..2]
  // (index 0 being softened away).
  assert(len <= 256);
  // 16 bit ctx_hash: index to table
  const uint16_t hbt_hash_index =
      av1_get_crc_value(&crc_calculator, hash_input, len);

  return search_hbt_hash_match(hbt_hash_index, hbt_hash_match, txb_info,
                               txb_costs,
#if CONFIG_LV_MAP_MULTI
                               txb_eob_costs,
#endif
                               p, block, fast_mode);
}
int av1_optimize_txb(const AV1_COMP *const cpi, MACROBLOCK *x, int plane,
int blk_row, int blk_col, int block, TX_SIZE tx_size,
TXB_CTX *txb_ctx, int fast_mode) {
const AV1_COMMON *cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
const PLANE_TYPE plane_type = get_plane_type(plane);
const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
......@@ -2266,6 +2496,17 @@ int av1_optimize_txb(const AV1_COMMON *cm, MACROBLOCK *x, int plane,
&cm->coeff_ctx_table
};
// Hash based trellis (hbt) speed feature: avoid expensive optimize_txb calls
// by storing the optimized coefficients in a hash table.
// Currently disabled in speedfeatures.c
if (eob <= HBT_HASH_EOB && eob > 0 && cpi->sf.use_hash_based_trellis) {
return hash_based_trellis_mode(&txb_info, &txb_costs,
#if CONFIG_LV_MAP_MULTI
&txb_eob_costs,
#endif
p, block, fast_mode, txb_ctx);
}
av1_txb_init_levels(qcoeff, width, height, levels);
const int update = optimize_txb(&txb_info, &txb_costs,
......@@ -2623,7 +2864,7 @@ int64_t av1_search_txk_type(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
} else {
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize,
tx_size, AV1_XFORM_QUANT_FP);
av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize,
av1_optimize_b(cpi, x, plane, blk_row, blk_col, block, plane_bsize,
tx_size, a, l, 1);
}
av1_dist_block(cpi, x, plane, plane_bsize, block, blk_row, blk_col, tx_size,
......@@ -2662,7 +2903,7 @@ int64_t av1_search_txk_type(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
} else {
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize,
tx_size, AV1_XFORM_QUANT_FP);
av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize,
av1_optimize_b(cpi, x, plane, blk_row, blk_col, block, plane_bsize,
tx_size, a, l, 1);
}
......
......@@ -107,9 +107,10 @@ int64_t av1_search_txk_type(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l,
int use_fast_coef_costing, RD_STATS *rd_stats);
#endif
int av1_optimize_txb(const AV1_COMMON *cm, MACROBLOCK *x, int plane,
int blk_row, int blk_col, int block, TX_SIZE tx_size,
TXB_CTX *txb_ctx, int fast_mode);
int av1_optimize_txb(const AV1_COMP *cpi, MACROBLOCK *x, int plane, int blk_row,
int blk_col, int block, TX_SIZE tx_size, TXB_CTX *txb_ctx,
int fast_mode);
#ifdef __cplusplus
}
#endif
......
......@@ -641,7 +641,7 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
xd->mi[0]->mbmi.mode = DC_PRED;
xd->mi[0]->mbmi.tx_size =
use_dc_pred ? (bsize >= BLOCK_16X16 ? TX_16X16 : TX_8X8) : TX_4X4;
av1_encode_intra_block_plane(cm, x, bsize, 0, 0, mb_row * 2, mb_col * 2);
av1_encode_intra_block_plane(cpi, x, bsize, 0, 0, mb_row * 2, mb_col * 2);
this_error = aom_get_mb_ss(x->plane[0].src_diff);
// Keep a record of blocks that have almost no intra error residual
......
......@@ -1900,7 +1900,7 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
AV1_XFORM_QUANT_FP);
// TX-domain results need to shift down to Q2/D10 to match pixel
/// TX-domain results need to shift down to Q2/D10 to match pixel
// domain distortion values which are in Q2^2
#if CONFIG_DAALA_TX
const int shift = (TX_COEFF_DEPTH - 10) * 2;
......@@ -1929,7 +1929,7 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
disable_early_skip ||
#endif
RDCOST(x->rdmult, 0, tmp_dist) + args->this_rd < args->best_rd) {
av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize,
av1_optimize_b(cpi, x, plane, blk_row, blk_col, block, plane_bsize,
tx_size, a, l, CONFIG_LV_MAP);
} else {
args->exit_early = 1;
......@@ -3581,7 +3581,7 @@ void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
disable_early_skip ||
#endif
RDCOST(x->rdmult, 0, tmp_dist) < rd_stats->ref_rdcost) {
av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize,
av1_optimize_b(cpi, x, plane, blk_row, blk_col, block, plane_bsize,
tx_size, a, l, fast);
} else {
rd_stats->rate += rd_stats->zero_rate;
......@@ -8704,8 +8704,8 @@ void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
// during luma RDO, so we can store reconstructed luma values
memcpy(x->blk_skip[0], ctx->blk_skip[0],
sizeof(uint8_t) * ctx->num_4x4_blk);
av1_encode_intra_block_plane((AV1_COMMON *)cm, x, bsize, AOM_PLANE_Y, 1,
mi_row, mi_col);
av1_encode_intra_block_plane(cpi, x, bsize, AOM_PLANE_Y, 1, mi_row,
mi_col);
xd->cfl.store_y = 0;
}
#endif // CONFIG_CFL
......
......@@ -149,6 +149,10 @@ static void set_good_speed_features_framesize_independent(AV1_COMP *cpi,
#if CONFIG_DUAL_FILTER
sf->use_fast_interpolation_filter_search = 1;
#endif // CONFIG_DUAL_FILTER
#if 0 // CONFIG_HASH_ME && CONFIG_LV_MAP && CONFIG_LV_MAP_MULTI
// TODO(mfo): Activate feature once it gives positive results.
sf->use_hash_based_trellis = 1;
#endif // CONFIG_HASH_ME && CONFIG_LV_MAP && CONFIG_LV_MAP_MULTI
}
if (speed >= 2) {
......@@ -515,6 +519,7 @@ void av1_set_speed_features_framesize_independent(AV1_COMP *cpi) {
sf->use_transform_domain_distortion = 0;
sf->gm_search_type = GM_FULL_SEARCH;
sf->use_fast_interpolation_filter_search = 0;
sf->use_hash_based_trellis = 0;
set_dev_sf(cpi, sf, oxcf->dev_sf);
......
......@@ -520,6 +520,10 @@ typedef struct SPEED_FEATURES {
// usually includes EIGHTTAP_REGULAR.
int use_fast_interpolation_filter_search;
// Use a hash table to store previously computed optimized qcoeffs from
// expensive calls to optimize_txb.
int use_hash_based_trellis;
// flag to drop some ref frames in compound motion search
int drop_ref;
} SPEED_FEATURES;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment