Commit edc7346f authored by Debargha Mukherjee

Add speed feature to reduce tx size search depth

The speed feature simply restricts the number of transform-partition
depths searched. Currently it is turned on by default for speeds >= 1.
The coding efficiency impact (tested on lowres, 30 frames) seems
to be ~0.15%, and the speedup is on the order of 15%.

Change-Id: I514832bd7df937292875f73d9c9026e49ac576f2
parent fa97083c
......@@ -73,8 +73,6 @@ extern "C" {
#endif // CONFIG_EXT_TILE
#define MAX_VARTX_DEPTH 2
#define SQR_VARTX_DEPTH_INIT 0
#define RECT_VARTX_DEPTH_INIT 0
#define MI_SIZE_64X64 (64 >> MI_SIZE_LOG2)
......
......@@ -1147,13 +1147,11 @@ static void read_intrabc_info(AV1_COMMON *const cm, MACROBLOCKD *const xd,
const TX_SIZE max_tx_size = max_txsize_rect_lookup[bsize];
const int bh = tx_size_high_unit[max_tx_size];
const int bw = tx_size_wide_unit[max_tx_size];
int init_depth =
(height != width) ? RECT_VARTX_DEPTH_INIT : SQR_VARTX_DEPTH_INIT;
mbmi->min_tx_size = TX_SIZES_ALL;
for (idy = 0; idy < height; idy += bh) {
for (idx = 0; idx < width; idx += bw) {
read_tx_size_vartx(cm, xd, mbmi, xd->counts, max_tx_size, init_depth,
idy, idx, r);
read_tx_size_vartx(cm, xd, mbmi, xd->counts, max_tx_size, 0, idy, idx,
r);
}
}
} else {
......@@ -2872,14 +2870,12 @@ static void read_inter_frame_mode_info(AV1Decoder *const pbi,
const int width = block_size_wide[bsize] >> tx_size_wide_log2[0];
const int height = block_size_high[bsize] >> tx_size_wide_log2[0];
int idx, idy;
int init_depth =
(height != width) ? RECT_VARTX_DEPTH_INIT : SQR_VARTX_DEPTH_INIT;
mbmi->min_tx_size = TX_SIZES_ALL;
for (idy = 0; idy < height; idy += bh)
for (idx = 0; idx < width; idx += bw)
read_tx_size_vartx(cm, xd, mbmi, xd->counts, max_tx_size, init_depth,
idy, idx, r);
read_tx_size_vartx(cm, xd, mbmi, xd->counts, max_tx_size, 0, idy, idx,
r);
#if CONFIG_RECT_TX_EXT
if (is_quarter_tx_allowed(xd, mbmi, inter_block) &&
mbmi->tx_size == max_tx_size) {
......
......@@ -1536,13 +1536,10 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
const int bw = tx_size_wide_unit[max_tx_size];
const int width = block_size_wide[bsize] >> tx_size_wide_log2[0];
const int height = block_size_high[bsize] >> tx_size_wide_log2[0];
int init_depth =
(height != width) ? RECT_VARTX_DEPTH_INIT : SQR_VARTX_DEPTH_INIT;
int idx, idy;
for (idy = 0; idy < height; idy += bh)
for (idx = 0; idx < width; idx += bw)
write_tx_size_vartx(cm, xd, mbmi, max_tx_size, init_depth, idy, idx,
w);
write_tx_size_vartx(cm, xd, mbmi, max_tx_size, 0, idy, idx, w);
#if CONFIG_RECT_TX_EXT
if (is_quarter_tx_allowed(xd, mbmi, is_inter_block(mbmi)) &&
quarter_txsize_lookup[bsize] != max_tx_size &&
......@@ -1778,13 +1775,10 @@ static void write_intrabc_info(AV1_COMMON *cm, MACROBLOCKD *xd,
const int bw = tx_size_wide_unit[max_tx_size];
const int width = block_size_wide[bsize] >> tx_size_wide_log2[0];
const int height = block_size_high[bsize] >> tx_size_wide_log2[0];
int init_depth =
(height != width) ? RECT_VARTX_DEPTH_INIT : SQR_VARTX_DEPTH_INIT;
int idx, idy;
for (idy = 0; idy < height; idy += bh) {
for (idx = 0; idx < width; idx += bw) {
write_tx_size_vartx(cm, xd, mbmi, max_tx_size, init_depth, idy, idx,
w);
write_tx_size_vartx(cm, xd, mbmi, max_tx_size, 0, idy, idx, w);
}
}
} else {
......
......@@ -4333,8 +4333,6 @@ static void tx_partition_count_update(const AV1_COMMON *const cm, MACROBLOCK *x,
const int bh = tx_size_high_unit[max_tx_size];
const int bw = tx_size_wide_unit[max_tx_size];
int idx, idy;
int init_depth =
(mi_height != mi_width) ? RECT_VARTX_DEPTH_INIT : SQR_VARTX_DEPTH_INIT;
xd->above_txfm_context =
cm->above_txfm_context + (mi_col << TX_UNIT_WIDE_LOG2);
......@@ -4343,7 +4341,7 @@ static void tx_partition_count_update(const AV1_COMMON *const cm, MACROBLOCK *x,
for (idy = 0; idy < mi_height; idy += bh)
for (idx = 0; idx < mi_width; idx += bw)
update_txfm_count(x, xd, td_counts, max_tx_size, init_depth, idy, idx);
update_txfm_count(x, xd, td_counts, max_tx_size, 0, idy, idx);
}
static void set_txfm_context(MACROBLOCKD *xd, TX_SIZE tx_size, int blk_row,
......
......@@ -4489,6 +4489,13 @@ static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
}
}
// Picks the initial depth for the transform-size partition search.
//
// mi_width / mi_height: block dimensions as supplied by the caller; they are
//   only compared with each other to tell square blocks from rectangular ones.
// sf: speed-feature settings that are consulted for the search method and the
//   per-shape initial depths.
//
// Returns MAX_VARTX_DEPTH when the search method is USE_LARGESTALL; otherwise
// returns the configured initial depth for the block's shape.
static int get_search_init_depth(int mi_width, int mi_height,
                                 const SPEED_FEATURES *sf) {
  if (sf->tx_size_search_method == USE_LARGESTALL) {
    return MAX_VARTX_DEPTH;
  }

  const int is_square = (mi_width == mi_height);
  if (is_square) {
    return sf->tx_size_search_init_depth_sqr;
  }
  return sf->tx_size_search_init_depth_rect;
}
static void select_inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
RD_STATS *rd_stats, BLOCK_SIZE bsize,
int64_t ref_best_rd, int fast) {
......@@ -4510,8 +4517,6 @@ static void select_inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
const int bw = tx_size_wide_unit[max_tx_size];
int idx, idy;
int block = 0;
int init_depth =
(mi_height != mi_width) ? RECT_VARTX_DEPTH_INIT : SQR_VARTX_DEPTH_INIT;
int step = tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size];
ENTROPY_CONTEXT ctxa[2 * MAX_MIB_SIZE];
ENTROPY_CONTEXT ctxl[2 * MAX_MIB_SIZE];
......@@ -4519,6 +4524,7 @@ static void select_inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
TXFM_CONTEXT tx_left[MAX_MIB_SIZE * 2];
RD_STATS pn_rd_stats;
const int init_depth = get_search_init_depth(mi_width, mi_height, &cpi->sf);
av1_init_rd_stats(&pn_rd_stats);
av1_get_entropy_contexts(bsize, 0, pd, ctxa, ctxl);
......@@ -4786,8 +4792,7 @@ int inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_stats,
const TX_SIZE max_tx_size = max_txsize_rect_lookup[plane_bsize];
const int bh = tx_size_high_unit[max_tx_size];
const int bw = tx_size_wide_unit[max_tx_size];
int init_depth =
(mi_height != mi_width) ? RECT_VARTX_DEPTH_INIT : SQR_VARTX_DEPTH_INIT;
const int init_depth = get_search_init_depth(mi_width, mi_height, &cpi->sf);
int idx, idy;
int block = 0;
int step = tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size];
......
......@@ -146,12 +146,15 @@ static void set_good_speed_features_framesize_independent(AV1_COMP *cpi,
const int boosted = frame_is_boosted(cpi);
if (speed >= 1) {
sf->tx_size_search_init_depth_rect = 1;
sf->tx_size_search_init_depth_sqr = 1;
}
if (speed >= 2) {
sf->tx_size_search_method = USE_FAST_RD;
sf->tx_type_search.fast_intra_tx_type_search = 1;
sf->tx_type_search.fast_inter_tx_type_search = 1;
}
if (speed >= 2) {
sf->selective_ref_frame = 1;
if ((cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ||
......@@ -386,6 +389,8 @@ void av1_set_speed_features_framesize_independent(AV1_COMP *cpi) {
sf->comp_inter_joint_search_thresh = BLOCK_4X4;
sf->adaptive_rd_thresh = 0;
sf->tx_size_search_method = USE_FULL_RD;
sf->tx_size_search_init_depth_sqr = 0;
sf->tx_size_search_init_depth_rect = 0;
sf->adaptive_motion_search = 0;
sf->adaptive_pred_interp_filter = 0;
sf->adaptive_mode_search = 0;
......
......@@ -312,6 +312,12 @@ typedef struct SPEED_FEATURES {
// for intra and model coefs for the rest.
TX_SIZE_SEARCH_METHOD tx_size_search_method;
// Init search depth for square and rectangular transform partitions.
// Values:
// 0: search full tree, 1: search 1 level, 2: search the highest level only
int tx_size_search_init_depth_sqr;
int tx_size_search_init_depth_rect;
// After looking at the first set of modes (set by index here), skip
// checking modes for reference frames that don't match the reference frame
// of the best so far.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment