Commit debb9c68 authored by Jingning Han's avatar Jingning Han
Browse files

Use low precision 32x32fdct for encodemb in speed1

The low precision 32x32 fdct has all the intermediate steps within
16-bit depth, hence allowing faster SSE2 implementation, at the
expense of larger round-trip error. It was used in the rate-distortion
optimization search loop only.

Using the low precision version, in replace of the high precision one,
affects the compression performance by about 0.7% (derf, stdhd) at
speed 0. For speed 1, it makes derf set down by only 0.017%.

Change-Id: I4e7d18fac5bea5317b91c8e7dabae143bc6b5c8b
parent 78182538
......@@ -144,7 +144,7 @@ struct macroblock {
int optimize;
// indicate if it is in the rd search loop or encoding process
int rd_search;
int use_lp32x32fdct;
int skip_encode;
// Used to store sub partition's choices.
......
......@@ -565,7 +565,7 @@ static void pick_sb_modes(VP9_COMP *cpi, int mi_row, int mi_col,
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
x->rd_search = 1;
x->use_lp32x32fdct = 1;
if (bsize < BLOCK_8X8) {
// When ab_index = 0 all sub-blocks are handled, so for ab_index != 0
......@@ -2546,7 +2546,7 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
const int mis = cm->mode_info_stride;
const int mi_width = num_8x8_blocks_wide_lookup[bsize];
const int mi_height = num_8x8_blocks_high_lookup[bsize];
x->rd_search = 0;
x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct;
x->skip_encode = (!output_enabled && cpi->sf.skip_encode_frame &&
xd->q_index < QIDX_SKIP_THRESH);
if (x->skip_encode)
......
......@@ -475,7 +475,7 @@ void xform_quant(int plane, int block, BLOCK_SIZE_TYPE bsize,
xoff = 32 * (block & twmask);
yoff = 32 * (block >> twl);
src_diff = p->src_diff + 4 * bw * yoff + xoff;
if (x->rd_search)
if (x->use_lp32x32fdct)
vp9_short_fdct32x32_rd(src_diff, coeff, bw * 8);
else
vp9_short_fdct32x32(src_diff, coeff, bw * 8);
......@@ -670,7 +670,7 @@ void encode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize,
dst, pd->dst.stride, dst, pd->dst.stride);
vp9_subtract_block(32, 32, src_diff, bw * 4,
src, p->src.stride, dst, pd->dst.stride);
if (x->rd_search)
if (x->use_lp32x32fdct)
vp9_short_fdct32x32_rd(src_diff, coeff, bw * 8);
else
vp9_short_fdct32x32(src_diff, coeff, bw * 8);
......
......@@ -723,6 +723,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->adaptive_rd_thresh = 0;
sf->use_lastframe_partitioning = 0;
sf->tx_size_search_method = USE_FULL_RD;
sf->use_lp32x32fdct = 0;
sf->use_8tap_always = 0;
sf->use_avoid_tested_higherror = 0;
sf->reference_masking = 0;
......@@ -794,6 +795,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->use_uv_intra_rd_estimate = 1;
sf->use_rd_breakout = 1;
sf->skip_encode_sb = 1;
sf->use_lp32x32fdct = 1;
sf->auto_mv_step_size = 1;
sf->auto_min_max_partition_size = 1;
......@@ -825,6 +827,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->use_uv_intra_rd_estimate = 1;
sf->use_rd_breakout = 1;
sf->skip_encode_sb = 1;
sf->use_lp32x32fdct = 1;
sf->using_small_partition_info = 1;
sf->disable_splitmv =
(MIN(cpi->common.width, cpi->common.height) >= 720)? 1 : 0;
......@@ -848,6 +851,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
FLAG_EARLY_TERMINATE;
sf->use_rd_breakout = 1;
sf->skip_encode_sb = 1;
sf->use_lp32x32fdct = 1;
sf->disable_splitmv = 1;
sf->auto_mv_step_size = 1;
sf->search_method = BIGDIA;
......@@ -869,6 +873,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
FLAG_SKIP_INTRA_LOWVAR |
FLAG_EARLY_TERMINATE;
sf->use_rd_breakout = 1;
sf->use_lp32x32fdct = 1;
sf->optimize_coefficients = 0;
sf->auto_mv_step_size = 1;
// sf->reduce_first_step_size = 1;
......
......@@ -257,6 +257,7 @@ typedef struct {
int skip_encode_frame;
int use_lastframe_partitioning;
TX_SIZE_SEARCH_METHOD tx_size_search_method;
int use_lp32x32fdct;
int use_8tap_always;
int use_avoid_tested_higherror;
int skip_lots_of_modes;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment