Commit 94c481f9, authored by Deb Mukherjee, committed by Gerrit Code Review

Some minor cleanups for efficiency

Implements some of the helper functions more efficiently with
lookups rather than branches. The modeling function is consolidated
to reduce some computations.

Also merged the two enums BLOCK_SIZE_TYPES and BlockSize into
one because there is no need to keep them separate (even though
the semantics are a little different).

No bitstream or output change.

About 0.5% speedup

Change-Id: I7d71a66e8031ddb340744dc493f22976052b8f9f
parent 72763187
......@@ -91,22 +91,6 @@ static INLINE int is_inter_mode(MB_PREDICTION_MODE mode) {
return mode >= NEARESTMV && mode <= NEWMV;
}
// Segment level features.
// Transform sizes, numbered in increasing order of dimension. The final
// enumerator is a count, not a transform size.
typedef enum {
  TX_4X4 = 0,                    // 4x4 dct transform
  TX_8X8 = 1,                    // 8x8 dct transform
  TX_16X16 = 2,                  // 16x16 dct transform
  TX_32X32 = 3,                  // 32x32 dct transform
  TX_SIZE_MAX_SB = TX_32X32 + 1  // Number of transforms available to SBs
} TX_SIZE;
// Transform type: which 1-D transform (DCT or ADST) is applied in the
// vertical and horizontal directions.
typedef enum {
  DCT_DCT = 0,    // vertical: DCT,  horizontal: DCT
  ADST_DCT = 1,   // vertical: ADST, horizontal: DCT
  DCT_ADST = 2,   // vertical: DCT,  horizontal: ADST
  ADST_ADST = 3   // vertical: ADST, horizontal: ADST
} TX_TYPE;
#define VP9_INTRA_MODES (TM_PRED + 1)
#define VP9_INTER_MODES (1 + NEWMV - NEARESTMV)
......@@ -293,7 +277,7 @@ typedef struct macroblockd {
} MACROBLOCKD;
static int *get_sb_index(MACROBLOCKD *xd, BLOCK_SIZE_TYPE subsize) {
static INLINE int *get_sb_index(MACROBLOCKD *xd, BLOCK_SIZE_TYPE subsize) {
switch (subsize) {
case BLOCK_SIZE_SB64X64:
case BLOCK_SIZE_SB64X32:
......@@ -361,49 +345,8 @@ static INLINE int partition_plane_context(MACROBLOCKD *xd,
static BLOCK_SIZE_TYPE get_subsize(BLOCK_SIZE_TYPE bsize,
PARTITION_TYPE partition) {
BLOCK_SIZE_TYPE subsize = bsize;
switch (partition) {
case PARTITION_NONE:
break;
case PARTITION_HORZ:
if (bsize == BLOCK_SIZE_SB64X64)
subsize = BLOCK_SIZE_SB64X32;
else if (bsize == BLOCK_SIZE_SB32X32)
subsize = BLOCK_SIZE_SB32X16;
else if (bsize == BLOCK_SIZE_MB16X16)
subsize = BLOCK_SIZE_SB16X8;
else if (bsize == BLOCK_SIZE_SB8X8)
subsize = BLOCK_SIZE_SB8X4;
else
assert(0);
break;
case PARTITION_VERT:
if (bsize == BLOCK_SIZE_SB64X64)
subsize = BLOCK_SIZE_SB32X64;
else if (bsize == BLOCK_SIZE_SB32X32)
subsize = BLOCK_SIZE_SB16X32;
else if (bsize == BLOCK_SIZE_MB16X16)
subsize = BLOCK_SIZE_SB8X16;
else if (bsize == BLOCK_SIZE_SB8X8)
subsize = BLOCK_SIZE_SB4X8;
else
assert(0);
break;
case PARTITION_SPLIT:
if (bsize == BLOCK_SIZE_SB64X64)
subsize = BLOCK_SIZE_SB32X32;
else if (bsize == BLOCK_SIZE_SB32X32)
subsize = BLOCK_SIZE_MB16X16;
else if (bsize == BLOCK_SIZE_MB16X16)
subsize = BLOCK_SIZE_SB8X8;
else if (bsize == BLOCK_SIZE_SB8X8)
subsize = BLOCK_SIZE_AB4X4;
else
assert(0);
break;
default:
assert(0);
}
BLOCK_SIZE_TYPE subsize = subsize_lookup[partition][bsize];
assert(subsize != BLOCK_SIZE_TYPES);
return subsize;
}
......@@ -444,31 +387,10 @@ static void setup_block_dptrs(MACROBLOCKD *xd, int ss_x, int ss_y) {
}
static TX_SIZE get_uv_tx_size(const MB_MODE_INFO *mbmi) {
static INLINE TX_SIZE get_uv_tx_size(const MB_MODE_INFO *mbmi) {
const TX_SIZE size = mbmi->txfm_size;
switch (mbmi->sb_type) {
case BLOCK_SIZE_SB64X64:
return size;
case BLOCK_SIZE_SB64X32:
case BLOCK_SIZE_SB32X64:
case BLOCK_SIZE_SB32X32:
if (size == TX_32X32)
return TX_16X16;
else
return size;
case BLOCK_SIZE_SB32X16:
case BLOCK_SIZE_SB16X32:
case BLOCK_SIZE_MB16X16:
if (size == TX_16X16)
return TX_8X8;
else
return size;
default:
return TX_4X4;
}
return size;
const TX_SIZE max_size = max_uv_txsize_lookup[mbmi->sb_type];
return (size > max_size ? max_size : size);
}
struct plane_block_idx {
......@@ -507,6 +429,16 @@ static INLINE int plane_block_height(BLOCK_SIZE_TYPE bsize,
return 4 << (b_height_log2(bsize) - plane->subsampling_y);
}
// Returns the width exponent of `bsize` in the given plane: the value of
// b_width_log2(bsize) reduced by the plane's horizontal subsampling shift.
// Callers recover the width in pixels as (4 << result).
static INLINE int plane_block_width_log2by4(
    BLOCK_SIZE_TYPE bsize, const struct macroblockd_plane* plane) {
  const int width_log2 = b_width_log2(bsize);
  return width_log2 - plane->subsampling_x;
}
// Returns the height exponent of `bsize` in the given plane: the value of
// b_height_log2(bsize) reduced by the plane's vertical subsampling shift.
// The height in pixels is (4 << result), matching plane_block_height().
static INLINE int plane_block_height_log2by4(
    BLOCK_SIZE_TYPE bsize, const struct macroblockd_plane* plane) {
  const int height_log2 = b_height_log2(bsize);
  return height_log2 - plane->subsampling_y;
}
typedef void (*foreach_transformed_block_visitor)(int plane, int block,
BLOCK_SIZE_TYPE bsize,
int ss_txfrm_size,
......
......@@ -8,6 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_common_data.h"
// Log 2 conversion lookup tables for block width and height
......@@ -20,3 +22,57 @@ const int mi_width_log2_lookup[BLOCK_SIZE_TYPES] =
{0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3};
const int mi_height_log2_lookup[BLOCK_SIZE_TYPES] =
{0, 0, 0, 0, 1, 0, 1, 2, 1, 2, 3, 2, 3};
// Sub-block size produced by applying a partition to a block, indexed as
// subsize_lookup[partition][bsize]. BLOCK_SIZE_TYPES is used as the marker
// for (partition, bsize) pairs that have no valid result; get_subsize()
// asserts that it never reads such an entry.
// The trailing comment on each entry names the input block size.
const BLOCK_SIZE_TYPE subsize_lookup[PARTITION_TYPES][BLOCK_SIZE_TYPES] = {
  {  // PARTITION_NONE
    BLOCK_SIZE_AB4X4,    // AB4X4
    BLOCK_SIZE_SB4X8,    // SB4X8
    BLOCK_SIZE_SB8X4,    // SB8X4
    BLOCK_SIZE_SB8X8,    // SB8X8
    BLOCK_SIZE_SB8X16,   // SB8X16
    BLOCK_SIZE_SB16X8,   // SB16X8
    BLOCK_SIZE_MB16X16,  // MB16X16
    BLOCK_SIZE_SB16X32,  // SB16X32
    BLOCK_SIZE_SB32X16,  // SB32X16
    BLOCK_SIZE_SB32X32,  // SB32X32
    BLOCK_SIZE_SB32X64,  // SB32X64
    BLOCK_SIZE_SB64X32,  // SB64X32
    BLOCK_SIZE_SB64X64,  // SB64X64
  },
  {  // PARTITION_HORZ
    BLOCK_SIZE_TYPES,    // AB4X4
    BLOCK_SIZE_TYPES,    // SB4X8
    BLOCK_SIZE_TYPES,    // SB8X4
    BLOCK_SIZE_SB8X4,    // SB8X8
    BLOCK_SIZE_TYPES,    // SB8X16
    BLOCK_SIZE_TYPES,    // SB16X8
    BLOCK_SIZE_SB16X8,   // MB16X16
    BLOCK_SIZE_TYPES,    // SB16X32
    BLOCK_SIZE_TYPES,    // SB32X16
    BLOCK_SIZE_SB32X16,  // SB32X32
    BLOCK_SIZE_TYPES,    // SB32X64
    BLOCK_SIZE_TYPES,    // SB64X32
    BLOCK_SIZE_SB64X32,  // SB64X64
  },
  {  // PARTITION_VERT
    BLOCK_SIZE_TYPES,    // AB4X4
    BLOCK_SIZE_TYPES,    // SB4X8
    BLOCK_SIZE_TYPES,    // SB8X4
    BLOCK_SIZE_SB4X8,    // SB8X8
    BLOCK_SIZE_TYPES,    // SB8X16
    BLOCK_SIZE_TYPES,    // SB16X8
    BLOCK_SIZE_SB8X16,   // MB16X16
    BLOCK_SIZE_TYPES,    // SB16X32
    BLOCK_SIZE_TYPES,    // SB32X16
    BLOCK_SIZE_SB16X32,  // SB32X32
    BLOCK_SIZE_TYPES,    // SB32X64
    BLOCK_SIZE_TYPES,    // SB64X32
    BLOCK_SIZE_SB32X64,  // SB64X64
  },
  {  // PARTITION_SPLIT
    BLOCK_SIZE_TYPES,    // AB4X4
    BLOCK_SIZE_TYPES,    // SB4X8
    BLOCK_SIZE_TYPES,    // SB8X4
    BLOCK_SIZE_AB4X4,    // SB8X8
    BLOCK_SIZE_TYPES,    // SB8X16
    BLOCK_SIZE_TYPES,    // SB16X8
    BLOCK_SIZE_SB8X8,    // MB16X16
    BLOCK_SIZE_TYPES,    // SB16X32
    BLOCK_SIZE_TYPES,    // SB32X16
    BLOCK_SIZE_MB16X16,  // SB32X32
    BLOCK_SIZE_TYPES,    // SB32X64
    BLOCK_SIZE_TYPES,    // SB64X32
    BLOCK_SIZE_SB32X32,  // SB64X64
  }
};
// Transform size associated with each BLOCK_SIZE_TYPE, indexed by block size.
// NOTE(review): per the name this is presumably the largest transform allowed
// for the block; no user of the table is visible here -- confirm.
const TX_SIZE max_txsize_lookup[BLOCK_SIZE_TYPES] = {
  // AB4X4, SB4X8, SB8X4
  TX_4X4, TX_4X4, TX_4X4,
  // SB8X8, SB8X16, SB16X8
  TX_8X8, TX_8X8, TX_8X8,
  // MB16X16, SB16X32, SB32X16
  TX_16X16, TX_16X16, TX_16X16,
  // SB32X32, SB32X64, SB64X32
  TX_32X32, TX_32X32, TX_32X32,
  // SB64X64
  TX_32X32
};
// Upper bound on the transform size returned by get_uv_tx_size(), indexed by
// BLOCK_SIZE_TYPE: that function clamps mbmi->txfm_size to this value.
const TX_SIZE max_uv_txsize_lookup[BLOCK_SIZE_TYPES] = {
  // AB4X4, SB4X8, SB8X4
  TX_4X4, TX_4X4, TX_4X4,
  // SB8X8, SB8X16, SB16X8
  TX_4X4, TX_4X4, TX_4X4,
  // MB16X16, SB16X32, SB32X16
  TX_8X8, TX_8X8, TX_8X8,
  // SB32X32, SB32X64, SB64X32
  TX_16X16, TX_16X16, TX_16X16,
  // SB64X64
  TX_32X32
};
// Block size for a pair of dimension exponents, read by get_block_size() as
// bsize_from_dim_lookup[bwl][bhl]. Each exponent e in 0..4 stands for a
// dimension of (4 << e) pixels. Exponent pairs that have no enumerator of
// their own repeat a neighbouring entry from the same row.
const BLOCK_SIZE_TYPE bsize_from_dim_lookup[5][5] = {
  {  // bwl == 0
    BLOCK_SIZE_AB4X4, BLOCK_SIZE_SB4X8, BLOCK_SIZE_SB4X8,
    BLOCK_SIZE_SB4X8, BLOCK_SIZE_SB4X8
  },
  {  // bwl == 1
    BLOCK_SIZE_SB8X4, BLOCK_SIZE_SB8X8, BLOCK_SIZE_SB8X16,
    BLOCK_SIZE_SB8X16, BLOCK_SIZE_SB8X16
  },
  {  // bwl == 2
    BLOCK_SIZE_SB16X8, BLOCK_SIZE_SB16X8, BLOCK_SIZE_MB16X16,
    BLOCK_SIZE_SB16X32, BLOCK_SIZE_SB16X32
  },
  {  // bwl == 3
    BLOCK_SIZE_SB32X16, BLOCK_SIZE_SB32X16, BLOCK_SIZE_SB32X16,
    BLOCK_SIZE_SB32X32, BLOCK_SIZE_SB32X64
  },
  {  // bwl == 4
    BLOCK_SIZE_SB64X32, BLOCK_SIZE_SB64X32, BLOCK_SIZE_SB64X32,
    BLOCK_SIZE_SB64X32, BLOCK_SIZE_SB64X64
  }
};
......@@ -17,5 +17,9 @@ extern const int b_width_log2_lookup[BLOCK_SIZE_TYPES];
extern const int b_height_log2_lookup[BLOCK_SIZE_TYPES];
extern const int mi_width_log2_lookup[BLOCK_SIZE_TYPES];
extern const int mi_height_log2_lookup[BLOCK_SIZE_TYPES];
extern const BLOCK_SIZE_TYPE subsize_lookup[PARTITION_TYPES][BLOCK_SIZE_TYPES];
extern const TX_SIZE max_txsize_lookup[BLOCK_SIZE_TYPES];
extern const TX_SIZE max_uv_txsize_lookup[BLOCK_SIZE_TYPES];
extern const BLOCK_SIZE_TYPE bsize_from_dim_lookup[5][5];
#endif // VP9_COMMON_VP9_COMMON_DATA_H
......@@ -22,20 +22,20 @@
#define MI_MASK (MI_BLOCK_SIZE - 1)
typedef enum BLOCK_SIZE_TYPE {
BLOCK_SIZE_AB4X4,
BLOCK_SIZE_SB4X8,
BLOCK_SIZE_SB8X4,
BLOCK_SIZE_SB8X8,
BLOCK_SIZE_SB8X16,
BLOCK_SIZE_SB16X8,
BLOCK_SIZE_MB16X16,
BLOCK_SIZE_SB16X32,
BLOCK_SIZE_SB32X16,
BLOCK_SIZE_SB32X32,
BLOCK_SIZE_SB32X64,
BLOCK_SIZE_SB64X32,
BLOCK_SIZE_SB64X64,
BLOCK_SIZE_TYPES
BLOCK_SIZE_AB4X4, BLOCK_4X4 = BLOCK_SIZE_AB4X4,
BLOCK_SIZE_SB4X8, BLOCK_4X8 = BLOCK_SIZE_SB4X8,
BLOCK_SIZE_SB8X4, BLOCK_8X4 = BLOCK_SIZE_SB8X4,
BLOCK_SIZE_SB8X8, BLOCK_8X8 = BLOCK_SIZE_SB8X8,
BLOCK_SIZE_SB8X16, BLOCK_8X16 = BLOCK_SIZE_SB8X16,
BLOCK_SIZE_SB16X8, BLOCK_16X8 = BLOCK_SIZE_SB16X8,
BLOCK_SIZE_MB16X16, BLOCK_16X16 = BLOCK_SIZE_MB16X16,
BLOCK_SIZE_SB16X32, BLOCK_16X32 = BLOCK_SIZE_SB16X32,
BLOCK_SIZE_SB32X16, BLOCK_32X16 = BLOCK_SIZE_SB32X16,
BLOCK_SIZE_SB32X32, BLOCK_32X32 = BLOCK_SIZE_SB32X32,
BLOCK_SIZE_SB32X64, BLOCK_32X64 = BLOCK_SIZE_SB32X64,
BLOCK_SIZE_SB64X32, BLOCK_64X32 = BLOCK_SIZE_SB64X32,
BLOCK_SIZE_SB64X64, BLOCK_64X64 = BLOCK_SIZE_SB64X64,
BLOCK_SIZE_TYPES, BLOCK_MAX_SB_SEGMENTS = BLOCK_SIZE_TYPES,
} BLOCK_SIZE_TYPE;
typedef enum PARTITION_TYPE {
......@@ -49,4 +49,19 @@ typedef enum PARTITION_TYPE {
#define PARTITION_PLOFFSET 4 // number of probability models per block size
#define NUM_PARTITION_CONTEXTS (4 * PARTITION_PLOFFSET)
// Transform sizes, numbered in increasing order of dimension. The final
// enumerator is a count, not a transform size.
typedef enum {
  TX_4X4 = 0,                    // 4x4 dct transform
  TX_8X8 = 1,                    // 8x8 dct transform
  TX_16X16 = 2,                  // 16x16 dct transform
  TX_32X32 = 3,                  // 32x32 dct transform
  TX_SIZE_MAX_SB = TX_32X32 + 1  // Number of transforms available to SBs
} TX_SIZE;
// Transform type: which 1-D transform (DCT or ADST) is applied in the
// vertical and horizontal directions.
typedef enum {
  DCT_DCT = 0,    // vertical: DCT,  horizontal: DCT
  ADST_DCT = 1,   // vertical: ADST, horizontal: DCT
  DCT_ADST = 2,   // vertical: DCT,  horizontal: ADST
  ADST_ADST = 3   // vertical: ADST, horizontal: ADST
} TX_TYPE;
#endif // VP9_COMMON_VP9_ENUMS_H_
......@@ -370,19 +370,6 @@ static void zz_motion_search(VP9_COMP *cpi, MACROBLOCK *x, YV12_BUFFER_CONFIG *r
}
}
// Translates a BLOCK_SIZE_TYPE into the matching enum BlockSize value.
// Only 8x8, 16x8 and 8x16 are translated individually; every other input
// yields BLOCK_16X16.
static enum BlockSize get_bs(BLOCK_SIZE_TYPE b) {
  if (b == BLOCK_SIZE_SB8X8)
    return BLOCK_8X8;
  if (b == BLOCK_SIZE_SB16X8)
    return BLOCK_16X8;
  if (b == BLOCK_SIZE_SB8X16)
    return BLOCK_8X16;
  return BLOCK_16X16;
}
static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
int_mv *ref_mv, MV *best_mv,
YV12_BUFFER_CONFIG *recon_buffer,
......@@ -398,7 +385,7 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
int further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
int n;
vp9_variance_fn_ptr_t v_fn_ptr =
cpi->fn_ptr[get_bs(xd->mode_info_context->mbmi.sb_type)];
cpi->fn_ptr[xd->mode_info_context->mbmi.sb_type];
int new_mv_mode_penalty = 256;
int sr = 0;
......
......@@ -278,23 +278,6 @@ typedef struct {
int use_rd_breakout;
} SPEED_FEATURES;
// Block sizes, with values written out explicitly.
// Note that from BLOCK_32X32 onward the enumerators are not in ascending
// size order (32X32 precedes 32X16 and 16X32; 64X32 precedes 32X64), so the
// numbering differs from the order used by BLOCK_SIZE_TYPE.
enum BlockSize {
  BLOCK_4X4 = 0,
  BLOCK_4X8 = 1,
  BLOCK_8X4 = 2,
  BLOCK_8X8 = 3,
  BLOCK_8X16 = 4,
  BLOCK_16X8 = 5,
  BLOCK_16X16 = 6,
  BLOCK_32X32 = 7,
  BLOCK_32X16 = 8,
  BLOCK_16X32 = 9,
  BLOCK_64X32 = 10,
  BLOCK_32X64 = 11,
  BLOCK_64X64 = 12,
  BLOCK_MAX_SB_SEGMENTS = 13  // number of block sizes above
};
typedef struct VP9_COMP {
DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]);
......@@ -540,7 +523,7 @@ typedef struct VP9_COMP {
vp9_full_search_fn_t full_search_sad;
vp9_refining_search_fn_t refining_search_sad;
vp9_diamond_search_fn_t diamond_search_sad;
vp9_variance_fn_ptr_t fn_ptr[BLOCK_MAX_SB_SEGMENTS];
vp9_variance_fn_ptr_t fn_ptr[BLOCK_SIZE_TYPES];
uint64_t time_receive_data;
uint64_t time_compress_data;
uint64_t time_pick_lpf;
......
......@@ -289,77 +289,43 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
}
}
static enum BlockSize get_block_size(int bw, int bh) {
if (bw == 4 && bh == 4)
return BLOCK_4X4;
if (bw == 4 && bh == 8)
return BLOCK_4X8;
if (bw == 8 && bh == 4)
return BLOCK_8X4;
if (bw == 8 && bh == 8)
return BLOCK_8X8;
if (bw == 8 && bh == 16)
return BLOCK_8X16;
if (bw == 16 && bh == 8)
return BLOCK_16X8;
if (bw == 16 && bh == 16)
return BLOCK_16X16;
if (bw == 32 && bh == 32)
return BLOCK_32X32;
if (bw == 32 && bh == 16)
return BLOCK_32X16;
if (bw == 16 && bh == 32)
return BLOCK_16X32;
if (bw == 64 && bh == 32)
return BLOCK_64X32;
if (bw == 32 && bh == 64)
return BLOCK_32X64;
if (bw == 64 && bh == 64)
return BLOCK_64X64;
assert(0);
return -1;
static INLINE BLOCK_SIZE_TYPE get_block_size(int bwl, int bhl) {
return bsize_from_dim_lookup[bwl][bhl];
}
static enum BlockSize get_plane_block_size(BLOCK_SIZE_TYPE bsize,
struct macroblockd_plane *pd) {
return get_block_size(plane_block_width(bsize, pd),
plane_block_height(bsize, pd));
static BLOCK_SIZE_TYPE get_plane_block_size(BLOCK_SIZE_TYPE bsize,
struct macroblockd_plane *pd) {
return get_block_size(plane_block_width_log2by4(bsize, pd),
plane_block_height_log2by4(bsize, pd));
}
static double linear_interpolate(double x, int ntab, int inv_step,
const double *tab) {
static inline void linear_interpolate2(double x, int ntab, int inv_step,
const double *tab1, const double *tab2,
double *v1, double *v2) {
double y = x * inv_step;
int d = (int) y;
if (d >= ntab - 1) {
return tab[ntab - 1];
*v1 = tab1[ntab - 1];
*v2 = tab2[ntab - 1];
} else {
double a = y - d;
return tab[d] * (1 - a) + tab[d + 1] * a;
*v1 = tab1[d] * (1 - a) + tab1[d + 1] * a;
*v2 = tab2[d] * (1 - a) + tab2[d + 1] * a;
}
}
static double model_rate_norm(double x) {
static void model_rd_norm(double x, double *R, double *D) {
static const int inv_tab_step = 8;
static const int tab_size = 120;
// NOTE: The tables below must be of the same size
//
// Normalized rate
// This function models the rate for a Laplacian source
// This table models the rate for a Laplacian source
// with given variance when quantized with a uniform quantizer
// with given stepsize. The closed form expression is:
// Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
// where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
// and H(x) is the binary entropy function.
static const int inv_rate_tab_step = 8;
static const double rate_tab[] = {
64.00, 4.944, 3.949, 3.372, 2.966, 2.655, 2.403, 2.194,
2.014, 1.858, 1.720, 1.596, 1.485, 1.384, 1.291, 1.206,
......@@ -377,20 +343,13 @@ static double model_rate_norm(double x) {
0.002, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001,
0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.000,
};
const int rate_tab_num = sizeof(rate_tab)/sizeof(rate_tab[0]);
assert(x >= 0.0);
return linear_interpolate(x, rate_tab_num, inv_rate_tab_step, rate_tab);
}
static double model_dist_norm(double x) {
// Normalized distortion
// This function models the normalized distortion for a Laplacian source
// This table models the normalized distortion for a Laplacian source
// with given variance when quantized with a uniform quantizer
// with given stepsize. The closed form expression is:
// Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
// where x = qpstep / sqrt(variance)
// Note the actual distortion is Dn * variance.
static const int inv_dist_tab_step = 8;
static const double dist_tab[] = {
0.000, 0.001, 0.005, 0.012, 0.021, 0.032, 0.045, 0.061,
0.079, 0.098, 0.119, 0.142, 0.166, 0.190, 0.216, 0.242,
......@@ -408,9 +367,14 @@ static double model_dist_norm(double x) {
0.998, 0.998, 0.998, 0.999, 0.999, 0.999, 0.999, 0.999,
0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 1.000,
};
const int dist_tab_num = sizeof(dist_tab)/sizeof(dist_tab[0]);
/*
assert(sizeof(rate_tab) == tab_size * sizeof(rate_tab[0]));
assert(sizeof(dist_tab) == tab_size * sizeof(dist_tab[0]));
assert(sizeof(rate_tab) == sizeof(dist_tab));
*/
assert(x >= 0.0);
return linear_interpolate(x, dist_tab_num, inv_dist_tab_step, dist_tab);
linear_interpolate2(x, tab_size, inv_tab_step,
rate_tab, dist_tab, R, D);
}
static void model_rd_from_var_lapndz(int var, int n, int qstep,
......@@ -429,14 +393,9 @@ static void model_rd_from_var_lapndz(int var, int n, int qstep,
double D, R;
double s2 = (double) var / n;
double x = qstep / sqrt(s2);
D = model_dist_norm(x);
R = model_rate_norm(x);
if (R < 0) {
R = 0;
D = var;
}
*rate = (n * R * 256 + 0.5);
*dist = (n * D * s2 + 0.5);
model_rd_norm(x, &R, &D);
*rate = ((n << 8) * R + 0.5);
*dist = (var * D + 0.5);
}
vp9_clear_system_state();
}
......@@ -454,16 +413,17 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
struct macroblockd_plane *const pd = &xd->plane[i];
// TODO(dkovalev) the same code in get_plane_block_size
const int bw = plane_block_width(bsize, pd);
const int bh = plane_block_height(bsize, pd);
const enum BlockSize bs = get_block_size(bw, bh);
const int bwl = plane_block_width_log2by4(bsize, pd);
const int bhl = plane_block_height_log2by4(bsize, pd);
const BLOCK_SIZE_TYPE bs = get_block_size(bwl, bhl);
unsigned int sse;
int rate;
int64_t dist;
(void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
pd->dst.buf, pd->dst.stride, &sse);
// sse works better than var, since there is no dc prediction used
model_rd_from_var_lapndz(sse, bw * bh, pd->dequant[1] >> 3, &rate, &dist);
model_rd_from_var_lapndz(sse, 16 << (bwl + bhl),
pd->dequant[1] >> 3, &rate, &dist);
rate_sum += rate;
dist_sum += dist;
......@@ -483,16 +443,17 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
struct macroblockd_plane *const pd = &xd->plane[0];
// TODO(dkovalev) the same code in get_plane_block_size
const int bw = plane_block_width(bsize, pd);
const int bh = plane_block_height(bsize, pd);
const enum BlockSize bs = get_block_size(bw, bh);
const int bwl = plane_block_width_log2by4(bsize, pd);
const int bhl = plane_block_height_log2by4(bsize, pd);
const BLOCK_SIZE_TYPE bs = get_block_size(bwl, bhl);
unsigned int sse;
int rate;
int64_t dist;
(void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
pd->dst.buf, pd->dst.stride, &sse);
// sse works better than var, since there is no dc prediction used
model_rd_from_var_lapndz(sse, bw * bh, pd->dequant[1] >> 3, &rate, &dist);
model_rd_from_var_lapndz(sse, 16 << (bwl + bhl),
pd->dequant[1] >> 3, &rate, &dist);
*out_rate_sum = rate;
*out_dist_sum = dist << 4;
......@@ -504,11 +465,13 @@ static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
int *out_rate_sum, int64_t *out_dist_sum,
int *out_skip) {
int t = 4, j, k;
enum BlockSize bs = BLOCK_4X4;
BLOCK_SIZE_TYPE bs = BLOCK_SIZE_AB4X4;
struct macroblock_plane *const p = &x->plane[0];
struct macroblockd_plane *const pd = &xd->plane[0];
const int bw = plane_block_width(bsize, pd);
const int bh = plane_block_height(bsize, pd);
const int bwl = plane_block_width_log2by4(bsize, pd);
const int bhl = plane_block_height_log2by4(bsize, pd);
const int bw = 4 << bwl;
const int bh = 4 << bhl;
int rate_sum = 0;
int64_t dist_sum = 0;
......@@ -527,7 +490,7 @@ static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
} else {
assert(0);
}
assert(bs <= get_block_size(bw, bh));
assert(bs <= get_block_size(bwl, bhl));
*out_skip = 1;
for (j = 0; j < bh; j+=t) {
for (k = 0; k < bw; k+=t) {
......@@ -772,10 +735,10 @@ static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x,
static int block_error_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize,
int shift, int64_t *sse) {
struct macroblockd_plane *p = &x->e_mbd.plane[0];
const int bw = plane_block_width(bsize, p);
const int bh = plane_block_height(bsize, p);
const int bwl = plane_block_width_log2by4(bsize, p);
const int bhl = plane_block_height_log2by4(bsize, p);
int64_t e = vp9_block_error(x->plane[0].coeff, x->e_mbd.plane[0].dqcoeff,
bw * bh, sse) >> shift;
16 << (bwl + bhl), sse) >> shift;
*sse >>= shift;
return e;
}
......@@ -788,10 +751,10 @@ static int64_t block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize,
*sse = 0;
for (plane = 1; plane < MAX_MB_PLANE; plane++) {
struct macroblockd_plane *p = &x->e_mbd.plane[plane];
const int bw = plane_block_width(bsize, p);
const int bh = plane_block_height(bsize, p);
const int bwl = plane_block_width_log2by4(bsize, p);
const int bhl = plane_block_height_log2by4(bsize, p);
sum += vp9_block_error(x->plane[plane].coeff, x->e_mbd.plane[plane].dqcoeff,
bw * bh, &this_sse);
16 << (bwl + bhl), &this_sse);
*sse += this_sse;
}
*sse >>= shift;
......@@ -850,6 +813,7 @@ static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
int *rate, int64_t *distortion,
int *skip, int64_t *sse,
int64_t ref_best_rd,
BLOCK_SIZE_TYPE bs) {
const TX_SIZE max_txfm_size = TX_32X32
- (bs < BLOCK_SIZE_SB32X32) - (bs < BLOCK_SIZE_MB16X16);
......@@ -871,7 +835,7 @@ static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
mbmi->txfm_size = TX_4X4;
}
super_block_yrd_for_txfm(cm, x, rate, distortion, skip,
&sse[mbmi->txfm_size], INT64_MAX, bs,
&sse[mbmi->txfm_size], ref_best_rd, bs,
mbmi->txfm_size);
cpi->txfm_stepdown_count[0]++;
}
......@@ -984,6 +948,7 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
int (*r)[2], int *rate,
int64_t *d, int64_t *distortion,
int *s, int *skip, int64_t *sse,
int64_t ref_best_rd,
BLOCK_SIZE_TYPE bs,
int *model_used) {
const TX_SIZE max_txfm_size = TX_32X32
......@@ -1058,7 +1023,7 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
// Actually encode using the chosen mode if a model was used, but do not
// update the r, d costs
super_block_yrd_for_txfm(cm, x, rate, distortion, skip,
&sse[mbmi->txfm_size], INT64_MAX,
&sse[mbmi->txfm_size], ref_best_rd,
bs, mbmi->txfm_si