Commit 54b2a596 authored by Ronald S. Bultje
Browse files

Implement SSE2 block_error.

Change vp9_block_error() to return a 64-bit error value, and change all
callers to expect a 64-bit return value (this prevents overflows, which
we basically don't check for at all right now). Remove the duplicate
block_error() function, which worked around the overflow by clamping its
result to INT_MAX. Remove the old (incompatible) mmx/sse2 block_error
SIMD versions and replace them with a new one that returns a 64-bit value.

Encoding time of first 50 frames of bus @ 1500kbps goes from 3min29 to
3min23, i.e. a 3% overall speedup.

Change-Id: Ib71ac5508b5ee8a80f1753cd85d72df1629abe68
parent 7756e989
...@@ -529,9 +529,8 @@ prototype unsigned int vp9_get_mb_ss "const int16_t *" ...@@ -529,9 +529,8 @@ prototype unsigned int vp9_get_mb_ss "const int16_t *"
specialize vp9_get_mb_ss mmx sse2 specialize vp9_get_mb_ss mmx sse2
# ENCODEMB INVOKE # ENCODEMB INVOKE
prototype int vp9_block_error "int16_t *coeff, int16_t *dqcoeff, int block_size" prototype int64_t vp9_block_error "int16_t *coeff, int16_t *dqcoeff, intptr_t block_size"
specialize vp9_block_error mmx sse2 specialize vp9_block_error sse2
vp9_block_error_sse2=vp9_block_error_xmm
prototype void vp9_subtract_block "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride" prototype void vp9_subtract_block "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride"
specialize vp9_subtract_block sse2 specialize vp9_subtract_block sse2
......
...@@ -582,7 +582,7 @@ static void set_offsets(VP9_COMP *cpi, int mi_row, int mi_col, ...@@ -582,7 +582,7 @@ static void set_offsets(VP9_COMP *cpi, int mi_row, int mi_col,
} }
static void pick_sb_modes(VP9_COMP *cpi, int mi_row, int mi_col, static void pick_sb_modes(VP9_COMP *cpi, int mi_row, int mi_col,
TOKENEXTRA **tp, int *totalrate, int *totaldist, TOKENEXTRA **tp, int *totalrate, int64_t *totaldist,
BLOCK_SIZE_TYPE bsize, PICK_MODE_CONTEXT *ctx) { BLOCK_SIZE_TYPE bsize, PICK_MODE_CONTEXT *ctx) {
VP9_COMMON * const cm = &cpi->common; VP9_COMMON * const cm = &cpi->common;
MACROBLOCK * const x = &cpi->mb; MACROBLOCK * const x = &cpi->mb;
...@@ -1195,7 +1195,7 @@ static void choose_partitioning(VP9_COMP *cpi, MODE_INFO *m, int mi_row, ...@@ -1195,7 +1195,7 @@ static void choose_partitioning(VP9_COMP *cpi, MODE_INFO *m, int mi_row,
} }
static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp, static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp,
int mi_row, int mi_col, BLOCK_SIZE_TYPE bsize, int mi_row, int mi_col, BLOCK_SIZE_TYPE bsize,
int *rate, int *dist) { int *rate, int64_t *dist) {
VP9_COMMON * const cm = &cpi->common; VP9_COMMON * const cm = &cpi->common;
MACROBLOCK * const x = &cpi->mb; MACROBLOCK * const x = &cpi->mb;
MACROBLOCKD *xd = &cpi->mb.e_mbd; MACROBLOCKD *xd = &cpi->mb.e_mbd;
...@@ -1211,7 +1211,8 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp, ...@@ -1211,7 +1211,8 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp,
BLOCK_SIZE_TYPE subsize; BLOCK_SIZE_TYPE subsize;
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
PARTITION_CONTEXT sl[8], sa[8]; PARTITION_CONTEXT sl[8], sa[8];
int r = 0, d = 0; int r = 0;
int64_t d = 0;
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return; return;
...@@ -1252,7 +1253,8 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp, ...@@ -1252,7 +1253,8 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp,
pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, subsize, pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, subsize,
get_block_context(x, subsize)); get_block_context(x, subsize));
if (mi_row + (bh >> 1) <= cm->mi_rows) { if (mi_row + (bh >> 1) <= cm->mi_rows) {
int rt, dt; int rt;
int64_t dt;
update_state(cpi, get_block_context(x, subsize), subsize, 0); update_state(cpi, get_block_context(x, subsize), subsize, 0);
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
*(get_sb_index(xd, subsize)) = 1; *(get_sb_index(xd, subsize)) = 1;
...@@ -1270,7 +1272,8 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp, ...@@ -1270,7 +1272,8 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp,
pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, subsize, pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, subsize,
get_block_context(x, subsize)); get_block_context(x, subsize));
if (mi_col + (bs >> 1) <= cm->mi_cols) { if (mi_col + (bs >> 1) <= cm->mi_cols) {
int rt, dt; int rt;
int64_t dt;
update_state(cpi, get_block_context(x, subsize), subsize, 0); update_state(cpi, get_block_context(x, subsize), subsize, 0);
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
*(get_sb_index(xd, subsize)) = 1; *(get_sb_index(xd, subsize)) = 1;
...@@ -1289,7 +1292,8 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp, ...@@ -1289,7 +1292,8 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp,
int x_idx = (i & 1) * (bs >> 2); int x_idx = (i & 1) * (bs >> 2);
int y_idx = (i >> 1) * (bs >> 2); int y_idx = (i >> 1) * (bs >> 2);
int jj = i >> 1, ii = i & 0x01; int jj = i >> 1, ii = i & 0x01;
int rt, dt; int rt;
int64_t dt;
if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols)) if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
continue; continue;
...@@ -1323,7 +1327,7 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp, ...@@ -1323,7 +1327,7 @@ static void rd_use_partition(VP9_COMP *cpi, MODE_INFO *m, TOKENEXTRA **tp,
// results, for encoding speed-up. // results, for encoding speed-up.
static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
int mi_col, BLOCK_SIZE_TYPE bsize, int *rate, int mi_col, BLOCK_SIZE_TYPE bsize, int *rate,
int *dist) { int64_t *dist) {
VP9_COMMON * const cm = &cpi->common; VP9_COMMON * const cm = &cpi->common;
MACROBLOCK * const x = &cpi->mb; MACROBLOCK * const x = &cpi->mb;
MACROBLOCKD * const xd = &x->e_mbd; MACROBLOCKD * const xd = &x->e_mbd;
...@@ -1334,7 +1338,8 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, ...@@ -1334,7 +1338,8 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
TOKENEXTRA *tp_orig = *tp; TOKENEXTRA *tp_orig = *tp;
int i, pl; int i, pl;
BLOCK_SIZE_TYPE subsize; BLOCK_SIZE_TYPE subsize;
int srate = INT_MAX, sdist = INT_MAX; int srate = INT_MAX;
int64_t sdist = INT_MAX;
if (bsize < BLOCK_SIZE_SB8X8) if (bsize < BLOCK_SIZE_SB8X8)
if (xd->ab_index != 0) { if (xd->ab_index != 0) {
...@@ -1351,14 +1356,16 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, ...@@ -1351,14 +1356,16 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
|| (cpi->sf.use_partitions_greater_than || (cpi->sf.use_partitions_greater_than
&& bsize > cpi->sf.greater_than_block_size)) { && bsize > cpi->sf.greater_than_block_size)) {
if (bsize >= BLOCK_SIZE_SB8X8) { if (bsize >= BLOCK_SIZE_SB8X8) {
int r4 = 0, d4 = 0; int r4 = 0;
int64_t d4 = 0;
subsize = get_subsize(bsize, PARTITION_SPLIT); subsize = get_subsize(bsize, PARTITION_SPLIT);
*(get_sb_partitioning(x, bsize)) = subsize; *(get_sb_partitioning(x, bsize)) = subsize;
for (i = 0; i < 4; ++i) { for (i = 0; i < 4; ++i) {
int x_idx = (i & 1) * (ms >> 1); int x_idx = (i & 1) * (ms >> 1);
int y_idx = (i >> 1) * (ms >> 1); int y_idx = (i >> 1) * (ms >> 1);
int r = 0, d = 0; int r = 0;
int64_t d = 0;
if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols)) if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
continue; continue;
...@@ -1386,8 +1393,8 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, ...@@ -1386,8 +1393,8 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
&& bsize <= cpi->sf.less_than_block_size)) { && bsize <= cpi->sf.less_than_block_size)) {
// PARTITION_HORZ // PARTITION_HORZ
if (bsize >= BLOCK_SIZE_SB8X8 && mi_col + (ms >> 1) < cm->mi_cols) { if (bsize >= BLOCK_SIZE_SB8X8 && mi_col + (ms >> 1) < cm->mi_cols) {
int r2, d2; int r2, r = 0;
int r = 0, d = 0; int64_t d2, d = 0;
subsize = get_subsize(bsize, PARTITION_HORZ); subsize = get_subsize(bsize, PARTITION_HORZ);
*(get_sb_index(xd, subsize)) = 0; *(get_sb_index(xd, subsize)) = 0;
pick_sb_modes(cpi, mi_row, mi_col, tp, &r2, &d2, subsize, pick_sb_modes(cpi, mi_row, mi_col, tp, &r2, &d2, subsize,
...@@ -1418,13 +1425,15 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, ...@@ -1418,13 +1425,15 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
// PARTITION_VERT // PARTITION_VERT
if (bsize >= BLOCK_SIZE_SB8X8 && mi_row + (ms >> 1) < cm->mi_rows) { if (bsize >= BLOCK_SIZE_SB8X8 && mi_row + (ms >> 1) < cm->mi_rows) {
int r2, d2; int r2;
int64_t d2;
subsize = get_subsize(bsize, PARTITION_VERT); subsize = get_subsize(bsize, PARTITION_VERT);
*(get_sb_index(xd, subsize)) = 0; *(get_sb_index(xd, subsize)) = 0;
pick_sb_modes(cpi, mi_row, mi_col, tp, &r2, &d2, subsize, pick_sb_modes(cpi, mi_row, mi_col, tp, &r2, &d2, subsize,
get_block_context(x, subsize)); get_block_context(x, subsize));
if (mi_col + (ms >> 1) < cm->mi_cols) { if (mi_col + (ms >> 1) < cm->mi_cols) {
int r = 0, d = 0; int r = 0;
int64_t d = 0;
update_state(cpi, get_block_context(x, subsize), subsize, 0); update_state(cpi, get_block_context(x, subsize), subsize, 0);
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
...@@ -1450,7 +1459,8 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, ...@@ -1450,7 +1459,8 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row,
// PARTITION_NONE // PARTITION_NONE
if ((mi_row + (ms >> 1) < cm->mi_rows) && if ((mi_row + (ms >> 1) < cm->mi_rows) &&
(mi_col + (ms >> 1) < cm->mi_cols)) { (mi_col + (ms >> 1) < cm->mi_cols)) {
int r, d; int r;
int64_t d;
pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, bsize, pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, bsize,
get_block_context(x, bsize)); get_block_context(x, bsize));
if (bsize >= BLOCK_SIZE_SB8X8) { if (bsize >= BLOCK_SIZE_SB8X8) {
...@@ -1497,7 +1507,8 @@ static void encode_sb_row(VP9_COMP *cpi, int mi_row, TOKENEXTRA **tp, ...@@ -1497,7 +1507,8 @@ static void encode_sb_row(VP9_COMP *cpi, int mi_row, TOKENEXTRA **tp,
// Code each SB in the row // Code each SB in the row
for (mi_col = cm->cur_tile_mi_col_start; mi_col < cm->cur_tile_mi_col_end; for (mi_col = cm->cur_tile_mi_col_start; mi_col < cm->cur_tile_mi_col_end;
mi_col += 64 / MI_SIZE) { mi_col += 64 / MI_SIZE) {
int dummy_rate, dummy_dist; int dummy_rate;
int64_t dummy_dist;
if (cpi->sf.partition_by_variance || cpi->sf.use_lastframe_partitioning || if (cpi->sf.partition_by_variance || cpi->sf.use_lastframe_partitioning ||
cpi->sf.use_one_partition_size_always ) { cpi->sf.use_one_partition_size_always ) {
const int idx_str = cm->mode_info_stride * mi_row + mi_col; const int idx_str = cm->mode_info_stride * mi_row + mi_col;
......
...@@ -274,12 +274,14 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) { ...@@ -274,12 +274,14 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
} }
} }
int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) { int64_t vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff,
int i, error = 0; intptr_t block_size) {
int i;
int64_t error = 0;
for (i = 0; i < block_size; i++) { for (i = 0; i < block_size; i++) {
int this_diff = coeff[i] - dqcoeff[i]; int this_diff = coeff[i] - dqcoeff[i];
error += this_diff * this_diff; error += (unsigned)this_diff * this_diff;
} }
return error; return error;
...@@ -417,7 +419,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, ...@@ -417,7 +419,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
int (*r)[2], int *rate, int (*r)[2], int *rate,
int *d, int *distortion, int64_t *d, int64_t *distortion,
int *s, int *skip, int *s, int *skip,
int64_t txfm_cache[NB_TXFM_MODES], int64_t txfm_cache[NB_TXFM_MODES],
TX_SIZE max_txfm_size) { TX_SIZE max_txfm_size) {
...@@ -496,27 +498,15 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, ...@@ -496,27 +498,15 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
rd[TX_4X4][1] : rd[TX_8X8][1]; rd[TX_4X4][1] : rd[TX_8X8][1];
} }
static int block_error(int16_t *coeff, int16_t *dqcoeff, static int64_t block_error_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize,
int block_size, int shift) { int shift) {
int i;
int64_t error = 0;
for (i = 0; i < block_size; i++) {
int this_diff = coeff[i] - dqcoeff[i];
error += (unsigned)this_diff * this_diff;
}
error >>= shift;
return error > INT_MAX ? INT_MAX : (int)error;
}
static int block_error_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
return block_error(x->plane[0].coeff, x->e_mbd.plane[0].dqcoeff, return vp9_block_error(x->plane[0].coeff, x->e_mbd.plane[0].dqcoeff,
16 << (bwl + bhl), shift); 16 << (bwl + bhl)) >> shift;
} }
static int block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) { static int64_t block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize,
int shift) {
const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
int64_t sum = 0; int64_t sum = 0;
int plane; int plane;
...@@ -524,11 +514,10 @@ static int block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) { ...@@ -524,11 +514,10 @@ static int block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
for (plane = 1; plane < MAX_MB_PLANE; plane++) { for (plane = 1; plane < MAX_MB_PLANE; plane++) {
const int subsampling = x->e_mbd.plane[plane].subsampling_x + const int subsampling = x->e_mbd.plane[plane].subsampling_x +
x->e_mbd.plane[plane].subsampling_y; x->e_mbd.plane[plane].subsampling_y;
sum += block_error(x->plane[plane].coeff, x->e_mbd.plane[plane].dqcoeff, sum += vp9_block_error(x->plane[plane].coeff, x->e_mbd.plane[plane].dqcoeff,
16 << (bwl + bhl - subsampling), 0); 16 << (bwl + bhl - subsampling));
} }
sum >>= shift; return sum >> shift;
return sum > INT_MAX ? INT_MAX : (int)sum;
} }
struct rdcost_block_args { struct rdcost_block_args {
...@@ -586,7 +575,8 @@ static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x, ...@@ -586,7 +575,8 @@ static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x,
} }
static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x, static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
int *rate, int *distortion, int *skippable, int *rate, int64_t *distortion,
int *skippable,
BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) { BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
MACROBLOCKD *const xd = &x->e_mbd; MACROBLOCKD *const xd = &x->e_mbd;
xd->mode_info_context->mbmi.txfm_size = tx_size; xd->mode_info_context->mbmi.txfm_size = tx_size;
...@@ -602,11 +592,12 @@ static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x, ...@@ -602,11 +592,12 @@ static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
} }
static void super_block_yrd(VP9_COMP *cpi, static void super_block_yrd(VP9_COMP *cpi,
MACROBLOCK *x, int *rate, int *distortion, MACROBLOCK *x, int *rate, int64_t *distortion,
int *skip, BLOCK_SIZE_TYPE bs, int *skip, BLOCK_SIZE_TYPE bs,
int64_t txfm_cache[NB_TXFM_MODES]) { int64_t txfm_cache[NB_TXFM_MODES]) {
VP9_COMMON *const cm = &cpi->common; VP9_COMMON *const cm = &cpi->common;
int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB]; int r[TX_SIZE_MAX_SB][2], s[TX_SIZE_MAX_SB];
int64_t d[TX_SIZE_MAX_SB];
MACROBLOCKD *xd = &x->e_mbd; MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
...@@ -651,13 +642,13 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, ...@@ -651,13 +642,13 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
int *bmode_costs, int *bmode_costs,
ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
int *bestrate, int *bestratey, int *bestrate, int *bestratey,
int *bestdistortion, int64_t *bestdistortion,
BLOCK_SIZE_TYPE bsize) { BLOCK_SIZE_TYPE bsize) {
MB_PREDICTION_MODE mode; MB_PREDICTION_MODE mode;
MACROBLOCKD *xd = &x->e_mbd; MACROBLOCKD *xd = &x->e_mbd;
int64_t best_rd = INT64_MAX; int64_t best_rd = INT64_MAX;
int rate = 0; int rate = 0;
int distortion; int64_t distortion;
VP9_COMMON *const cm = &cpi->common; VP9_COMMON *const cm = &cpi->common;
const int src_stride = x->plane[0].src.stride; const int src_stride = x->plane[0].src.stride;
uint8_t *src, *dst; uint8_t *src, *dst;
...@@ -777,7 +768,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, ...@@ -777,7 +768,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
int *Rate, int *rate_y, int *Rate, int *rate_y,
int *Distortion, int64_t best_rd) { int64_t *Distortion, int64_t best_rd) {
int i, j; int i, j;
MACROBLOCKD *const xd = &mb->e_mbd; MACROBLOCKD *const xd = &mb->e_mbd;
BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type; BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type;
...@@ -785,7 +776,7 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, ...@@ -785,7 +776,7 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
int bh = 1 << b_height_log2(bsize); int bh = 1 << b_height_log2(bsize);
int idx, idy; int idx, idy;
int cost = 0; int cost = 0;
int distortion = 0; int64_t distortion = 0;
int tot_rate_y = 0; int tot_rate_y = 0;
int64_t total_rd = 0; int64_t total_rd = 0;
ENTROPY_CONTEXT t_above[4], t_left[4]; ENTROPY_CONTEXT t_above[4], t_left[4];
...@@ -802,7 +793,7 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, ...@@ -802,7 +793,7 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
const int mis = xd->mode_info_stride; const int mis = xd->mode_info_stride;
MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode); MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry); int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry);
int UNINITIALIZED_IS_SAFE(d); int64_t UNINITIALIZED_IS_SAFE(d);
i = idy * 2 + idx; i = idy * 2 + idx;
if (xd->frame_type == KEY_FRAME) { if (xd->frame_type == KEY_FRAME) {
...@@ -844,14 +835,14 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, ...@@ -844,14 +835,14 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
int *rate, int *rate_tokenonly, int *rate, int *rate_tokenonly,
int *distortion, int *skippable, int64_t *distortion, int *skippable,
BLOCK_SIZE_TYPE bsize, BLOCK_SIZE_TYPE bsize,
int64_t txfm_cache[NB_TXFM_MODES]) { int64_t txfm_cache[NB_TXFM_MODES]) {
MB_PREDICTION_MODE mode; MB_PREDICTION_MODE mode;
MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
MACROBLOCKD *const xd = &x->e_mbd; MACROBLOCKD *const xd = &x->e_mbd;
int this_rate, this_rate_tokenonly; int this_rate, this_rate_tokenonly, s;
int this_distortion, s; int64_t this_distortion;
int64_t best_rd = INT64_MAX, this_rd; int64_t best_rd = INT64_MAX, this_rd;
TX_SIZE UNINITIALIZED_IS_SAFE(best_tx); TX_SIZE UNINITIALIZED_IS_SAFE(best_tx);
int i; int i;
...@@ -912,7 +903,7 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, ...@@ -912,7 +903,7 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
} }
static void super_block_uvrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x, static void super_block_uvrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
int *rate, int *distortion, int *rate, int64_t *distortion,
int *skippable, BLOCK_SIZE_TYPE bsize, int *skippable, BLOCK_SIZE_TYPE bsize,
TX_SIZE uv_tx_size) { TX_SIZE uv_tx_size) {
MACROBLOCKD *const xd = &x->e_mbd; MACROBLOCKD *const xd = &x->e_mbd;
...@@ -927,7 +918,7 @@ static void super_block_uvrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x, ...@@ -927,7 +918,7 @@ static void super_block_uvrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
} }
static void super_block_uvrd(VP9_COMMON *const cm, MACROBLOCK *x, static void super_block_uvrd(VP9_COMMON *const cm, MACROBLOCK *x,
int *rate, int *distortion, int *skippable, int *rate, int64_t *distortion, int *skippable,
BLOCK_SIZE_TYPE bsize) { BLOCK_SIZE_TYPE bsize) {
MACROBLOCKD *const xd = &x->e_mbd; MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
...@@ -952,13 +943,13 @@ static void super_block_uvrd(VP9_COMMON *const cm, MACROBLOCK *x, ...@@ -952,13 +943,13 @@ static void super_block_uvrd(VP9_COMMON *const cm, MACROBLOCK *x,
static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
int *rate, int *rate_tokenonly, int *rate, int *rate_tokenonly,
int *distortion, int *skippable, int64_t *distortion, int *skippable,
BLOCK_SIZE_TYPE bsize) { BLOCK_SIZE_TYPE bsize) {
MB_PREDICTION_MODE mode; MB_PREDICTION_MODE mode;
MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
int64_t best_rd = INT64_MAX, this_rd; int64_t best_rd = INT64_MAX, this_rd;
int this_rate_tokenonly, this_rate; int this_rate_tokenonly, this_rate, s;
int this_distortion, s; int64_t this_distortion;
for (mode = DC_PRED; mode <= TM_PRED; mode++) { for (mode = DC_PRED; mode <= TM_PRED; mode++) {
x->e_mbd.mode_info_context->mbmi.uv_mode = mode; x->e_mbd.mode_info_context->mbmi.uv_mode = mode;
...@@ -1101,7 +1092,7 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm, ...@@ -1101,7 +1092,7 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm,
MACROBLOCK *x, MACROBLOCK *x,
int i, int i,
int *labelyrate, int *labelyrate,
int *distortion, int64_t *distortion,
ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *ta,
ENTROPY_CONTEXT *tl) { ENTROPY_CONTEXT *tl) {
int k; int k;
...@@ -1126,7 +1117,7 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm, ...@@ -1126,7 +1117,7 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm,
raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
xd->plane[0].dst.buf, xd->plane[0].dst.buf,
xd->plane[0].dst.stride); xd->plane[0].dst.stride);
int thisdistortion = 0; int64_t thisdistortion = 0;
int thisrate = 0; int thisrate = 0;
*labelyrate = 0; *labelyrate = 0;
...@@ -1189,7 +1180,7 @@ typedef struct { ...@@ -1189,7 +1180,7 @@ typedef struct {
int64_t segment_rd; int64_t segment_rd;
int r; int r;
int d; int64_t d;
int segment_yrate; int segment_yrate;
MB_PREDICTION_MODE modes[4]; MB_PREDICTION_MODE modes[4];
int_mv mvs[4], second_mvs[4]; int_mv mvs[4], second_mvs[4];
...@@ -1281,21 +1272,18 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, ...@@ -1281,21 +1272,18 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
BEST_SEG_INFO *bsi, BEST_SEG_INFO *bsi,
int_mv seg_mvs[4][MAX_REF_FRAMES], int_mv seg_mvs[4][MAX_REF_FRAMES],
int mi_row, int mi_col) { int mi_row, int mi_col) {
int i, j; int i, j, br = 0, rate = 0, sbr = 0, idx, idy;
int br = 0, bd = 0; int64_t bd = 0, sbd = 0;
MB_PREDICTION_MODE this_mode; MB_PREDICTION_MODE this_mode;
MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
const int label_count = 4; const int label_count = 4;
int64_t this_segment_rd = 0, other_segment_rd; int64_t this_segment_rd = 0, other_segment_rd;
int label_mv_thresh; int label_mv_thresh;
int rate = 0;
int sbr = 0, sbd = 0;
int segmentyrate = 0; int segmentyrate = 0;
int best_eobs[4] = { 0 }; int best_eobs[4] = { 0 };
BLOCK_SIZE_TYPE bsize = mbmi->sb_type; BLOCK_SIZE_TYPE bsize = mbmi->sb_type;
int bwl = b_width_log2(bsize), bw = 1 << bwl; int bwl = b_width_log2(bsize), bw = 1 << bwl;
int bhl = b_height_log2(bsize), bh = 1 << bhl; int bhl = b_height_log2(bsize), bh = 1 << bhl;
int idx, idy;
vp9_variance_fn_ptr_t *v_fn_ptr; vp9_variance_fn_ptr_t *v_fn_ptr;
ENTROPY_CONTEXT t_above[4], t_left[4]; ENTROPY_CONTEXT t_above[4], t_left[4];
ENTROPY_CONTEXT t_above_b[4], t_left_b[4]; ENTROPY_CONTEXT t_above_b[4], t_left_b[4];
...@@ -1340,7 +1328,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, ...@@ -1340,7 +1328,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
// search for the best motion vector on this segment // search for the best motion vector on this segment
for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) { for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
int64_t this_rd; int64_t this_rd;
int distortion; int64_t distortion;
int labelyrate;