Commit 72033fcf authored by Jingning Han
Browse files

Make memory alloc in pick_mode_context bsize aware

This commit makes the buffer allocation of the zcoeff_blk array in
pick_mode_context block-size aware. It calculates the number of
4x4 blocks in the partition and allocates the memory space accordingly.
This allocation (and the corresponding deallocation) is done once for each
encoding pass. It allows copying a smaller buffer when possible.

For football at 600kbps, the runtimes improve by about 1%:
speed 1, 45961ms -> 45472ms
speed 2, 23863ms -> 23598ms

Change-Id: Id2ca24906fa89f46fa5fe742ec4b8efc2a61f877
parent ea77b034
...@@ -26,7 +26,8 @@ typedef struct { ...@@ -26,7 +26,8 @@ typedef struct {
// Structure to hold snapshot of coding context during the mode picking process // Structure to hold snapshot of coding context during the mode picking process
typedef struct { typedef struct {
MODE_INFO mic; MODE_INFO mic;
uint8_t zcoeff_blk[256]; uint8_t *zcoeff_blk;
int num_4x4_blk;
int skip; int skip;
int_mv best_ref_mv; int_mv best_ref_mv;
int_mv second_best_ref_mv; int_mv second_best_ref_mv;
...@@ -177,6 +178,45 @@ struct macroblock { ...@@ -177,6 +178,45 @@ struct macroblock {
int y_blocks); int y_blocks);
}; };
// Returns the PICK_MODE_CONTEXT slot that `x` holds for a block of size
// `bsize`. The slot is selected with the sb_index / mb_index / b_index values
// currently stored in x->e_mbd; 64x64 uses none of them, the 64x32..32x32
// sizes use sb_index only, the 32x16..16x16 sizes add mb_index, and the
// smaller sizes add b_index as well. An unhandled block size trips the assert
// and yields NULL.
// TODO(jingning): the variables used here are a little complicated. Need
// further refactoring on organizing the temporary buffers, when recursive
// partition down to 4x4 block size is enabled.
static PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x, BLOCK_SIZE bsize) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const int sb = xd->sb_index;
  const int mb = xd->mb_index;
  const int b = xd->b_index;
  PICK_MODE_CONTEXT *ctx = NULL;

  switch (bsize) {
    case BLOCK_64X64:
      ctx = &x->sb64_context;
      break;
    case BLOCK_64X32:
      ctx = &x->sb64x32_context[sb];
      break;
    case BLOCK_32X64:
      ctx = &x->sb32x64_context[sb];
      break;
    case BLOCK_32X32:
      ctx = &x->sb32_context[sb];
      break;
    case BLOCK_32X16:
      ctx = &x->sb32x16_context[sb][mb];
      break;
    case BLOCK_16X32:
      ctx = &x->sb16x32_context[sb][mb];
      break;
    case BLOCK_16X16:
      ctx = &x->mb_context[sb][mb];
      break;
    case BLOCK_16X8:
      ctx = &x->sb16x8_context[sb][mb][b];
      break;
    case BLOCK_8X16:
      ctx = &x->sb8x16_context[sb][mb][b];
      break;
    case BLOCK_8X8:
      ctx = &x->sb8x8_context[sb][mb][b];
      break;
    case BLOCK_8X4:
      ctx = &x->sb8x4_context[sb][mb][b];
      break;
    case BLOCK_4X8:
      ctx = &x->sb4x8_context[sb][mb][b];
      break;
    case BLOCK_4X4:
      ctx = &x->ab4x4_context[sb][mb][b];
      break;
    default:
      // Unknown block size: a programming error in the caller.
      assert(0);
      break;
  }
  return ctx;
}
struct rdcost_block_args { struct rdcost_block_args {
MACROBLOCK *x; MACROBLOCK *x;
ENTROPY_CONTEXT t_above[16]; ENTROPY_CONTEXT t_above[16];
......
...@@ -419,7 +419,7 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, ...@@ -419,7 +419,7 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
x->skip = ctx->skip; x->skip = ctx->skip;
vpx_memcpy(x->zcoeff_blk[mbmi->tx_size], ctx->zcoeff_blk, vpx_memcpy(x->zcoeff_blk[mbmi->tx_size], ctx->zcoeff_blk,
sizeof(ctx->zcoeff_blk)); sizeof(uint8_t) * ctx->num_4x4_blk);
if (!output_enabled) if (!output_enabled)
return; return;
...@@ -699,45 +699,6 @@ static void update_stats(VP9_COMP *cpi) { ...@@ -699,45 +699,6 @@ static void update_stats(VP9_COMP *cpi) {
} }
} }
// Returns the PICK_MODE_CONTEXT slot that `x` holds for a block of size
// `bsize`. The slot is selected with the sb_index / mb_index / b_index values
// currently stored in x->e_mbd; 64x64 uses none of them, the 64x32..32x32
// sizes use sb_index only, the 32x16..16x16 sizes add mb_index, and the
// smaller sizes add b_index as well. An unhandled block size trips the assert
// and yields NULL.
// TODO(jingning): the variables used here are a little complicated. Need
// further refactoring on organizing the temporary buffers, when recursive
// partition down to 4x4 block size is enabled.
static PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x, BLOCK_SIZE bsize) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const int sb = xd->sb_index;
  const int mb = xd->mb_index;
  const int b = xd->b_index;
  PICK_MODE_CONTEXT *ctx = NULL;

  switch (bsize) {
    case BLOCK_64X64:
      ctx = &x->sb64_context;
      break;
    case BLOCK_64X32:
      ctx = &x->sb64x32_context[sb];
      break;
    case BLOCK_32X64:
      ctx = &x->sb32x64_context[sb];
      break;
    case BLOCK_32X32:
      ctx = &x->sb32_context[sb];
      break;
    case BLOCK_32X16:
      ctx = &x->sb32x16_context[sb][mb];
      break;
    case BLOCK_16X32:
      ctx = &x->sb16x32_context[sb][mb];
      break;
    case BLOCK_16X16:
      ctx = &x->mb_context[sb][mb];
      break;
    case BLOCK_16X8:
      ctx = &x->sb16x8_context[sb][mb][b];
      break;
    case BLOCK_8X16:
      ctx = &x->sb8x16_context[sb][mb][b];
      break;
    case BLOCK_8X8:
      ctx = &x->sb8x8_context[sb][mb][b];
      break;
    case BLOCK_8X4:
      ctx = &x->sb8x4_context[sb][mb][b];
      break;
    case BLOCK_4X8:
      ctx = &x->sb4x8_context[sb][mb][b];
      break;
    case BLOCK_4X4:
      ctx = &x->ab4x4_context[sb][mb][b];
      break;
    default:
      // Unknown block size: a programming error in the caller.
      assert(0);
      break;
  }
  return ctx;
}
static BLOCK_SIZE *get_sb_partitioning(MACROBLOCK *x, BLOCK_SIZE bsize) { static BLOCK_SIZE *get_sb_partitioning(MACROBLOCK *x, BLOCK_SIZE bsize) {
MACROBLOCKD *const xd = &x->e_mbd; MACROBLOCKD *const xd = &x->e_mbd;
switch (bsize) { switch (bsize) {
......
...@@ -1414,6 +1414,94 @@ static void cal_nmvsadcosts_hp(int *mvsadcost[2]) { ...@@ -1414,6 +1414,94 @@ static void cal_nmvsadcosts_hp(int *mvsadcost[2]) {
} while (++i <= MV_MAX); } while (++i <= MV_MAX);
} }
static void init_pick_mode_context(VP9_COMP *cpi) {
int i;
MACROBLOCK *x = &cpi->mb;
MACROBLOCKD *xd = &x->e_mbd;
VP9_COMMON *cm = &cpi->common;
for (i = 0; i < BLOCK_SIZES; ++i) {
const int num_4x4_w = num_4x4_blocks_wide_lookup[i];
const int num_4x4_h = num_4x4_blocks_high_lookup[i];
const int num_4x4_blk = MAX(4, num_4x4_w * num_4x4_h);
if (i < BLOCK_16X16) {
for (xd->sb_index = 0; xd->sb_index < 4; ++xd->sb_index) {
for (xd->mb_index = 0; xd->mb_index < 4; ++xd->mb_index) {
for (xd->b_index = 0; xd->b_index < 16 / num_4x4_blk; ++xd->b_index) {
PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
ctx->num_4x4_blk = num_4x4_blk;
CHECK_MEM_ERROR(cm, ctx->zcoeff_blk,
vpx_calloc(num_4x4_blk, sizeof(uint8_t)));
}
}
}
} else if (i < BLOCK_32X32) {
for (xd->sb_index = 0; xd->sb_index < 4; ++xd->sb_index) {
for (xd->mb_index = 0; xd->mb_index < 64 / num_4x4_blk;
++xd->mb_index) {
PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
ctx->num_4x4_blk = num_4x4_blk;
CHECK_MEM_ERROR(cm, ctx->zcoeff_blk,
vpx_calloc(num_4x4_blk, sizeof(uint8_t)));
}
}
} else if (i < BLOCK_64X64) {
for (xd->sb_index = 0; xd->sb_index < 256 / num_4x4_blk; ++xd->sb_index) {
PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
ctx->num_4x4_blk = num_4x4_blk;
CHECK_MEM_ERROR(cm, ctx->zcoeff_blk,
vpx_calloc(num_4x4_blk, sizeof(uint8_t)));
}
} else {
PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
ctx->num_4x4_blk = num_4x4_blk;
CHECK_MEM_ERROR(cm, ctx->zcoeff_blk,
vpx_calloc(num_4x4_blk, sizeof(uint8_t)));
}
}
}
static void free_pick_mode_context(MACROBLOCK *x) {
int i;
MACROBLOCKD *xd = &x->e_mbd;
for (i = 0; i < BLOCK_SIZES; ++i) {
const int num_4x4_w = num_4x4_blocks_wide_lookup[i];
const int num_4x4_h = num_4x4_blocks_high_lookup[i];
const int num_4x4_blk = MAX(4, num_4x4_w * num_4x4_h);
if (i < BLOCK_16X16) {
for (xd->sb_index = 0; xd->sb_index < 4; ++xd->sb_index) {
for (xd->mb_index = 0; xd->mb_index < 4; ++xd->mb_index) {
for (xd->b_index = 0; xd->b_index < 16 / num_4x4_blk; ++xd->b_index) {
PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
vpx_free(ctx->zcoeff_blk);
ctx->zcoeff_blk = 0;
}
}
}
} else if (i < BLOCK_32X32) {
for (xd->sb_index = 0; xd->sb_index < 4; ++xd->sb_index) {
for (xd->mb_index = 0; xd->mb_index < 64 / num_4x4_blk;
++xd->mb_index) {
PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
vpx_free(ctx->zcoeff_blk);
ctx->zcoeff_blk = 0;
}
}
} else if (i < BLOCK_64X64) {
for (xd->sb_index = 0; xd->sb_index < 256 / num_4x4_blk; ++xd->sb_index) {
PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
vpx_free(ctx->zcoeff_blk);
ctx->zcoeff_blk = 0;
}
} else {
PICK_MODE_CONTEXT *ctx = get_block_context(x, i);
vpx_free(ctx->zcoeff_blk);
ctx->zcoeff_blk = 0;
}
}
}
VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
int i, j; int i, j;
volatile union { volatile union {
...@@ -1450,6 +1538,8 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { ...@@ -1450,6 +1538,8 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
init_config((VP9_PTR)cpi, oxcf); init_config((VP9_PTR)cpi, oxcf);
init_pick_mode_context(cpi);
cm->current_video_frame = 0; cm->current_video_frame = 0;
cpi->kf_overspend_bits = 0; cpi->kf_overspend_bits = 0;
cpi->kf_bitrate_adjustment = 0; cpi->kf_bitrate_adjustment = 0;
...@@ -1913,6 +2003,7 @@ void vp9_remove_compressor(VP9_PTR *ptr) { ...@@ -1913,6 +2003,7 @@ void vp9_remove_compressor(VP9_PTR *ptr) {
#endif #endif
} }
free_pick_mode_context(&cpi->mb);
dealloc_compressor_data(cpi); dealloc_compressor_data(cpi);
vpx_free(cpi->mb.ss); vpx_free(cpi->mb.ss);
vpx_free(cpi->tok); vpx_free(cpi->tok);
......
...@@ -3587,7 +3587,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, ...@@ -3587,7 +3587,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
best_mbmode = *mbmi; best_mbmode = *mbmi;
best_skip2 = this_skip2; best_skip2 = this_skip2;
vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size], vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
sizeof(ctx->zcoeff_blk)); sizeof(uint8_t) * ctx->num_4x4_blk);
// TODO(debargha): enhance this test with a better distortion prediction // TODO(debargha): enhance this test with a better distortion prediction
// based on qp, activity mask and history // based on qp, activity mask and history
...@@ -4327,7 +4327,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, ...@@ -4327,7 +4327,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
best_mbmode = *mbmi; best_mbmode = *mbmi;
best_skip2 = this_skip2; best_skip2 = this_skip2;
vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size], vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
sizeof(ctx->zcoeff_blk)); sizeof(uint8_t) * ctx->num_4x4_blk);
for (i = 0; i < 4; i++) for (i = 0; i < 4; i++)
best_bmodes[i] = xd->mi_8x8[0]->bmi[i]; best_bmodes[i] = xd->mi_8x8[0]->bmi[i];
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment