Commit 8783a8a9 authored by Jingning Han

Refactor transform block loop for inter mode decoding

Rework the inter mode transform block decoding loop. Replace the
block index with the row and col indices as the input arguments. This
saves the function call that computes the row and col indices from
the block index and overall block size, as well as many if statements
associated with the transform block position relative to the coding
block. For the test bit-stream pedestrian_area 1080p at 5 Mbps,
the decoding speed goes up from 81.13 fps to 81.92 fps.

Note that the intra coded block decoding needs more refactoring
work than the inter ones, so it keeps using foreach_transformed_block
for now.

Change-Id: I5622bdae7be28ed5af96693274057f55ba9b4fb4
parent 8bf791e7
......@@ -406,20 +406,20 @@ struct inter_args {
int seg_id;
};
static void reconstruct_inter_block(int plane, int block,
static void reconstruct_inter_block(int plane, int row, int col,
BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) {
TX_SIZE tx_size, struct inter_args *arg) {
struct inter_args *args = (struct inter_args *)arg;
MACROBLOCKD *const xd = args->xd;
struct macroblockd_plane *const pd = &xd->plane[plane];
int x, y, eob;
int eob;
const scan_order *sc = &vp9_default_scan_orders[tx_size];
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y);
eob = vp9_decode_block_tokens(xd, plane, sc, plane_bsize,
x, y, tx_size, args->r, args->seg_id);
col, row, tx_size, args->r, args->seg_id);
inverse_transform_block_inter(xd, plane, tx_size,
&pd->dst.buf[4 * y * pd->dst.stride + 4 * x],
pd->dst.stride, eob);
&pd->dst.buf[4 * row * pd->dst.stride + 4 * col],
pd->dst.stride, eob);
*args->eobtotal += eob;
}
......@@ -838,7 +838,27 @@ static void decode_block(VP9Decoder *const pbi, MACROBLOCKD *const xd,
if (!mbmi->skip) {
int eobtotal = 0;
struct inter_args arg = {xd, r, &eobtotal, mbmi->segment_id};
vp9_foreach_transformed_block(xd, bsize, reconstruct_inter_block, &arg);
int plane;
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
const struct macroblockd_plane *const pd = &xd->plane[plane];
const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd)
: mbmi->tx_size;
const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
const int step = (1 << tx_size);
int r, c;
const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ?
0 : xd->mb_to_right_edge >> (5 + pd->subsampling_x));
const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ?
0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
for (r = 0; r < max_blocks_high; r += step)
for (c = 0; c < max_blocks_wide; c += step)
reconstruct_inter_block(plane, r, c, plane_bsize, tx_size, &arg);
}
if (!less8x8 && eobtotal == 0)
mbmi->skip = 1; // skip loopfilter
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment