Commit 9ce3a7d7 authored by Hangyu Kuang's avatar Hangyu Kuang Committed by hkuang

Implement frame parallel decode for VP9.

With 4 threads, frame parallel decode is ~3x faster than single-thread
decode and around 30% faster than tile parallel decode for frame-parallel
encoded video, on both Android and desktop. Decode speed also scales with
the number of threads, so decoding could be even faster with more threads.

Change-Id: Ia0a549aaa3e83b5a17b31d8299aa496ea4f21e3e
parent 4d0d7842
......@@ -12,11 +12,37 @@
#include "vpx_mem/vpx_mem.h"
#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_entropymv.h"
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_systemdependent.h"
// TODO(hkuang): Don't need to lock the whole pool after implementing atomic
// frame reference count.
// Acquires the mutex that guards the frame buffer pool. In builds where
// CONFIG_MULTITHREAD is off there is nothing to lock and the call is a no-op.
void lock_buffer_pool(BufferPool *const pool) {
#if !CONFIG_MULTITHREAD
  (void)pool;  // Parameter is unused without threading support.
#else
  pthread_mutex_lock(&pool->pool_mutex);
#endif
}
// Releases the mutex taken by lock_buffer_pool(). In builds where
// CONFIG_MULTITHREAD is off the call is a no-op.
void unlock_buffer_pool(BufferPool *const pool) {
#if !CONFIG_MULTITHREAD
  (void)pool;  // Parameter is unused without threading support.
#else
  pthread_mutex_unlock(&pool->pool_mutex);
#endif
}
// Allocates the two per-frame mode-info arrays for ping-pong slot idx:
// cm->mip_array[idx] and cm->mi_grid_base_array[idx], each with mi_size
// elements obtained from vpx_calloc().
// This function does not release whatever the slot held before; callers
// free (or deliberately keep) the old buffers first.
// NOTE(review): CHECK_MEM_ERROR is defined outside this view; presumably it
// stores the allocation in its second argument and reports failure through
// cm rather than returning a status -- confirm against its definition.
static INLINE void alloc_mi_array(VP9_COMMON *cm, int mi_size, int idx) {
// MODE_INFO storage for this slot.
CHECK_MEM_ERROR(cm, cm->mip_array[idx],
vpx_calloc(mi_size, sizeof(*cm->mip_array[0])));
// Matching grid of MODE_INFO pointers for this slot.
CHECK_MEM_ERROR(cm, cm->mi_grid_base_array[idx],
vpx_calloc(mi_size, sizeof(*cm->mi_grid_base_array[0])));
}
static void clear_mi_border(const VP9_COMMON *cm, MODE_INFO *mi) {
int i;
......@@ -49,40 +75,47 @@ static void setup_mi(VP9_COMMON *cm) {
vpx_memset(cm->mi_grid_base, 0, cm->mi_stride * (cm->mi_rows + 1) *
sizeof(*cm->mi_grid_base));
clear_mi_border(cm, cm->prev_mip);
// Only clear mi border in non frame-parallel decode. In frame-parallel
// decode, prev_mip is managed by previous decoding thread. While in
// non frame-parallel decode, prev_mip and mip are both managed by
// current decoding thread.
if (!cm->frame_parallel_decode)
clear_mi_border(cm, cm->prev_mip);
}
static int alloc_mi(VP9_COMMON *cm, int mi_size) {
int i;
for (i = 0; i < NUM_PING_PONG_BUFFERS; ++i) {
cm->mip_array[i] =
(MODE_INFO *)vpx_calloc(mi_size, sizeof(*cm->mip));
if (cm->mip_array[i] == NULL)
return 1;
cm->mi_grid_base_array[i] =
(MODE_INFO **)vpx_calloc(mi_size, sizeof(*cm->mi_grid_base));
if (cm->mi_grid_base_array[i] == NULL)
return 1;
// Delay reallocation as another thread is accessing prev_mi.
if (cm->frame_parallel_decode && i == cm->prev_mi_idx) {
cm->update_prev_mi = 1;
continue;
}
alloc_mi_array(cm, mi_size, i);
}
// Init the index.
cm->mi_idx = 0;
cm->prev_mi_idx = 1;
cm->mip = cm->mip_array[cm->mi_idx];
cm->prev_mip = cm->mip_array[cm->prev_mi_idx];
cm->mi_grid_base = cm->mi_grid_base_array[cm->mi_idx];
cm->prev_mi_grid_base = cm->mi_grid_base_array[cm->prev_mi_idx];
if (!cm->frame_parallel_decode) {
cm->mi_idx = 0;
cm->prev_mi_idx = 1;
// In frame-parallel decode, prev_mip comes from another thread,
// so current decoding thread should not touch it.
cm->prev_mip = cm->mip_array[cm->prev_mi_idx];
cm->prev_mi_grid_base = cm->mi_grid_base_array[cm->prev_mi_idx];
}
return 0;
}
static void free_mi(VP9_COMMON *cm) {
static void free_mi(VP9_COMMON *cm, int decode_done) {
int i;
for (i = 0; i < NUM_PING_PONG_BUFFERS; ++i) {
if (cm->frame_parallel_decode && i == cm->prev_mi_idx && !decode_done)
continue;
vpx_free(cm->mip_array[i]);
cm->mip_array[i] = NULL;
vpx_free(cm->mi_grid_base_array[i]);
......@@ -90,9 +123,12 @@ static void free_mi(VP9_COMMON *cm) {
}
cm->mip = NULL;
cm->prev_mip = NULL;
cm->mi_grid_base = NULL;
cm->prev_mi_grid_base = NULL;
if (!cm->frame_parallel_decode) {
cm->prev_mip = NULL;
cm->prev_mi_grid_base = NULL;
}
}
static int alloc_seg_map(VP9_COMMON *cm, int seg_map_size) {
......@@ -109,7 +145,10 @@ static int alloc_seg_map(VP9_COMMON *cm, int seg_map_size) {
cm->prev_seg_map_idx = 1;
cm->current_frame_seg_map = cm->seg_map_array[cm->seg_map_idx];
cm->last_frame_seg_map = cm->seg_map_array[cm->prev_seg_map_idx];
if (!cm->frame_parallel_decode) {
cm->last_frame_seg_map = cm->seg_map_array[cm->prev_seg_map_idx];
}
return 0;
}
......@@ -123,7 +162,10 @@ static void free_seg_map(VP9_COMMON *cm) {
}
cm->current_frame_seg_map = NULL;
cm->last_frame_seg_map = NULL;
if (!cm->frame_parallel_decode) {
cm->last_frame_seg_map = NULL;
}
}
void vp9_free_frame_buffers(VP9_COMMON *cm) {
......@@ -144,8 +186,7 @@ void vp9_free_frame_buffers(VP9_COMMON *cm) {
}
void vp9_free_context_buffers(VP9_COMMON *cm) {
free_mi(cm);
free_mi(cm, 1);
free_seg_map(cm);
vpx_free(cm->above_context);
......@@ -170,7 +211,7 @@ int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height) {
set_mb_mi(cm, aligned_width, aligned_height);
free_mi(cm);
free_mi(cm, 0);
if (alloc_mi(cm, cm->mi_stride * (cm->mi_rows + MI_BLOCK_SIZE)))
goto fail;
......@@ -288,7 +329,6 @@ int vp9_alloc_context_buffers(VP9_COMMON *cm, int width, int height) {
void vp9_remove_common(VP9_COMMON *cm) {
vp9_free_frame_buffers(cm);
vp9_free_context_buffers(cm);
vp9_free_internal_frame_buffers(&cm->buffer_pool->int_frame_buffers);
}
void vp9_update_frame_size(VP9_COMMON *cm) {
......@@ -306,6 +346,20 @@ void vp9_update_frame_size(VP9_COMMON *cm) {
void vp9_swap_mi_and_prev_mi(VP9_COMMON *cm) {
// Swap indices.
const int tmp = cm->mi_idx;
// Only used in frame parallel decode: update the prev_mi buffer if
// needed. The worker that was accessing it must have already finished
// decoding, so it can be resized safely now.
if (cm->update_prev_mi) {
const int mi_size = cm->mi_stride * (cm->mi_rows + MI_BLOCK_SIZE);
vpx_free(cm->mip_array[cm->prev_mi_idx]);
vpx_free(cm->mi_grid_base_array[cm->prev_mi_idx]);
cm->mip_array[cm->prev_mi_idx] = NULL;
cm->mi_grid_base_array[cm->prev_mi_idx] = NULL;
alloc_mi_array(cm, mi_size, cm->prev_mi_idx);
cm->update_prev_mi = 0;
}
cm->mi_idx = cm->prev_mi_idx;
cm->prev_mi_idx = tmp;
......
......@@ -439,7 +439,8 @@ void vp9_setup_past_independence(VP9_COMMON *cm) {
int i;
vp9_clearall_segfeatures(&cm->seg);
cm->seg.abs_delta = SEGMENT_DELTADATA;
if (cm->last_frame_seg_map)
if (cm->last_frame_seg_map && !cm->frame_parallel_decode)
vpx_memset(cm->last_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols));
if (cm->current_frame_seg_map)
......@@ -467,7 +468,7 @@ void vp9_setup_past_independence(VP9_COMMON *cm) {
cm->frame_contexts[cm->frame_context_idx] = cm->fc;
}
if (frame_is_intra_only(cm))
if (frame_is_intra_only(cm) && !cm->frame_parallel_decode)
vpx_memset(cm->prev_mip, 0, cm->mi_stride * (cm->mi_rows + 1) *
sizeof(*cm->prev_mip));
......
......@@ -17,14 +17,12 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
const TileInfo *const tile,
MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
int_mv *mv_ref_list,
int block, int mi_row, int mi_col) {
int block, int mi_row, int mi_col,
find_mv_refs_sync sync, void *const data) {
const int *ref_sign_bias = cm->ref_frame_sign_bias;
int i, refmv_count = 0;
const MODE_INFO *prev_mi = cm->coding_use_prev_mi && cm->prev_mi
? cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col]
: NULL;
const MB_MODE_INFO *const prev_mbmi = prev_mi ? &prev_mi->mbmi : NULL;
MODE_INFO *prev_mi = NULL;
MB_MODE_INFO *prev_mbmi = NULL;
const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type];
......@@ -71,6 +69,14 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
}
}
// Synchronize here for frame parallel decode if sync function is provided.
if (sync != NULL) {
sync(data, mi_row);
}
prev_mi = cm->coding_use_prev_mi && cm->prev_mi ?
cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col] : NULL;
prev_mbmi = prev_mi ? &prev_mi->mbmi : NULL;
// Check the last frame's mode and mv info.
if (prev_mbmi) {
if (prev_mbmi->ref_frame[0] == ref_frame)
......@@ -109,12 +115,13 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
}
void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd,
const TileInfo *const tile,
MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
int_mv *mv_ref_list,
int mi_row, int mi_col) {
const TileInfo *const tile,
MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
int_mv *mv_ref_list,
int mi_row, int mi_col,
find_mv_refs_sync sync, void *const data) {
find_mv_refs_idx(cm, xd, tile, mi, ref_frame, mv_ref_list, -1,
mi_row, mi_col);
mi_row, mi_col, sync, data);
}
static void lower_mv_precision(MV *mv, int allow_hp) {
......@@ -152,7 +159,7 @@ void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd,
assert(MAX_MV_REF_CANDIDATES == 2);
find_mv_refs_idx(cm, xd, tile, mi, mi->mbmi.ref_frame[ref], mv_list, block,
mi_row, mi_col);
mi_row, mi_col, NULL, NULL);
near->as_int = 0;
switch (block) {
......
......@@ -204,10 +204,12 @@ static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
}
// Optional synchronization callback for frame parallel decode. When a
// non-NULL callback is supplied, the MV reference search calls it with the
// caller-provided data pointer and the current mi_row just before it reads
// the previous frame's mode info.
typedef void (*find_mv_refs_sync)(void *const data, int mi_row);
void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd,
const TileInfo *const tile,
MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
int_mv *mv_ref_list, int mi_row, int mi_col);
int_mv *mv_ref_list, int mi_row, int mi_col,
find_mv_refs_sync sync, void *const data);
// check a list of motion vectors by sad score using a number rows of pixels
// above and a number cols of pixels in the left to select the one with best
......
......@@ -36,10 +36,13 @@ extern "C" {
#define REF_FRAMES_LOG2 3
#define REF_FRAMES (1 << REF_FRAMES_LOG2)
// 1 scratch frame for the new frame, 3 for scaled references on the encoder
// 4 scratch frames for the new frames to support a maximum of 4 cores decoding
// in parallel, 3 for scaled references on the encoder.
// TODO(hkuang): Add ondemand frame buffers instead of hardcoding the number
// of framebuffers.
// TODO(jkoleszar): These 3 extra references could probably come from the
// normal reference pool.
#define FRAME_BUFFERS (REF_FRAMES + 4)
#define FRAME_BUFFERS (REF_FRAMES + 7)
#define FRAME_CONTEXTS_LOG2 2
#define FRAME_CONTEXTS (1 << FRAME_CONTEXTS_LOG2)
......@@ -64,6 +67,18 @@ typedef struct {
int ref_count;
vpx_codec_frame_buffer_t raw_frame_buffer;
YV12_BUFFER_CONFIG buf;
// The following variables are only used in frame parallel decode.
// frame_worker_owner indicates which FrameWorker owns this buffer. NULL means
// that no FrameWorker owns, or is decoding, this buffer.
VP9Worker *frame_worker_owner;
// row and col indicate the position, in real pixel units, up to which the
// frame has been decoded. They are reset to -1 when decoding begins and set
// to INT_MAX when the frame is fully decoded.
int row;
int col;
} RefCntBuffer;
typedef struct {
......@@ -114,6 +129,10 @@ typedef struct VP9Common {
int ref_frame_map[REF_FRAMES]; /* maps fb_idx to reference slot */
// Prepare ref_frame_map for the next frame.
// Only used in frame parallel decode.
int next_ref_frame_map[REF_FRAMES];
// TODO(jkoleszar): could expand active_ref_idx to 4, with 0 as intra, and
// roll new_fb_idx into it.
......@@ -178,6 +197,9 @@ typedef struct VP9Common {
MODE_INFO **prev_mi_grid_base;
MODE_INFO **prev_mi_grid_visible;
// Used in frame parallel decode for delay resizing prev_mi.
int update_prev_mi;
// Persistent mb segment id map used in prediction.
int seg_map_idx;
int prev_seg_map_idx;
......@@ -197,6 +219,10 @@ typedef struct VP9Common {
struct loopfilter lf;
struct segmentation seg;
// TODO(hkuang): Remove this as it is the same as frame_parallel_decode
// in pbi.
int frame_parallel_decode; // frame-based threading.
// Context probabilities for reference frame prediction
int allow_comp_inter_inter;
MV_REFERENCE_FRAME comp_fixed_ref;
......@@ -235,6 +261,11 @@ typedef struct VP9Common {
ENTROPY_CONTEXT *above_context;
} VP9_COMMON;
// TODO(hkuang): Don't need to lock the whole pool after implementing atomic
// frame reference count.
void lock_buffer_pool(BufferPool *const pool);
void unlock_buffer_pool(BufferPool *const pool);
// Returns the YV12 buffer of the pool entry selected by cm->new_fb_idx,
// i.e. the frame currently being written.
static INLINE YV12_BUFFER_CONFIG *get_frame_new_buffer(VP9_COMMON *cm) {
  RefCntBuffer *const new_fb = &cm->buffer_pool->frame_bufs[cm->new_fb_idx];
  return &new_fb->buf;
}
......@@ -242,12 +273,15 @@ static INLINE YV12_BUFFER_CONFIG *get_frame_new_buffer(VP9_COMMON *cm) {
// Claims the first unreferenced frame buffer in the pool and returns its
// index. The scan and the ref_count update happen under the pool lock, so
// two callers cannot claim the same slot.
// NOTE(review): pool exhaustion is caught only by assert(); if asserts are
// compiled out (NDEBUG) and no slot is free, the write below lands one past
// the end of frame_bufs -- confirm callers can never exhaust the pool.
static INLINE int get_free_fb(VP9_COMMON *cm) {
  BufferPool *const pool = cm->buffer_pool;
  RefCntBuffer *const bufs = pool->frame_bufs;
  int idx = 0;

  lock_buffer_pool(pool);
  // Advance past every buffer that is still referenced.
  while (idx < FRAME_BUFFERS && bufs[idx].ref_count != 0)
    ++idx;

  assert(idx < FRAME_BUFFERS);
  bufs[idx].ref_count = 1;  // The caller now holds the only reference.
  unlock_buffer_pool(pool);

  return idx;
}
......
......@@ -327,21 +327,24 @@ static void set_ref(VP9_COMMON *const cm, MACROBLOCKD *const xd,
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
RefBuffer *ref_buffer = &cm->frame_refs[mbmi->ref_frame[idx] - LAST_FRAME];
xd->block_refs[idx] = ref_buffer;
if (!vp9_is_valid_scale(&ref_buffer->sf))
vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
"Invalid scale factors");
vp9_setup_pre_planes(xd, idx, ref_buffer->buf, mi_row, mi_col,
&ref_buffer->sf);
xd->corrupted |= ref_buffer->buf->corrupted;
if (!cm->frame_parallel_decode)
xd->corrupted |= ref_buffer->buf->corrupted;
}
static void decode_block(VP9_COMMON *const cm, MACROBLOCKD *const xd,
static void decode_block(VP9Decoder *const pbi, MACROBLOCKD *const xd,
const TileInfo *const tile,
int mi_row, int mi_col,
vp9_reader *r, BLOCK_SIZE bsize) {
VP9_COMMON *const cm = &pbi->common;
const int less8x8 = bsize < BLOCK_8X8;
MB_MODE_INFO *mbmi = set_offsets(cm, xd, tile, bsize, mi_row, mi_col);
vp9_read_mode_info(cm, xd, tile, mi_row, mi_col, r);
vp9_read_mode_info(pbi, xd, tile, mi_row, mi_col, r);
if (less8x8)
bsize = BLOCK_8X8;
......@@ -365,7 +368,7 @@ static void decode_block(VP9_COMMON *const cm, MACROBLOCKD *const xd,
set_ref(cm, xd, 1, mi_row, mi_col);
// Prediction
vp9_dec_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
vp9_dec_build_inter_predictors_sb(pbi, xd, mi_row, mi_col, bsize);
// Reconstruction
if (!mbmi->skip) {
......@@ -404,10 +407,11 @@ static PARTITION_TYPE read_partition(VP9_COMMON *cm, MACROBLOCKD *xd, int hbs,
return p;
}
static void decode_partition(VP9_COMMON *const cm, MACROBLOCKD *const xd,
static void decode_partition(VP9Decoder *const pbi, MACROBLOCKD *const xd,
const TileInfo *const tile,
int mi_row, int mi_col,
vp9_reader* r, BLOCK_SIZE bsize) {
VP9_COMMON *const cm = &pbi->common;
const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2;
PARTITION_TYPE partition;
BLOCK_SIZE subsize, uv_subsize;
......@@ -422,27 +426,27 @@ static void decode_partition(VP9_COMMON *const cm, MACROBLOCKD *const xd,
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
"Invalid block size.");
if (subsize < BLOCK_8X8) {
decode_block(cm, xd, tile, mi_row, mi_col, r, subsize);
decode_block(pbi, xd, tile, mi_row, mi_col, r, subsize);
} else {
switch (partition) {
case PARTITION_NONE:
decode_block(cm, xd, tile, mi_row, mi_col, r, subsize);
decode_block(pbi, xd, tile, mi_row, mi_col, r, subsize);
break;
case PARTITION_HORZ:
decode_block(cm, xd, tile, mi_row, mi_col, r, subsize);
decode_block(pbi, xd, tile, mi_row, mi_col, r, subsize);
if (mi_row + hbs < cm->mi_rows)
decode_block(cm, xd, tile, mi_row + hbs, mi_col, r, subsize);
decode_block(pbi, xd, tile, mi_row + hbs, mi_col, r, subsize);
break;
case PARTITION_VERT:
decode_block(cm, xd, tile, mi_row, mi_col, r, subsize);
decode_block(pbi, xd, tile, mi_row, mi_col, r, subsize);
if (mi_col + hbs < cm->mi_cols)
decode_block(cm, xd, tile, mi_row, mi_col + hbs, r, subsize);
decode_block(pbi, xd, tile, mi_row, mi_col + hbs, r, subsize);
break;
case PARTITION_SPLIT:
decode_partition(cm, xd, tile, mi_row, mi_col, r, subsize);
decode_partition(cm, xd, tile, mi_row, mi_col + hbs, r, subsize);
decode_partition(cm, xd, tile, mi_row + hbs, mi_col, r, subsize);
decode_partition(cm, xd, tile, mi_row + hbs, mi_col + hbs, r, subsize);
decode_partition(pbi, xd, tile, mi_row, mi_col, r, subsize);
decode_partition(pbi, xd, tile, mi_row, mi_col + hbs, r, subsize);
decode_partition(pbi, xd, tile, mi_row + hbs, mi_col, r, subsize);
decode_partition(pbi, xd, tile, mi_row + hbs, mi_col + hbs, r, subsize);
break;
default:
assert(0 && "Invalid partition type");
......@@ -638,6 +642,7 @@ static void apply_frame_size(VP9_COMMON *cm, int width, int height) {
vp9_update_frame_size(cm);
}
lock_buffer_pool(pool);
if (vp9_realloc_frame_buffer(
get_frame_new_buffer(cm), cm->width, cm->height,
cm->subsampling_x, cm->subsampling_y, VP9_DEC_BORDER_IN_PIXELS,
......@@ -646,6 +651,7 @@ static void apply_frame_size(VP9_COMMON *cm, int width, int height) {
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate frame buffer");
}
unlock_buffer_pool(pool);
}
static void setup_frame_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) {
......@@ -778,7 +784,7 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi,
const int tile_rows = 1 << cm->log2_tile_rows;
TileBuffer tile_buffers[4][1 << 6];
int tile_row, tile_col;
int mi_row, mi_col;
int mi_row = 0, mi_col = 0;
TileData *tile_data = NULL;
if (cm->lf.filter_level && pbi->lf_worker.data1 == NULL) {
......@@ -798,7 +804,6 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi,
vp9_copy(lf_data->planes, pbi->mb.plane);
lf_data->stop = 0;
lf_data->y_only = 0;
vp9_loop_filter_frame_init(cm, cm->lf.filter_level);
}
assert(tile_rows <= 4);
......@@ -856,7 +861,7 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi,
vp9_zero(tile_data->xd.left_seg_context);
for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end;
mi_col += MI_BLOCK_SIZE) {
decode_partition(tile_data->cm, &tile_data->xd, &tile, mi_row, mi_col,
decode_partition(pbi, &tile_data->xd, &tile, mi_row, mi_col,
&tile_data->bit_reader, BLOCK_64X64);
}
}
......@@ -880,6 +885,12 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi,
winterface->execute(&pbi->lf_worker);
}
}
// After loopfiltering, the last 7 rows of pixels in each superblock row
// may still be changed by the longest loopfilter of the next superblock
// row.
if (pbi->frame_parallel_decode)
vp9_frameworker_broadcast(pbi->cur_buf,
mi_row << MI_BLOCK_SIZE_LOG2);
}
}
......@@ -895,6 +906,8 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi,
// Get last tile data.
tile_data = pbi->tile_data + tile_cols * tile_rows - 1;
if (pbi->frame_parallel_decode)
vp9_frameworker_broadcast(pbi->cur_buf, INT_MAX);
return vp9_reader_find_end(&tile_data->bit_reader);
}
......@@ -909,7 +922,7 @@ static int tile_worker_hook(void *arg1, void *arg2) {
vp9_zero(tile_data->xd.left_seg_context);
for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
mi_col += MI_BLOCK_SIZE) {
decode_partition(tile_data->cm, &tile_data->xd, tile,
decode_partition(tile_data->pbi, &tile_data->xd, tile,
mi_row, mi_col, &tile_data->bit_reader, BLOCK_64X64);
}
}
......@@ -1015,10 +1028,10 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi,
TileInfo *const tile = (TileInfo*)worker->data2;
TileBuffer *const buf = &tile_buffers[0][n];
tile_data->cm = cm;
tile_data->pbi = pbi;
tile_data->xd = pbi->mb;
tile_data->xd.corrupted = 0;
vp9_tile_init(tile, tile_data->cm, 0, buf->col);
vp9_tile_init(tile, &pbi->common, 0, buf->col);
setup_token_decoder(buf->data, data_end, buf->size, &cm->error,
&tile_data->bit_reader, pbi->decrypt_cb,
pbi->decrypt_state);
......@@ -1078,8 +1091,9 @@ static size_t read_uncompressed_header(VP9Decoder *pbi,
struct vp9_read_bit_buffer *rb) {
VP9_COMMON *const cm = &pbi->common;
RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
BufferPool *const pool = pbi->common.buffer_pool;
int i, mask, ref_index = 0;
size_t sz;
int i;
cm->last_frame_type = cm->frame_type;
......@@ -1096,16 +1110,22 @@ static size_t read_uncompressed_header(VP9Decoder *pbi,
if (cm->show_existing_frame) {
// Show an existing frame directly.
const int frame_to_show = cm->ref_frame_map[vp9_rb_read_literal(rb, 3)];
lock_buffer_pool(pool);
if (frame_to_show < 0 || frame_bufs[frame_to_show].ref_count < 1)
vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
"Buffer %d does not contain a decoded frame",
frame_to_show);
ref_cnt_fb(frame_bufs, &cm->new_fb_idx, frame_to_show);
unlock_buffer_pool(pool);
pbi->refresh_frame_flags = 0;
cm->lf.filter_level = 0;
cm->show_frame = 1;
if (pbi->frame_parallel_decode) {
for (i = 0; i < REF_FRAMES; ++i)
cm->next_ref_frame_map[i] = cm->ref_frame_map[i];
}
return 0;
}
......@@ -1166,7 +1186,6 @@ static size_t read_uncompressed_header(VP9Decoder *pbi,
ref_frame->buf = &frame_bufs[idx].buf;
cm->ref_frame_sign_bias[LAST_FRAME + i] = vp9_rb_read_bit(rb);
}
setup_frame_size_with_refs(cm, rb);
cm->allow_high_precision_mv = vp9_rb_read_bit(rb);
......@@ -1198,6 +1217,29 @@ static size_t read_uncompressed_header(VP9Decoder *pbi,
// below, forcing the use of context 0 for those frame types.
cm->frame_context_idx = vp9_rb_read_literal(rb, FRAME_CONTEXTS_LOG2);
// Generate next_ref_frame_map.
lock_buffer_pool(pool);
for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) {
if (mask & 1) {
cm->next_ref_frame_map[ref_index] = cm->new_fb_idx;
++frame_bufs[cm->new_fb_idx].ref_count;
} else {
cm->next_ref_frame_map[ref_index] = cm->ref_frame_map[ref_index];
}
// Current thread holds the reference frame.
if (cm->ref_frame_map[ref_index] >= 0)
++frame_bufs[cm->ref_frame_map[ref_index]].ref_count;
++ref_index;
}
for (; ref_index < REF_FRAMES; ++ref_index) {
cm->next_ref_frame_map[ref_index] = cm->ref_frame_map[ref_index];
// Current thread holds the reference frame.
if (cm->ref_frame_map[ref_index] >= 0)
++frame_bufs[cm->ref_frame_map[ref_index]].ref_count;
}
unlock_buffer_pool(pool);
if (frame_is_intra_only(cm) || cm->error_resilient_mode)
vp9_setup_past_independence(cm);
......@@ -1343,6 +1385,7 @@ void vp9_decode_frame(VP9Decoder *pbi,
VP9_COMMON *const cm = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
struct vp9_read_bit_buffer rb = { NULL, NULL, 0, NULL, 0};
int context_updated = 0;
uint8_t clear_data[MAX_VP9_HEADER_SIZE];
const size_t first_partition_size = read_uncompressed_header(pbi,
......@@ -1380,6 +1423,28 @@ void vp9_decode_frame(VP9Decoder *pbi,
xd->corrupted = 0;
new_fb->corrupted = read_compressed_header(pbi, data, first_partition_size);
if (cm->lf.filter_level) {
vp9_loop_filter_frame_init(cm, cm->lf.filter_level);
}
// If encoded in frame parallel mode, frame context is ready after decoding
// the frame header.