Commit 490ba1ad authored by Geza Lore's avatar Geza Lore

Port large scale tile coding features from nextgen.

If configured with --enable-ext-tile, the codec uses an alternative
tile coding syntax in the bitstream. Changes include:
 - The maximum number of tile rows and columns is extended to 1024
   each.
 - The minimum tile width/height is 64 pixels (1 superblock).
 - A tile copy mode is added where a tile directly reuses the coded
   data of a previous tile.
 - The meanings of the tile-columns and tile-rows codec parameters are
   overloaded to mean tile-width and tile-height in units of 64
   pixels.
 - All tiles should now be independent, including rows within the
   same columns, so large scale parallel, or independent decoding is
   possible.
 - vpxdec also gained the options to decode only a particular tile,
   tile row, or tile column.

Changes without --enable-ext-tile:
 - All tiles should now be independent, including rows within the
   same columns, so large scale parallel, or independent decoding is
   possible.
 - vpxenc default tile configuration changed to use 1 tile column.

Change-Id: I0cd08ad550967ac18622dae5e98ad23d581cb33e
parent b4334460
......@@ -131,28 +131,32 @@ int vp10_alloc_context_buffers(VP10_COMMON *cm, int width, int height) {
}
if (cm->above_context_alloc_cols < cm->mi_cols) {
// TODO(geza.lore): These are bigger than they need to be.
// cm->tile_width would be enough but it complicates indexing a
// little elsewhere.
const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
int i;
for (i = 0 ; i < MAX_MB_PLANE ; i++) {
vpx_free(cm->above_context[i]);
vpx_free(cm->above_context[i]);
cm->above_context[i] = (ENTROPY_CONTEXT *)vpx_calloc(
2 * mi_cols_aligned_to_sb(cm->mi_cols),
sizeof(*cm->above_context[0]));
2 * aligned_mi_cols, sizeof(*cm->above_context[0]));
if (!cm->above_context[i]) goto fail;
}
vpx_free(cm->above_seg_context);
cm->above_seg_context = (PARTITION_CONTEXT *)vpx_calloc(
mi_cols_aligned_to_sb(cm->mi_cols), sizeof(*cm->above_seg_context));
aligned_mi_cols, sizeof(*cm->above_seg_context));
if (!cm->above_seg_context) goto fail;
#if CONFIG_VAR_TX
vpx_free(cm->above_txfm_context);
cm->above_txfm_context = (TXFM_CONTEXT *)vpx_calloc(
mi_cols_aligned_to_sb(cm->mi_cols), sizeof(*cm->above_txfm_context));
aligned_mi_cols, sizeof(*cm->above_txfm_context));
if (!cm->above_txfm_context) goto fail;
#endif
cm->above_context_alloc_cols = cm->mi_cols;
cm->above_context_alloc_cols = aligned_mi_cols;
}
return 0;
......
......@@ -26,6 +26,14 @@ extern "C" {
#define MI_MASK (MI_BLOCK_SIZE - 1)
#if CONFIG_EXT_TILE
# define MAX_TILE_ROWS 1024
# define MAX_TILE_COLS 1024
#else
# define MAX_TILE_ROWS 4
# define MAX_TILE_COLS 64
#endif // CONFIG_EXT_TILE
// Bitstream profiles indicated by 2-3 bits in the uncompressed header.
// 00: Profile 0. 8-bit 4:2:0 only.
// 10: Profile 1. 8-bit 4:4:4, 4:2:2, and 4:4:0.
......
......@@ -211,10 +211,18 @@ static INLINE int_mv scale_mv(const MB_MODE_INFO *mbmi, int ref,
// Returns nonzero iff the candidate position (current mi position plus the
// offset in mi_pos) lies inside the region that may be referenced.
static INLINE int is_inside(const TileInfo *const tile,
                            int mi_col, int mi_row, int mi_rows,
                            const POSITION *mi_pos) {
  const int row = mi_row + mi_pos->row;
  const int col = mi_col + mi_pos->col;
#if CONFIG_EXT_TILE
  // Tiles are fully independent: both rows and columns are confined to the
  // current tile, so the frame-level row count is unused.
  (void) mi_rows;
  return row >= tile->mi_row_start && row < tile->mi_row_end &&
         col >= tile->mi_col_start && col < tile->mi_col_end;
#else
  // Rows are bounded by the frame, columns by the current tile.
  return row >= 0 && row < mi_rows &&
         col >= tile->mi_col_start && col < tile->mi_col_end;
#endif  // CONFIG_EXT_TILE
}
static INLINE void lower_mv_precision(MV *mv, int allow_hp) {
......
......@@ -308,8 +308,12 @@ typedef struct VP10Common {
int error_resilient_mode;
#if !CONFIG_EXT_TILE
int log2_tile_cols, log2_tile_rows;
int tile_sz_mag;
#endif // !CONFIG_EXT_TILE
int tile_cols, tile_rows;
int tile_width, tile_height;
int byte_alignment;
int skip_loop_filter;
......@@ -436,7 +440,7 @@ static INLINE void vp10_init_macroblockd(VP10_COMMON *cm, MACROBLOCKD *xd,
static INLINE void set_skip_context(MACROBLOCKD *xd, int mi_row, int mi_col) {
const int above_idx = mi_col * 2;
const int left_idx = (mi_row * 2) & 15;
const int left_idx = (mi_row * 2) & 15; // FIXME: Mask should be CU_SIZE*2-1
int i;
for (i = 0; i < MAX_MB_PLANE; ++i) {
struct macroblockd_plane *const pd = &xd->plane[i];
......@@ -460,7 +464,11 @@ static INLINE void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile,
xd->mb_to_right_edge = ((mi_cols - bw - mi_col) * MI_SIZE) * 8;
// Are edges available for intra prediction?
#if CONFIG_EXT_TILE
xd->up_available = (mi_row > tile->mi_row_start);
#else
xd->up_available = (mi_row != 0);
#endif // CONFIG_EXT_TILE
xd->left_available = (mi_col > tile->mi_col_start);
if (xd->up_available) {
xd->above_mi = xd->mi[-xd->mi_stride];
......@@ -586,11 +594,18 @@ static INLINE int partition_plane_context(const MACROBLOCKD *xd,
static INLINE void vp10_zero_above_context(VP10_COMMON *const cm,
int mi_col_start, int mi_col_end) {
const int width = mi_col_end - mi_col_start;
int i;
for (i = 0 ; i < MAX_MB_PLANE ; i++)
vp10_zero_array(cm->above_context[i] + 2 * mi_col_start, 2 * width);
const int offset_y = 2 * mi_col_start;
const int width_y = 2 * width;
const int offset_uv = offset_y >> cm->subsampling_x;
const int width_uv = width_y >> cm->subsampling_x;
vp10_zero_array(cm->above_context[0] + offset_y, width_y);
vp10_zero_array(cm->above_context[1] + offset_uv, width_uv);
vp10_zero_array(cm->above_context[2] + offset_uv, width_uv);
vp10_zero_array(cm->above_seg_context + mi_col_start, width);
#if CONFIG_VAR_TX
vp10_zero_array(cm->above_txfm_context + mi_col_start, width);
#endif // CONFIG_VAR_TX
......
......@@ -172,7 +172,7 @@ static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame,
const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
// Decoder may allocate more threads than number of tiles based on user's
// input.
const int tile_cols = 1 << cm->log2_tile_cols;
const int tile_cols = cm->tile_cols;
const int num_workers = VPXMIN(nworkers, tile_cols);
int i;
......
......@@ -15,20 +15,16 @@
#define MIN_TILE_WIDTH_B64 4
#define MAX_TILE_WIDTH_B64 64
// Maps tile index idx to its starting mi offset, distributing the frame's
// superblocks evenly over 1 << log2 tiles and clamping the result to mis.
static int get_tile_offset(int idx, int mis, int log2) {
  const int sb_cols = mi_cols_aligned_to_sb(mis) >> MI_BLOCK_SIZE_LOG2;
  int offset = (idx * sb_cols) >> log2;
  offset <<= MI_BLOCK_SIZE_LOG2;  // back from superblock units to mi units
  return VPXMIN(offset, mis);
}
void vp10_tile_set_row(TileInfo *tile, const VP10_COMMON *cm, int row) {
tile->mi_row_start = get_tile_offset(row, cm->mi_rows, cm->log2_tile_rows);
tile->mi_row_end = get_tile_offset(row + 1, cm->mi_rows, cm->log2_tile_rows);
tile->mi_row_start = row * cm->tile_height;
tile->mi_row_end = VPXMIN(tile->mi_row_start + cm->tile_height,
cm->mi_rows);
}
void vp10_tile_set_col(TileInfo *tile, const VP10_COMMON *cm, int col) {
tile->mi_col_start = get_tile_offset(col, cm->mi_cols, cm->log2_tile_cols);
tile->mi_col_end = get_tile_offset(col + 1, cm->mi_cols, cm->log2_tile_cols);
tile->mi_col_start = col * cm->tile_width;
tile->mi_col_end = VPXMIN(tile->mi_col_start + cm->tile_width,
cm->mi_cols);
}
void vp10_tile_init(TileInfo *tile, const VP10_COMMON *cm, int row, int col) {
......@@ -36,6 +32,8 @@ void vp10_tile_init(TileInfo *tile, const VP10_COMMON *cm, int row, int col) {
vp10_tile_set_col(tile, cm, col);
}
#if !CONFIG_EXT_TILE
// TODO(geza.lore): CU_SIZE dependent.
static int get_min_log2_tile_cols(const int sb64_cols) {
int min_log2 = 0;
while ((MAX_TILE_WIDTH_B64 << min_log2) < sb64_cols)
......@@ -51,9 +49,10 @@ static int get_max_log2_tile_cols(const int sb64_cols) {
}
void vp10_get_tile_n_bits(int mi_cols,
int *min_log2_tile_cols, int *max_log2_tile_cols) {
int *min_log2_tile_cols, int *max_log2_tile_cols) {
const int sb64_cols = mi_cols_aligned_to_sb(mi_cols) >> MI_BLOCK_SIZE_LOG2;
*min_log2_tile_cols = get_min_log2_tile_cols(sb64_cols);
*max_log2_tile_cols = get_max_log2_tile_cols(sb64_cols);
assert(*min_log2_tile_cols <= *max_log2_tile_cols);
}
#endif // !CONFIG_EXT_TILE
This diff is collapsed.
......@@ -57,6 +57,12 @@ typedef struct TileWorkerData {
struct vpx_internal_error_info error_info;
} TileWorkerData;
// Describes one coded tile's slice of the compressed frame data.
typedef struct TileBufferDec {
const uint8_t *data;  // start of the tile's coded data within the bitstream (not owned)
size_t size;  // size of the coded tile data in bytes
int col; // only used with multi-threaded decoding
} TileBufferDec;
typedef struct VP10Decoder {
DECLARE_ALIGNED(16, MACROBLOCKD, mb);
......@@ -78,7 +84,9 @@ typedef struct VP10Decoder {
int num_tile_workers;
TileData *tile_data;
int total_tiles;
int allocated_tiles;
TileBufferDec tile_buffers[MAX_TILE_ROWS][MAX_TILE_COLS];
VP9LfSync lf_row_sync;
......@@ -89,6 +97,12 @@ typedef struct VP10Decoder {
int inv_tile_order;
int need_resync; // wait for key/intra-only frame.
int hold_ref_buf; // hold the reference buffer.
int tile_size_bytes;
#if CONFIG_EXT_TILE
int tile_col_size_bytes;
int dec_tile_row, dec_tile_col;
#endif // CONFIG_EXT_TILE
} VP10Decoder;
int vp10_receive_compressed_data(struct VP10Decoder *pbi,
......
This diff is collapsed.
......@@ -18,7 +18,6 @@ extern "C" {
#include "vp10/encoder/encoder.h"
void vp10_encode_token_init();
void vp10_pack_bitstream(VP10_COMP *const cpi, uint8_t *dest, size_t *size);
void vp10_encode_token_init();
......
......@@ -4144,12 +4144,13 @@ static void encode_rd_sb_row(VP10_COMP *cpi,
int mi_row,
TOKENEXTRA **tp) {
VP10_COMMON *const cm = &cpi->common;
TileInfo *const tile_info = &tile_data->tile_info;
const TileInfo *const tile_info = &tile_data->tile_info;
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
SPEED_FEATURES *const sf = &cpi->sf;
int mi_col;
// Initialize the left context for the new SB row
vp10_zero_left_context(xd);
// Code each SB in the row
......@@ -4269,14 +4270,11 @@ static void init_encode_frame_mb_context(VP10_COMP *cpi) {
MACROBLOCK *const x = &cpi->td.mb;
VP10_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
// Copy data over into macro block data structures.
vp10_setup_src_planes(x, cpi->Source, 0, 0);
vp10_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y);
vp10_zero_above_context(cm, 0, aligned_mi_cols);
}
static int check_dual_ref_flags(VP10_COMP *cpi) {
......@@ -4338,11 +4336,11 @@ static TX_MODE select_tx_mode(const VP10_COMP *cpi, MACROBLOCKD *const xd) {
void vp10_init_tile_data(VP10_COMP *cpi) {
VP10_COMMON *const cm = &cpi->common;
const int tile_cols = 1 << cm->log2_tile_cols;
const int tile_rows = 1 << cm->log2_tile_rows;
const int tile_cols = cm->tile_cols;
const int tile_rows = cm->tile_rows;
int tile_col, tile_row;
TOKENEXTRA *pre_tok = cpi->tile_tok[0][0];
int tile_tok = 0;
unsigned int tile_tok = 0;
if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows) {
if (cpi->tile_data != NULL)
......@@ -4353,7 +4351,7 @@ void vp10_init_tile_data(VP10_COMP *cpi) {
for (tile_row = 0; tile_row < tile_rows; ++tile_row)
for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
TileDataEnc *tile_data =
TileDataEnc *const tile_data =
&cpi->tile_data[tile_row * tile_cols + tile_col];
int i, j;
for (i = 0; i < BLOCK_SIZES; ++i) {
......@@ -4367,7 +4365,7 @@ void vp10_init_tile_data(VP10_COMP *cpi) {
for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
TileInfo *tile_info =
TileInfo *const tile_info =
&cpi->tile_data[tile_row * tile_cols + tile_col].tile_info;
vp10_tile_init(tile_info, cm, tile_row, tile_col);
......@@ -4381,13 +4379,14 @@ void vp10_init_tile_data(VP10_COMP *cpi) {
void vp10_encode_tile(VP10_COMP *cpi, ThreadData *td,
int tile_row, int tile_col) {
VP10_COMMON *const cm = &cpi->common;
const int tile_cols = 1 << cm->log2_tile_cols;
TileDataEnc *this_tile =
&cpi->tile_data[tile_row * tile_cols + tile_col];
TileDataEnc *const this_tile =
&cpi->tile_data[tile_row * cm->tile_cols + tile_col];
const TileInfo * const tile_info = &this_tile->tile_info;
TOKENEXTRA *tok = cpi->tile_tok[tile_row][tile_col];
int mi_row;
vp10_zero_above_context(cm, tile_info->mi_col_start, tile_info->mi_col_end);
// Set up pointers to per thread motion search counters.
td->mb.m_search_count_ptr = &td->rd_counts.m_search_count;
td->mb.ex_search_count_ptr = &td->rd_counts.ex_search_count;
......@@ -4396,22 +4395,20 @@ void vp10_encode_tile(VP10_COMP *cpi, ThreadData *td,
mi_row += MI_BLOCK_SIZE) {
encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok);
}
cpi->tok_count[tile_row][tile_col] =
(unsigned int)(tok - cpi->tile_tok[tile_row][tile_col]);
assert(tok - cpi->tile_tok[tile_row][tile_col] <=
allocated_tokens(*tile_info));
assert(cpi->tok_count[tile_row][tile_col] <= allocated_tokens(*tile_info));
}
static void encode_tiles(VP10_COMP *cpi) {
VP10_COMMON *const cm = &cpi->common;
const int tile_cols = 1 << cm->log2_tile_cols;
const int tile_rows = 1 << cm->log2_tile_rows;
int tile_col, tile_row;
vp10_init_tile_data(cpi);
for (tile_row = 0; tile_row < tile_rows; ++tile_row)
for (tile_col = 0; tile_col < tile_cols; ++tile_col)
for (tile_row = 0; tile_row < cm->tile_rows; ++tile_row)
for (tile_col = 0; tile_col < cm->tile_cols; ++tile_col)
vp10_encode_tile(cpi, &cpi->td, tile_row, tile_col);
}
......@@ -4497,7 +4494,10 @@ static void encode_frame_internal(VP10_COMP *cpi) {
#endif
// If allowed, encoding tiles in parallel with one thread handling one tile.
if (VPXMIN(cpi->oxcf.max_threads, 1 << cm->log2_tile_cols) > 1)
// TODO(geza.lore): The multi-threaded encoder is not safe with more than
// 1 tile row, as it uses the single above_context et al arrays from
// cpi->common
if (VPXMIN(cpi->oxcf.max_threads, cm->tile_cols) > 1 && cm->tile_rows == 1)
vp10_encode_tiles_mt(cpi);
else
encode_tiles(cpi);
......
......@@ -788,13 +788,38 @@ void vp10_new_framerate(VP10_COMP *cpi, double framerate) {
// Derives the frame's tile geometry (cm->tile_width/tile_height in mi units
// and cm->tile_cols/tile_rows counts) from the encoder configuration.
static void set_tile_limits(VP10_COMP *cpi) {
  VP10_COMMON *const cm = &cpi->common;
#if CONFIG_EXT_TILE
  // With ext-tile, oxcf.tile_columns/tile_rows carry the desired tile
  // width/height in units of 64 pixels (clamped to 1..64); convert to mi
  // units and cap at the frame dimensions.
  int tile_w = clamp(cpi->oxcf.tile_columns, 1, 64) << MI_BLOCK_SIZE_LOG2;
  int tile_h = clamp(cpi->oxcf.tile_rows, 1, 64) << MI_BLOCK_SIZE_LOG2;
  int n_cols = 1;
  int n_rows = 1;
  tile_w = VPXMIN(tile_w, cm->mi_cols);
  tile_h = VPXMIN(tile_h, cm->mi_rows);
  // Count how many tiles of this size are needed to cover the frame.
  while (n_cols * tile_w < cm->mi_cols) n_cols++;
  while (n_rows * tile_h < cm->mi_rows) n_rows++;
  cm->tile_width = tile_w;
  cm->tile_height = tile_h;
  cm->tile_cols = n_cols;
  cm->tile_rows = n_rows;
#else
  // Without ext-tile, the parameters are log2 tile counts; clamp the column
  // count to the range the bitstream allows for this frame width.
  int min_log2_tile_cols, max_log2_tile_cols;
  vp10_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
  cm->log2_tile_cols = clamp(cpi->oxcf.tile_columns,
                             min_log2_tile_cols, max_log2_tile_cols);
  cm->log2_tile_rows = cpi->oxcf.tile_rows;
  cm->tile_cols = 1 << cm->log2_tile_cols;
  cm->tile_rows = 1 << cm->log2_tile_rows;
  cm->tile_width = (mi_cols_aligned_to_sb(cm->mi_cols) >> cm->log2_tile_cols);
  cm->tile_height = (mi_cols_aligned_to_sb(cm->mi_rows) >> cm->log2_tile_rows);
  // round to integer multiples of 8
  cm->tile_width = mi_cols_aligned_to_sb(cm->tile_width);
  cm->tile_height = mi_cols_aligned_to_sb(cm->tile_height);
#endif  // CONFIG_EXT_TILE
}
static void update_frame_size(VP10_COMP *cpi) {
......@@ -3843,7 +3868,9 @@ static void encode_with_recode_loop(VP10_COMP *cpi,
// to recode.
if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) {
save_coding_context(cpi);
vp10_pack_bitstream(cpi, dest, size);
rc->projected_frame_size = (int)(*size) << 3;
restore_coding_context(cpi);
......
......@@ -303,6 +303,11 @@ typedef struct SUBFRAME_STATS {
} SUBFRAME_STATS;
#endif // CONFIG_ENTROPY
// Destination buffer for one encoded tile's packed bitstream data.
typedef struct TileBufferEnc {
uint8_t *data;  // start of the tile's output buffer
size_t size;  // number of bytes written for this tile
} TileBufferEnc;
typedef struct VP10_COMP {
QUANTS quants;
ThreadData td;
......@@ -325,9 +330,6 @@ typedef struct VP10_COMP {
EncRefCntBuffer upsampled_ref_bufs[MAX_REF_FRAMES];
int upsampled_ref_idx[MAX_REF_FRAMES];
TileDataEnc *tile_data;
int allocated_tiles; // Keep track of memory allocated for tiles.
// For a still frame, this flag is set to 1 to skip partition search.
int partition_search_skippable_frame;
......@@ -369,9 +371,6 @@ typedef struct VP10_COMP {
YV12_BUFFER_CONFIG last_frame_db;
#endif // CONFIG_LOOP_RESTORATION
TOKENEXTRA *tile_tok[4][1 << 6];
unsigned int tok_count[4][1 << 6];
// Ambient reconstruction err target for force key frames
int64_t ambient_err;
......@@ -444,7 +443,6 @@ typedef struct VP10_COMP {
YV12_BUFFER_CONFIG alt_ref_buffer;
#if CONFIG_INTERNAL_STATS
unsigned int mode_chosen_counts[MAX_MODES];
......@@ -543,6 +541,15 @@ typedef struct VP10_COMP {
int multi_arf_allowed;
int multi_arf_enabled;
int multi_arf_last_grp_enabled;
TileDataEnc *tile_data;
int allocated_tiles; // Keep track of memory allocated for tiles.
TOKENEXTRA *tile_tok[MAX_TILE_ROWS][MAX_TILE_COLS];
unsigned int tok_count[MAX_TILE_ROWS][MAX_TILE_COLS];
TileBufferEnc tile_buffers[MAX_TILE_ROWS][MAX_TILE_COLS];
#if CONFIG_VP9_TEMPORAL_DENOISING
VP9_DENOISER denoiser;
#endif
......@@ -659,7 +666,7 @@ static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer(
buf_idx != INVALID_IDX ? &cm->buffer_pool->frame_bufs[buf_idx].buf : NULL;
}
static INLINE int get_token_alloc(int mb_rows, int mb_cols) {
static INLINE unsigned int get_token_alloc(int mb_rows, int mb_cols) {
// TODO(JBB): double check we can't exceed this token count if we have a
// 32x32 transform crossing a boundary at a multiple of 16.
// mb_rows, cols are in units of 16 pixels. We assume 3 planes all at full
......@@ -670,7 +677,7 @@ static INLINE int get_token_alloc(int mb_rows, int mb_cols) {
// Get the allocated token size for a tile. It does the same calculation as in
// the frame token allocation.
static INLINE int allocated_tokens(TileInfo tile) {
static INLINE unsigned int allocated_tokens(TileInfo tile) {
int tile_mb_rows = (tile.mi_row_end - tile.mi_row_start + 1) >> 1;
int tile_mb_cols = (tile.mi_col_end - tile.mi_col_start + 1) >> 1;
......
......@@ -37,8 +37,8 @@ static void accumulate_rd_opt(ThreadData *td, ThreadData *td_t) {
static int enc_worker_hook(EncWorkerData *const thread_data, void *unused) {
VP10_COMP *const cpi = thread_data->cpi;
const VP10_COMMON *const cm = &cpi->common;
const int tile_cols = 1 << cm->log2_tile_cols;
const int tile_rows = 1 << cm->log2_tile_rows;
const int tile_cols = cm->tile_cols;
const int tile_rows = cm->tile_rows;
int t;
(void) unused;
......@@ -56,7 +56,7 @@ static int enc_worker_hook(EncWorkerData *const thread_data, void *unused) {
void vp10_encode_tiles_mt(VP10_COMP *cpi) {
VP10_COMMON *const cm = &cpi->common;
const int tile_cols = 1 << cm->log2_tile_cols;
const int tile_cols = cm->tile_cols;
const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
const int num_workers = VPXMIN(cpi->oxcf.max_threads, tile_cols);
int i;
......@@ -65,24 +65,23 @@ void vp10_encode_tiles_mt(VP10_COMP *cpi) {
// Only run once to create threads and allocate thread data.
if (cpi->num_workers == 0) {
int allocated_workers = num_workers;
CHECK_MEM_ERROR(cm, cpi->workers,
vpx_malloc(allocated_workers * sizeof(*cpi->workers)));
vpx_malloc(num_workers * sizeof(*cpi->workers)));
CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
vpx_calloc(allocated_workers,
vpx_calloc(num_workers,
sizeof(*cpi->tile_thr_data)));
for (i = 0; i < allocated_workers; i++) {
for (i = 0; i < num_workers; i++) {
VPxWorker *const worker = &cpi->workers[i];
EncWorkerData *thread_data = &cpi->tile_thr_data[i];
EncWorkerData *const thread_data = &cpi->tile_thr_data[i];
++cpi->num_workers;
winterface->init(worker);
if (i < allocated_workers - 1) {
thread_data->cpi = cpi;
thread_data->cpi = cpi;
if (i < num_workers - 1) {
// Allocate thread data.
CHECK_MEM_ERROR(cm, thread_data->td,
......@@ -104,7 +103,6 @@ void vp10_encode_tiles_mt(VP10_COMP *cpi) {
"Tile encoder thread creation failed");
} else {
// Main thread acts as a worker and uses the thread data in cpi.
thread_data->cpi = cpi;
thread_data->td = &cpi->td;
}
......
......@@ -199,5 +199,12 @@ void vp10_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP10_COMP *cpi,
lf->filter_level = vp10_search_filter_level(
sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, NULL);
}
#if CONFIG_EXT_TILE
// TODO(any): 0 loopfilter level is only necessary if individual tile
// decoding is required. We need to communicate this requirement to this
// code and force loop filter level 0 only if required.
lf->filter_level = 0;
#endif // CONFIG_EXT_TILE
}
#endif // !CONFIG_LOOP_RESTORATION
......@@ -305,7 +305,7 @@ void vp10_choose_segmap_coding_method(VP10_COMMON *cm, MACROBLOCKD *xd) {
int no_pred_cost;
int t_pred_cost = INT_MAX;
int i, tile_col, mi_row, mi_col;
int i, tile_col, tile_row, mi_row, mi_col;
unsigned (*temporal_predictor_count)[2] = cm->counts.seg.pred;
unsigned *no_pred_segcounts = cm->counts.seg.tree_total;
......@@ -319,23 +319,28 @@ void vp10_choose_segmap_coding_method(VP10_COMMON *cm, MACROBLOCKD *xd) {
// First of all generate stats regarding how well the last segment map
// predicts this one
for (tile_col = 0; tile_col < 1 << cm->log2_tile_cols; tile_col++) {
TileInfo tile;
MODE_INFO **mi_ptr;
vp10_tile_init(&tile, cm, 0, tile_col);
mi_ptr = cm->mi_grid_visible + tile.mi_col_start;
for (mi_row = 0; mi_row < cm->mi_rows;
mi_row += 8, mi_ptr += 8 * cm->mi_stride) {
MODE_INFO **mi = mi_ptr;
for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end;
mi_col += 8, mi += 8)
count_segs_sb(cm, xd, &tile, mi, no_pred_segcounts,
temporal_predictor_count, t_unpred_seg_counts,
mi_row, mi_col, BLOCK_64X64);
for (tile_row = 0; tile_row < cm->tile_rows; tile_row++) {
TileInfo tile_info;
vp10_tile_set_row(&tile_info, cm, tile_row);
for (tile_col = 0; tile_col < cm->tile_cols; tile_col++) {
MODE_INFO **mi_ptr;
vp10_tile_set_col(&tile_info, cm, tile_col);
mi_ptr = cm->mi_grid_visible + tile_info.mi_row_start * cm->mi_stride +
tile_info.mi_col_start;
for (mi_row = tile_info.mi_row_start; mi_row < tile_info.mi_row_end;
mi_row += 8, mi_ptr += 8 * cm->mi_stride) {
MODE_INFO **mi = mi_ptr;
for (mi_col = tile_info.mi_col_start; mi_col < tile_info.mi_col_end;
mi_col += 8, mi += 8) {
count_segs_sb(cm, xd, &tile_info, mi, no_pred_segcounts,
temporal_predictor_count, t_unpred_seg_counts,
mi_row, mi_col, BLOCK_64X64);
}
}
}
}
// Work out probability tree for coding segments without prediction
// and the cost.
calc_segtree_probs(no_pred_segcounts, no_pred_tree, segp->tree_probs);
......
......@@ -57,8 +57,13 @@ static struct vp10_extracfg default_extra_cfg = {
0, // noise_sensitivity
0, // sharpness
0, // static_thresh
6, // tile_columns
#if CONFIG_EXT_TILE
64, // tile_columns
64, // tile_rows
#else
0, // tile_columns
0, // tile_rows
#endif // CONFIG_EXT_TILE
7, // arnr_max_frames
5, // arnr_strength
0, // min_gf_interval; 0 -> default decision
......@@ -207,8 +212,16 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
RANGE_CHECK(extra_cfg, enable_auto_alt_ref, 0, 2);
RANGE_CHECK(extra_cfg, cpu_used, -8, 8);
RANGE_CHECK_HI(extra_cfg, noise_sensitivity, 6);
#if CONFIG_EXT_TILE
// TODO(any): Warning. If CONFIG_EXT_TILE is true, tile_columns really
// means tile_width, and tile_rows really means tile_height. The interface
// should be sanitized.
RANGE_CHECK(extra_cfg, tile_columns, 1, 64);
RANGE_CHECK(extra_cfg, tile_rows, 1, 64);
#else
RANGE_CHECK(extra_cfg, tile_columns, 0, 6);
RANGE_CHECK(extra_cfg, tile_rows, 0, 2);
#endif // CONFIG_EXT_TILE
RANGE_CHECK_HI(extra_cfg, sharpness, 7);
RANGE_CHECK(extra_cfg, arnr_max_frames, 0, 15);
RANGE_CHECK_HI(extra_cfg, arnr_strength, 6);
......
......@@ -23,6 +23,7 @@
#include "vp10/common/alloccommon.h"
#include "vp10/common/frame_buffers.h"
#include "vp10/common/enums.h"
#include "vp10/decoder/decoder.h"
#include "vp10/decoder/decodeframe.h"
......@@ -499,6 +500,11 @@ static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx,
frame_worker_data->pbi->decrypt_cb = ctx->decrypt_cb;
frame_worker_data->pbi->decrypt_state = ctx->decrypt_state;
#if CONFIG_EXT_TILE
frame_worker_data->pbi->dec_tile_row = ctx->cfg.tile_row;
frame_worker_data->pbi->dec_tile_col = ctx->cfg.tile_col;
#endif // CONFIG_EXT_TILE
worker->had_error = 0;
winterface->execute(worker);
......@@ -775,6 +781,39 @@ static vpx_image_t *decoder_get_frame(vpx_codec_alg_priv_t *ctx,
if (ctx->need_resync)
return NULL;
yuvconfig2image(&ctx->img, &sd, frame_worker_data->user_priv);
#if CONFIG_EXT_TILE
if (frame_worker_data->pbi->dec_tile_row >= 0) {
const int tile_row = VPXMIN(frame_worker_data->pbi->dec_tile_row,
cm->tile_rows - 1);
const int mi_row = tile_row * cm->tile_height;
const int ssy = ctx->img.y_chroma_shift;
int plane;
ctx->img.planes[0] += mi_row * MI_SIZE * ctx->img.stride[0];
for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
ctx->img.planes[plane] += mi_row * (MI_SIZE >> ssy) *
ctx->img.stride[plane];
}
ctx->img.d_h = VPXMIN(cm->tile_height, cm->mi_rows - mi_row) *