Commit 490ba1ad authored by Geza Lore's avatar Geza Lore

Port large scale tile coding features from nextgen.

If configured with --enable-ext-tile, the codec uses an alternative
tile coding syntax in the bitstream. Changes include:
 - The maximum number of tile rows and columns is extended to 1024
   each.
 - The minimum tile width/height is 64 pixels (1 superblock).
 - A tile copy mode is added where a tile directly reuses the coded
   data of a previous tile.
 - The meaning of the tile-columns and tile-rows codec parameters is
   overloaded to mean tile-width and tile-height in units of 64
   pixels.
 - All tiles should now be independent, including rows within the
   same column, so large-scale parallel or independent decoding is
   possible.
 - vpxdec also gained options to decode only a particular tile,
   tile row, or tile column.

Changes without --enable-ext-tile:
 - All tiles should now be independent, including rows within the
   same column, so large-scale parallel or independent decoding is
   possible.
 - vpxenc default tile configuration changed to use 1 tile column.

Change-Id: I0cd08ad550967ac18622dae5e98ad23d581cb33e
parent b4334460
...@@ -131,28 +131,32 @@ int vp10_alloc_context_buffers(VP10_COMMON *cm, int width, int height) { ...@@ -131,28 +131,32 @@ int vp10_alloc_context_buffers(VP10_COMMON *cm, int width, int height) {
} }
if (cm->above_context_alloc_cols < cm->mi_cols) { if (cm->above_context_alloc_cols < cm->mi_cols) {
// TODO(geza.lore): These are bigger than they need to be.
// cm->tile_width would be enough but it complicates indexing a
// little elsewhere.
const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
int i; int i;
for (i = 0 ; i < MAX_MB_PLANE ; i++) { for (i = 0 ; i < MAX_MB_PLANE ; i++) {
vpx_free(cm->above_context[i]); vpx_free(cm->above_context[i]);
cm->above_context[i] = (ENTROPY_CONTEXT *)vpx_calloc( cm->above_context[i] = (ENTROPY_CONTEXT *)vpx_calloc(
2 * mi_cols_aligned_to_sb(cm->mi_cols), 2 * aligned_mi_cols, sizeof(*cm->above_context[0]));
sizeof(*cm->above_context[0]));
if (!cm->above_context[i]) goto fail; if (!cm->above_context[i]) goto fail;
} }
vpx_free(cm->above_seg_context); vpx_free(cm->above_seg_context);
cm->above_seg_context = (PARTITION_CONTEXT *)vpx_calloc( cm->above_seg_context = (PARTITION_CONTEXT *)vpx_calloc(
mi_cols_aligned_to_sb(cm->mi_cols), sizeof(*cm->above_seg_context)); aligned_mi_cols, sizeof(*cm->above_seg_context));
if (!cm->above_seg_context) goto fail; if (!cm->above_seg_context) goto fail;
#if CONFIG_VAR_TX #if CONFIG_VAR_TX
vpx_free(cm->above_txfm_context); vpx_free(cm->above_txfm_context);
cm->above_txfm_context = (TXFM_CONTEXT *)vpx_calloc( cm->above_txfm_context = (TXFM_CONTEXT *)vpx_calloc(
mi_cols_aligned_to_sb(cm->mi_cols), sizeof(*cm->above_txfm_context)); aligned_mi_cols, sizeof(*cm->above_txfm_context));
if (!cm->above_txfm_context) goto fail; if (!cm->above_txfm_context) goto fail;
#endif #endif
cm->above_context_alloc_cols = cm->mi_cols; cm->above_context_alloc_cols = aligned_mi_cols;
} }
return 0; return 0;
......
...@@ -26,6 +26,14 @@ extern "C" { ...@@ -26,6 +26,14 @@ extern "C" {
#define MI_MASK (MI_BLOCK_SIZE - 1) #define MI_MASK (MI_BLOCK_SIZE - 1)
#if CONFIG_EXT_TILE
# define MAX_TILE_ROWS 1024
# define MAX_TILE_COLS 1024
#else
# define MAX_TILE_ROWS 4
# define MAX_TILE_COLS 64
#endif // CONFIG_EXT_TILE
// Bitstream profiles indicated by 2-3 bits in the uncompressed header. // Bitstream profiles indicated by 2-3 bits in the uncompressed header.
// 00: Profile 0. 8-bit 4:2:0 only. // 00: Profile 0. 8-bit 4:2:0 only.
// 10: Profile 1. 8-bit 4:4:4, 4:2:2, and 4:4:0. // 10: Profile 1. 8-bit 4:4:4, 4:2:2, and 4:4:0.
......
...@@ -211,10 +211,18 @@ static INLINE int_mv scale_mv(const MB_MODE_INFO *mbmi, int ref, ...@@ -211,10 +211,18 @@ static INLINE int_mv scale_mv(const MB_MODE_INFO *mbmi, int ref,
static INLINE int is_inside(const TileInfo *const tile, static INLINE int is_inside(const TileInfo *const tile,
int mi_col, int mi_row, int mi_rows, int mi_col, int mi_row, int mi_rows,
const POSITION *mi_pos) { const POSITION *mi_pos) {
#if CONFIG_EXT_TILE
(void) mi_rows;
return !(mi_row + mi_pos->row < tile->mi_row_start ||
mi_col + mi_pos->col < tile->mi_col_start ||
mi_row + mi_pos->row >= tile->mi_row_end ||
mi_col + mi_pos->col >= tile->mi_col_end);
#else
return !(mi_row + mi_pos->row < 0 || return !(mi_row + mi_pos->row < 0 ||
mi_col + mi_pos->col < tile->mi_col_start || mi_col + mi_pos->col < tile->mi_col_start ||
mi_row + mi_pos->row >= mi_rows || mi_row + mi_pos->row >= mi_rows ||
mi_col + mi_pos->col >= tile->mi_col_end); mi_col + mi_pos->col >= tile->mi_col_end);
#endif // CONFIG_EXT_TILE
} }
static INLINE void lower_mv_precision(MV *mv, int allow_hp) { static INLINE void lower_mv_precision(MV *mv, int allow_hp) {
......
...@@ -308,8 +308,12 @@ typedef struct VP10Common { ...@@ -308,8 +308,12 @@ typedef struct VP10Common {
int error_resilient_mode; int error_resilient_mode;
#if !CONFIG_EXT_TILE
int log2_tile_cols, log2_tile_rows; int log2_tile_cols, log2_tile_rows;
int tile_sz_mag; #endif // !CONFIG_EXT_TILE
int tile_cols, tile_rows;
int tile_width, tile_height;
int byte_alignment; int byte_alignment;
int skip_loop_filter; int skip_loop_filter;
...@@ -436,7 +440,7 @@ static INLINE void vp10_init_macroblockd(VP10_COMMON *cm, MACROBLOCKD *xd, ...@@ -436,7 +440,7 @@ static INLINE void vp10_init_macroblockd(VP10_COMMON *cm, MACROBLOCKD *xd,
static INLINE void set_skip_context(MACROBLOCKD *xd, int mi_row, int mi_col) { static INLINE void set_skip_context(MACROBLOCKD *xd, int mi_row, int mi_col) {
const int above_idx = mi_col * 2; const int above_idx = mi_col * 2;
const int left_idx = (mi_row * 2) & 15; const int left_idx = (mi_row * 2) & 15; // FIXME: Mask should be CU_SIZE*2-1
int i; int i;
for (i = 0; i < MAX_MB_PLANE; ++i) { for (i = 0; i < MAX_MB_PLANE; ++i) {
struct macroblockd_plane *const pd = &xd->plane[i]; struct macroblockd_plane *const pd = &xd->plane[i];
...@@ -460,7 +464,11 @@ static INLINE void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile, ...@@ -460,7 +464,11 @@ static INLINE void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile,
xd->mb_to_right_edge = ((mi_cols - bw - mi_col) * MI_SIZE) * 8; xd->mb_to_right_edge = ((mi_cols - bw - mi_col) * MI_SIZE) * 8;
// Are edges available for intra prediction? // Are edges available for intra prediction?
#if CONFIG_EXT_TILE
xd->up_available = (mi_row > tile->mi_row_start);
#else
xd->up_available = (mi_row != 0); xd->up_available = (mi_row != 0);
#endif // CONFIG_EXT_TILE
xd->left_available = (mi_col > tile->mi_col_start); xd->left_available = (mi_col > tile->mi_col_start);
if (xd->up_available) { if (xd->up_available) {
xd->above_mi = xd->mi[-xd->mi_stride]; xd->above_mi = xd->mi[-xd->mi_stride];
...@@ -586,11 +594,18 @@ static INLINE int partition_plane_context(const MACROBLOCKD *xd, ...@@ -586,11 +594,18 @@ static INLINE int partition_plane_context(const MACROBLOCKD *xd,
static INLINE void vp10_zero_above_context(VP10_COMMON *const cm, static INLINE void vp10_zero_above_context(VP10_COMMON *const cm,
int mi_col_start, int mi_col_end) { int mi_col_start, int mi_col_end) {
const int width = mi_col_end - mi_col_start; const int width = mi_col_end - mi_col_start;
int i;
for (i = 0 ; i < MAX_MB_PLANE ; i++) const int offset_y = 2 * mi_col_start;
vp10_zero_array(cm->above_context[i] + 2 * mi_col_start, 2 * width); const int width_y = 2 * width;
const int offset_uv = offset_y >> cm->subsampling_x;
const int width_uv = width_y >> cm->subsampling_x;
vp10_zero_array(cm->above_context[0] + offset_y, width_y);
vp10_zero_array(cm->above_context[1] + offset_uv, width_uv);
vp10_zero_array(cm->above_context[2] + offset_uv, width_uv);
vp10_zero_array(cm->above_seg_context + mi_col_start, width); vp10_zero_array(cm->above_seg_context + mi_col_start, width);
#if CONFIG_VAR_TX #if CONFIG_VAR_TX
vp10_zero_array(cm->above_txfm_context + mi_col_start, width); vp10_zero_array(cm->above_txfm_context + mi_col_start, width);
#endif // CONFIG_VAR_TX #endif // CONFIG_VAR_TX
......
...@@ -172,7 +172,7 @@ static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, ...@@ -172,7 +172,7 @@ static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame,
const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
// Decoder may allocate more threads than number of tiles based on user's // Decoder may allocate more threads than number of tiles based on user's
// input. // input.
const int tile_cols = 1 << cm->log2_tile_cols; const int tile_cols = cm->tile_cols;
const int num_workers = VPXMIN(nworkers, tile_cols); const int num_workers = VPXMIN(nworkers, tile_cols);
int i; int i;
......
...@@ -15,20 +15,16 @@ ...@@ -15,20 +15,16 @@
#define MIN_TILE_WIDTH_B64 4 #define MIN_TILE_WIDTH_B64 4
#define MAX_TILE_WIDTH_B64 64 #define MAX_TILE_WIDTH_B64 64
static int get_tile_offset(int idx, int mis, int log2) {
const int sb_cols = mi_cols_aligned_to_sb(mis) >> MI_BLOCK_SIZE_LOG2;
const int offset = ((idx * sb_cols) >> log2) << MI_BLOCK_SIZE_LOG2;
return VPXMIN(offset, mis);
}
void vp10_tile_set_row(TileInfo *tile, const VP10_COMMON *cm, int row) { void vp10_tile_set_row(TileInfo *tile, const VP10_COMMON *cm, int row) {
tile->mi_row_start = get_tile_offset(row, cm->mi_rows, cm->log2_tile_rows); tile->mi_row_start = row * cm->tile_height;
tile->mi_row_end = get_tile_offset(row + 1, cm->mi_rows, cm->log2_tile_rows); tile->mi_row_end = VPXMIN(tile->mi_row_start + cm->tile_height,
cm->mi_rows);
} }
void vp10_tile_set_col(TileInfo *tile, const VP10_COMMON *cm, int col) { void vp10_tile_set_col(TileInfo *tile, const VP10_COMMON *cm, int col) {
tile->mi_col_start = get_tile_offset(col, cm->mi_cols, cm->log2_tile_cols); tile->mi_col_start = col * cm->tile_width;
tile->mi_col_end = get_tile_offset(col + 1, cm->mi_cols, cm->log2_tile_cols); tile->mi_col_end = VPXMIN(tile->mi_col_start + cm->tile_width,
cm->mi_cols);
} }
void vp10_tile_init(TileInfo *tile, const VP10_COMMON *cm, int row, int col) { void vp10_tile_init(TileInfo *tile, const VP10_COMMON *cm, int row, int col) {
...@@ -36,6 +32,8 @@ void vp10_tile_init(TileInfo *tile, const VP10_COMMON *cm, int row, int col) { ...@@ -36,6 +32,8 @@ void vp10_tile_init(TileInfo *tile, const VP10_COMMON *cm, int row, int col) {
vp10_tile_set_col(tile, cm, col); vp10_tile_set_col(tile, cm, col);
} }
#if !CONFIG_EXT_TILE
// TODO(geza.lore): CU_SIZE dependent.
static int get_min_log2_tile_cols(const int sb64_cols) { static int get_min_log2_tile_cols(const int sb64_cols) {
int min_log2 = 0; int min_log2 = 0;
while ((MAX_TILE_WIDTH_B64 << min_log2) < sb64_cols) while ((MAX_TILE_WIDTH_B64 << min_log2) < sb64_cols)
...@@ -51,9 +49,10 @@ static int get_max_log2_tile_cols(const int sb64_cols) { ...@@ -51,9 +49,10 @@ static int get_max_log2_tile_cols(const int sb64_cols) {
} }
void vp10_get_tile_n_bits(int mi_cols, void vp10_get_tile_n_bits(int mi_cols,
int *min_log2_tile_cols, int *max_log2_tile_cols) { int *min_log2_tile_cols, int *max_log2_tile_cols) {
const int sb64_cols = mi_cols_aligned_to_sb(mi_cols) >> MI_BLOCK_SIZE_LOG2; const int sb64_cols = mi_cols_aligned_to_sb(mi_cols) >> MI_BLOCK_SIZE_LOG2;
*min_log2_tile_cols = get_min_log2_tile_cols(sb64_cols); *min_log2_tile_cols = get_min_log2_tile_cols(sb64_cols);
*max_log2_tile_cols = get_max_log2_tile_cols(sb64_cols); *max_log2_tile_cols = get_max_log2_tile_cols(sb64_cols);
assert(*min_log2_tile_cols <= *max_log2_tile_cols); assert(*min_log2_tile_cols <= *max_log2_tile_cols);
} }
#endif // !CONFIG_EXT_TILE
This diff is collapsed.
...@@ -57,6 +57,12 @@ typedef struct TileWorkerData { ...@@ -57,6 +57,12 @@ typedef struct TileWorkerData {
struct vpx_internal_error_info error_info; struct vpx_internal_error_info error_info;
} TileWorkerData; } TileWorkerData;
typedef struct TileBufferDec {
const uint8_t *data;
size_t size;
int col; // only used with multi-threaded decoding
} TileBufferDec;
typedef struct VP10Decoder { typedef struct VP10Decoder {
DECLARE_ALIGNED(16, MACROBLOCKD, mb); DECLARE_ALIGNED(16, MACROBLOCKD, mb);
...@@ -78,7 +84,9 @@ typedef struct VP10Decoder { ...@@ -78,7 +84,9 @@ typedef struct VP10Decoder {
int num_tile_workers; int num_tile_workers;
TileData *tile_data; TileData *tile_data;
int total_tiles; int allocated_tiles;
TileBufferDec tile_buffers[MAX_TILE_ROWS][MAX_TILE_COLS];
VP9LfSync lf_row_sync; VP9LfSync lf_row_sync;
...@@ -89,6 +97,12 @@ typedef struct VP10Decoder { ...@@ -89,6 +97,12 @@ typedef struct VP10Decoder {
int inv_tile_order; int inv_tile_order;
int need_resync; // wait for key/intra-only frame. int need_resync; // wait for key/intra-only frame.
int hold_ref_buf; // hold the reference buffer. int hold_ref_buf; // hold the reference buffer.
int tile_size_bytes;
#if CONFIG_EXT_TILE
int tile_col_size_bytes;
int dec_tile_row, dec_tile_col;
#endif // CONFIG_EXT_TILE
} VP10Decoder; } VP10Decoder;
int vp10_receive_compressed_data(struct VP10Decoder *pbi, int vp10_receive_compressed_data(struct VP10Decoder *pbi,
......
This diff is collapsed.
...@@ -18,7 +18,6 @@ extern "C" { ...@@ -18,7 +18,6 @@ extern "C" {
#include "vp10/encoder/encoder.h" #include "vp10/encoder/encoder.h"
void vp10_encode_token_init();
void vp10_pack_bitstream(VP10_COMP *const cpi, uint8_t *dest, size_t *size); void vp10_pack_bitstream(VP10_COMP *const cpi, uint8_t *dest, size_t *size);
void vp10_encode_token_init(); void vp10_encode_token_init();
......
...@@ -4144,12 +4144,13 @@ static void encode_rd_sb_row(VP10_COMP *cpi, ...@@ -4144,12 +4144,13 @@ static void encode_rd_sb_row(VP10_COMP *cpi,
int mi_row, int mi_row,
TOKENEXTRA **tp) { TOKENEXTRA **tp) {
VP10_COMMON *const cm = &cpi->common; VP10_COMMON *const cm = &cpi->common;
TileInfo *const tile_info = &tile_data->tile_info; const TileInfo *const tile_info = &tile_data->tile_info;
MACROBLOCK *const x = &td->mb; MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd; MACROBLOCKD *const xd = &x->e_mbd;
SPEED_FEATURES *const sf = &cpi->sf; SPEED_FEATURES *const sf = &cpi->sf;
int mi_col; int mi_col;
// Initialize the left context for the new SB row
vp10_zero_left_context(xd); vp10_zero_left_context(xd);
// Code each SB in the row // Code each SB in the row
...@@ -4269,14 +4270,11 @@ static void init_encode_frame_mb_context(VP10_COMP *cpi) { ...@@ -4269,14 +4270,11 @@ static void init_encode_frame_mb_context(VP10_COMP *cpi) {
MACROBLOCK *const x = &cpi->td.mb; MACROBLOCK *const x = &cpi->td.mb;
VP10_COMMON *const cm = &cpi->common; VP10_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd; MACROBLOCKD *const xd = &x->e_mbd;
const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
// Copy data over into macro block data structures. // Copy data over into macro block data structures.
vp10_setup_src_planes(x, cpi->Source, 0, 0); vp10_setup_src_planes(x, cpi->Source, 0, 0);
vp10_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y); vp10_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y);
vp10_zero_above_context(cm, 0, aligned_mi_cols);
} }
static int check_dual_ref_flags(VP10_COMP *cpi) { static int check_dual_ref_flags(VP10_COMP *cpi) {
...@@ -4338,11 +4336,11 @@ static TX_MODE select_tx_mode(const VP10_COMP *cpi, MACROBLOCKD *const xd) { ...@@ -4338,11 +4336,11 @@ static TX_MODE select_tx_mode(const VP10_COMP *cpi, MACROBLOCKD *const xd) {
void vp10_init_tile_data(VP10_COMP *cpi) { void vp10_init_tile_data(VP10_COMP *cpi) {
VP10_COMMON *const cm = &cpi->common; VP10_COMMON *const cm = &cpi->common;
const int tile_cols = 1 << cm->log2_tile_cols; const int tile_cols = cm->tile_cols;
const int tile_rows = 1 << cm->log2_tile_rows; const int tile_rows = cm->tile_rows;
int tile_col, tile_row; int tile_col, tile_row;
TOKENEXTRA *pre_tok = cpi->tile_tok[0][0]; TOKENEXTRA *pre_tok = cpi->tile_tok[0][0];
int tile_tok = 0; unsigned int tile_tok = 0;
if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows) { if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows) {
if (cpi->tile_data != NULL) if (cpi->tile_data != NULL)
...@@ -4353,7 +4351,7 @@ void vp10_init_tile_data(VP10_COMP *cpi) { ...@@ -4353,7 +4351,7 @@ void vp10_init_tile_data(VP10_COMP *cpi) {
for (tile_row = 0; tile_row < tile_rows; ++tile_row) for (tile_row = 0; tile_row < tile_rows; ++tile_row)
for (tile_col = 0; tile_col < tile_cols; ++tile_col) { for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
TileDataEnc *tile_data = TileDataEnc *const tile_data =
&cpi->tile_data[tile_row * tile_cols + tile_col]; &cpi->tile_data[tile_row * tile_cols + tile_col];
int i, j; int i, j;
for (i = 0; i < BLOCK_SIZES; ++i) { for (i = 0; i < BLOCK_SIZES; ++i) {
...@@ -4367,7 +4365,7 @@ void vp10_init_tile_data(VP10_COMP *cpi) { ...@@ -4367,7 +4365,7 @@ void vp10_init_tile_data(VP10_COMP *cpi) {
for (tile_row = 0; tile_row < tile_rows; ++tile_row) { for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
for (tile_col = 0; tile_col < tile_cols; ++tile_col) { for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
TileInfo *tile_info = TileInfo *const tile_info =
&cpi->tile_data[tile_row * tile_cols + tile_col].tile_info; &cpi->tile_data[tile_row * tile_cols + tile_col].tile_info;
vp10_tile_init(tile_info, cm, tile_row, tile_col); vp10_tile_init(tile_info, cm, tile_row, tile_col);
...@@ -4381,13 +4379,14 @@ void vp10_init_tile_data(VP10_COMP *cpi) { ...@@ -4381,13 +4379,14 @@ void vp10_init_tile_data(VP10_COMP *cpi) {
void vp10_encode_tile(VP10_COMP *cpi, ThreadData *td, void vp10_encode_tile(VP10_COMP *cpi, ThreadData *td,
int tile_row, int tile_col) { int tile_row, int tile_col) {
VP10_COMMON *const cm = &cpi->common; VP10_COMMON *const cm = &cpi->common;
const int tile_cols = 1 << cm->log2_tile_cols; TileDataEnc *const this_tile =
TileDataEnc *this_tile = &cpi->tile_data[tile_row * cm->tile_cols + tile_col];
&cpi->tile_data[tile_row * tile_cols + tile_col];
const TileInfo * const tile_info = &this_tile->tile_info; const TileInfo * const tile_info = &this_tile->tile_info;
TOKENEXTRA *tok = cpi->tile_tok[tile_row][tile_col]; TOKENEXTRA *tok = cpi->tile_tok[tile_row][tile_col];
int mi_row; int mi_row;
vp10_zero_above_context(cm, tile_info->mi_col_start, tile_info->mi_col_end);
// Set up pointers to per thread motion search counters. // Set up pointers to per thread motion search counters.
td->mb.m_search_count_ptr = &td->rd_counts.m_search_count; td->mb.m_search_count_ptr = &td->rd_counts.m_search_count;
td->mb.ex_search_count_ptr = &td->rd_counts.ex_search_count; td->mb.ex_search_count_ptr = &td->rd_counts.ex_search_count;
...@@ -4396,22 +4395,20 @@ void vp10_encode_tile(VP10_COMP *cpi, ThreadData *td, ...@@ -4396,22 +4395,20 @@ void vp10_encode_tile(VP10_COMP *cpi, ThreadData *td,
mi_row += MI_BLOCK_SIZE) { mi_row += MI_BLOCK_SIZE) {
encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok); encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok);
} }
cpi->tok_count[tile_row][tile_col] = cpi->tok_count[tile_row][tile_col] =
(unsigned int)(tok - cpi->tile_tok[tile_row][tile_col]); (unsigned int)(tok - cpi->tile_tok[tile_row][tile_col]);
assert(tok - cpi->tile_tok[tile_row][tile_col] <= assert(cpi->tok_count[tile_row][tile_col] <= allocated_tokens(*tile_info));
allocated_tokens(*tile_info));
} }
static void encode_tiles(VP10_COMP *cpi) { static void encode_tiles(VP10_COMP *cpi) {
VP10_COMMON *const cm = &cpi->common; VP10_COMMON *const cm = &cpi->common;
const int tile_cols = 1 << cm->log2_tile_cols;
const int tile_rows = 1 << cm->log2_tile_rows;
int tile_col, tile_row; int tile_col, tile_row;
vp10_init_tile_data(cpi); vp10_init_tile_data(cpi);
for (tile_row = 0; tile_row < tile_rows; ++tile_row) for (tile_row = 0; tile_row < cm->tile_rows; ++tile_row)
for (tile_col = 0; tile_col < tile_cols; ++tile_col) for (tile_col = 0; tile_col < cm->tile_cols; ++tile_col)
vp10_encode_tile(cpi, &cpi->td, tile_row, tile_col); vp10_encode_tile(cpi, &cpi->td, tile_row, tile_col);
} }
...@@ -4497,7 +4494,10 @@ static void encode_frame_internal(VP10_COMP *cpi) { ...@@ -4497,7 +4494,10 @@ static void encode_frame_internal(VP10_COMP *cpi) {
#endif #endif
// If allowed, encoding tiles in parallel with one thread handling one tile. // If allowed, encoding tiles in parallel with one thread handling one tile.
if (VPXMIN(cpi->oxcf.max_threads, 1 << cm->log2_tile_cols) > 1) // TODO(geza.lore): The multi-threaded encoder is not safe with more than
// 1 tile rows, as it uses the single above_context et al arrays from
// cpi->common
if (VPXMIN(cpi->oxcf.max_threads, cm->tile_cols) > 1 && cm->tile_rows == 1)
vp10_encode_tiles_mt(cpi); vp10_encode_tiles_mt(cpi);
else else
encode_tiles(cpi); encode_tiles(cpi);
......
...@@ -788,13 +788,38 @@ void vp10_new_framerate(VP10_COMP *cpi, double framerate) { ...@@ -788,13 +788,38 @@ void vp10_new_framerate(VP10_COMP *cpi, double framerate) {
static void set_tile_limits(VP10_COMP *cpi) { static void set_tile_limits(VP10_COMP *cpi) {
VP10_COMMON *const cm = &cpi->common; VP10_COMMON *const cm = &cpi->common;
#if CONFIG_EXT_TILE
cm->tile_width = clamp(cpi->oxcf.tile_columns, 1, 64) << MI_BLOCK_SIZE_LOG2;
cm->tile_height = clamp(cpi->oxcf.tile_rows, 1, 64) << MI_BLOCK_SIZE_LOG2;
cm->tile_width = VPXMIN(cm->tile_width, cm->mi_cols);
cm->tile_height = VPXMIN(cm->tile_height, cm->mi_rows);
// Get the number of tiles
cm->tile_cols = 1;
while (cm->tile_cols * cm->tile_width < cm->mi_cols)
++cm->tile_cols;
cm->tile_rows = 1;
while (cm->tile_rows * cm->tile_height < cm->mi_rows)
++cm->tile_rows;
#else
int min_log2_tile_cols, max_log2_tile_cols; int min_log2_tile_cols, max_log2_tile_cols;
vp10_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols); vp10_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
cm->log2_tile_cols = clamp(cpi->oxcf.tile_columns, cm->log2_tile_cols = clamp(cpi->oxcf.tile_columns,
min_log2_tile_cols, max_log2_tile_cols); min_log2_tile_cols, max_log2_tile_cols);
cm->log2_tile_rows = cpi->oxcf.tile_rows; cm->log2_tile_rows = cpi->oxcf.tile_rows;
cm->tile_cols = 1 << cm->log2_tile_cols;
cm->tile_rows = 1 << cm->log2_tile_rows;
cm->tile_width = (mi_cols_aligned_to_sb(cm->mi_cols) >> cm->log2_tile_cols);
cm->tile_height = (mi_cols_aligned_to_sb(cm->mi_rows) >> cm->log2_tile_rows);
// round to integer multiples of 8
cm->tile_width = mi_cols_aligned_to_sb(cm->tile_width);
cm->tile_height = mi_cols_aligned_to_sb(cm->tile_height);
#endif // CONFIG_EXT_TILE
} }
static void update_frame_size(VP10_COMP *cpi) { static void update_frame_size(VP10_COMP *cpi) {
...@@ -3843,7 +3868,9 @@ static void encode_with_recode_loop(VP10_COMP *cpi, ...@@ -3843,7 +3868,9 @@ static void encode_with_recode_loop(VP10_COMP *cpi,
// to recode. // to recode.
if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) { if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) {
save_coding_context(cpi); save_coding_context(cpi);
vp10_pack_bitstream(cpi, dest, size); vp10_pack_bitstream(cpi, dest, size);
rc->projected_frame_size = (int)(*size) << 3; rc->projected_frame_size = (int)(*size) << 3;
restore_coding_context(cpi); restore_coding_context(cpi);
......
...@@ -303,6 +303,11 @@ typedef struct SUBFRAME_STATS { ...@@ -303,6 +303,11 @@ typedef struct SUBFRAME_STATS {
} SUBFRAME_STATS; } SUBFRAME_STATS;
#endif // CONFIG_ENTROPY #endif // CONFIG_ENTROPY
typedef struct TileBufferEnc {
uint8_t *data;
size_t size;
} TileBufferEnc;
typedef struct VP10_COMP { typedef struct VP10_COMP {
QUANTS quants; QUANTS quants;
ThreadData td; ThreadData td;
...@@ -325,9 +330,6 @@ typedef struct VP10_COMP { ...@@ -325,9 +330,6 @@ typedef struct VP10_COMP {
EncRefCntBuffer upsampled_ref_bufs[MAX_REF_FRAMES]; EncRefCntBuffer upsampled_ref_bufs[MAX_REF_FRAMES];
int upsampled_ref_idx[MAX_REF_FRAMES]; int upsampled_ref_idx[MAX_REF_FRAMES];
TileDataEnc *tile_data;
int allocated_tiles; // Keep track of memory allocated for tiles.
// For a still frame, this flag is set to 1 to skip partition search. // For a still frame, this flag is set to 1 to skip partition search.
int partition_search_skippable_frame; int partition_search_skippable_frame;
...@@ -369,9 +371,6 @@ typedef struct VP10_COMP { ...@@ -369,9 +371,6 @@ typedef struct VP10_COMP {
YV12_BUFFER_CONFIG last_frame_db; YV12_BUFFER_CONFIG last_frame_db;
#endif // CONFIG_LOOP_RESTORATION #endif // CONFIG_LOOP_RESTORATION
TOKENEXTRA *tile_tok[4][1 << 6];
unsigned int tok_count[4][1 << 6];
// Ambient reconstruction err target for force key frames // Ambient reconstruction err target for force key frames
int64_t ambient_err; int64_t ambient_err;
...@@ -444,7 +443,6 @@ typedef struct VP10_COMP { ...@@ -444,7 +443,6 @@ typedef struct VP10_COMP {
YV12_BUFFER_CONFIG alt_ref_buffer; YV12_BUFFER_CONFIG alt_ref_buffer;
#if CONFIG_INTERNAL_STATS #if CONFIG_INTERNAL_STATS
unsigned int mode_chosen_counts[MAX_MODES]; unsigned int mode_chosen_counts[MAX_MODES];
...@@ -543,6 +541,15 @@ typedef struct VP10_COMP { ...@@ -543,6 +541,15 @@ typedef struct VP10_COMP {
int multi_arf_allowed; int multi_arf_allowed;
int multi_arf_enabled; int multi_arf_enabled;
int multi_arf_last_grp_enabled; int multi_arf_last_grp_enabled;
TileDataEnc *tile_data;
int allocated_tiles; // Keep track of memory allocated for tiles.
TOKENEXTRA *tile_tok[MAX_TILE_ROWS][MAX_TILE_COLS];
unsigned int tok_count[MAX_TILE_ROWS][MAX_TILE_COLS];
TileBufferEnc tile_buffers[MAX_TILE_ROWS][MAX_TILE_COLS];
#if CONFIG_VP9_TEMPORAL_DENOISING #if CONFIG_VP9_TEMPORAL_DENOISING
VP9_DENOISER denoiser; VP9_DENOISER denoiser;
#endif #endif
...@@ -659,7 +666,7 @@ static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer( ...@@ -659,7 +666,7 @@ static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer(
buf_idx != INVALID_IDX ? &cm->buffer_pool->frame_bufs[buf_idx].buf : NULL; buf_idx != INVALID_IDX ? &cm->buffer_pool->frame_bufs[buf_idx].buf : NULL;
} }
static INLINE int get_token_alloc(int mb_rows, int mb_cols) { static INLINE unsigned int get_token_alloc(int mb_rows, int mb_cols) {
// TODO(JBB): double check we can't exceed this token count if we have a // TODO(JBB): double check we can't exceed this token count if we have a
// 32x32 transform crossing a boundary at a multiple of 16. // 32x32 transform crossing a boundary at a multiple of 16.
// mb_rows, cols are in units of 16 pixels. We assume 3 planes all at full // mb_rows, cols are in units of 16 pixels. We assume 3 planes all at full
...@@ -670,7 +677,7 @@ static INLINE int get_token_alloc(int mb_rows, int mb_cols) { ...@@ -670,7 +677,7 @@ static INLINE int get_token_alloc(int mb_rows, int mb_cols) {
// Get the allocated token size for a tile. It does the same calculation as in // Get the allocated token size for a tile. It does the same calculation as in