Commit 454989ff authored by Geza Lore

Make superblock size variable at the frame level.

The uncompressed frame header contains a bit to signal whether the
frame is encoded using 64x64 or 128x128 superblocks. This can vary
between any 2 frames.

vpxenc gained the --sb-size={64,128,dynamic} option, which allows the
configuration of the superblock size used (default is dynamic). 64/128
will force the encoder to always use the specified superblock size.
Dynamic would enable the encoder to choose the sb size for each
frame, but this is not implemented yet (dynamic does the same as 128
for now).

Constraints on tile sizes depend on the superblock size, the following
is a summary of the current bitstream syntax and semantics:

If both --enable-ext-tile is OFF and --enable-ext-partition is OFF:
     The tile coding in this case is the same as VP9. In particular,
     tiles have a minimum width of 256 pixels and a maximum width of
     4096 pixels. The tile width must be multiples of 64 pixels
     (except for the rightmost tile column). There can be a maximum
     of 64 tile columns and 4 tile rows.

If --enable-ext-tile is OFF and --enable-ext-partition is ON:
     Same constraints as above, except that tile width must be
     multiples of 128 pixels (except for the rightmost tile column).

There is no change in the bitstream syntax used for coding the tile
configuration if --enable-ext-tile is OFF.

If --enable-ext-tile is ON and --enable-ext-partition is OFF:
     This is the new large scale tile coding configuration. The
     minimum/maximum tile width and height are 64/4096 pixels. Tile
     width and height must be multiples of 64 pixels. The uncompressed
     header contains two 6 bit fields that hold the tile width/height
     in units of 64 pixels. The maximum number of tile rows/columns
     is only limited by the maximum frame size of 65536x65536 pixels
     that can be coded in the bitstream. This yields a maximum of
     1024x1024 tile rows and columns (of 64x64 tiles in a 65536x65536
     frame).

If both --enable-ext-tile is ON and --enable-ext-partition is ON:
     Same applies as above, except that in the bitstream the 2 fields
     containing the tile width/height are in units of the superblock
     size, and the superblock size itself is also coded in the bitstream.
     If the uncompressed header signals the use of 64x64 superblocks,
     then the tile width/height fields are 6 bits wide and are in units
     of 64 pixels. If the uncompressed header signals the use of 128x128
     superblocks, then the tile width/height fields are 5 bits wide and
     are in units of 128 pixels.

The above is a summary of the bitstream. The user interface to vpxenc
(and the equivalent encoder API) behaves as follows:

If --enable-ext-tile is OFF:
     No change in the user interface. --tile-columns and --tile-rows
     specify the base 2 logarithm of the desired number of tile columns
     and tile rows. The actual number of tile rows and tile columns,
     and the particular tile width and tile height are computed by the
     codec ensuring all of the above constraints are respected.

If --enable-ext-tile is ON, but --enable-ext-partition is OFF:
     No change in the user interface. --tile-columns and --tile-rows
     specify the WIDTH and HEIGHT of the tiles in units of 64 pixels.
     The valid values are in the range [1, 64] (which corresponds to
     [64, 4096] pixels in increments of 64).

If both --enable-ext-tile is ON and --enable-ext-partition is ON:
     If --sb-size=64:
         The user interface is the same as in the previous point.
         --tile-columns and --tile-rows specify tile WIDTH and HEIGHT,
         in units of 64 pixels, in the range [1, 64] (which corresponds
         to [64, 4096] pixels in increments of 64).
     If --sb-size=128 or --sb-size=dynamic (default):
         --tile-columns and --tile-rows specify tile WIDTH and HEIGHT,
         in units of 128 pixels in the range [1, 32] (which corresponds
         to [128, 4096] pixels in increments of 128).

Change-Id: Idc9beee1ad12ff1634e83671985d14c680f9179a
parent 6161f350
......@@ -134,7 +134,8 @@ int vp10_alloc_context_buffers(VP10_COMMON *cm, int width, int height) {
// TODO(geza.lore): These are bigger than they need to be.
// cm->tile_width would be enough but it complicates indexing a
// little elsewhere.
const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
const int aligned_mi_cols =
ALIGN_POWER_OF_TWO(cm->mi_cols, MAX_MIB_SIZE_LOG2);
int i;
for (i = 0 ; i < MAX_MB_PLANE ; i++) {
......
......@@ -41,6 +41,7 @@ extern "C" {
#define MAX_MIB_MASK (MAX_MIB_SIZE - 1)
#define MAX_MIB_MASK_2 (MAX_MIB_SIZE * 2 - 1)
// Maximum number of tile rows and tile columns
#if CONFIG_EXT_TILE
# define MAX_TILE_ROWS 1024
# define MAX_TILE_COLS 1024
......
......@@ -863,10 +863,8 @@ void vp10_setup_mask(VP10_COMMON *const cm, const int mi_row, const int mi_col,
const int shift_32_uv[] = {0, 2, 8, 10};
const int shift_16_uv[] = {0, 1, 4, 5};
int i;
const int max_rows = (mi_row + MAX_MIB_SIZE > cm->mi_rows ?
cm->mi_rows - mi_row : MAX_MIB_SIZE);
const int max_cols = (mi_col + MAX_MIB_SIZE > cm->mi_cols ?
cm->mi_cols - mi_col : MAX_MIB_SIZE);
const int max_rows = VPXMIN(cm->mi_rows - mi_row, MAX_MIB_SIZE);
const int max_cols = VPXMIN(cm->mi_cols - mi_col, MAX_MIB_SIZE);
#if CONFIG_EXT_PARTITION
assert(0 && "Not yet updated");
#endif // CONFIG_EXT_PARTITION
......@@ -1206,13 +1204,12 @@ static void highbd_filter_selectively_vert(uint16_t *s, int pitch,
void vp10_filter_block_plane_non420(VP10_COMMON *cm,
struct macroblockd_plane *plane,
MODE_INFO **mi_8x8,
MODE_INFO **mib,
int mi_row, int mi_col) {
const int ss_x = plane->subsampling_x;
const int ss_y = plane->subsampling_y;
const int row_step = 1 << ss_y;
const int col_step = 1 << ss_x;
const int row_step_stride = cm->mi_stride * row_step;
struct buf_2d *const dst = &plane->dst;
uint8_t* const dst0 = dst->buf;
unsigned int mask_16x16[MAX_MIB_SIZE] = {0};
......@@ -1222,15 +1219,15 @@ void vp10_filter_block_plane_non420(VP10_COMMON *cm,
uint8_t lfl[MAX_MIB_SIZE][MAX_MIB_SIZE];
int r, c;
for (r = 0; r < MAX_MIB_SIZE && mi_row + r < cm->mi_rows; r += row_step) {
for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += row_step) {
unsigned int mask_16x16_c = 0;
unsigned int mask_8x8_c = 0;
unsigned int mask_4x4_c = 0;
unsigned int border_mask;
// Determine the vertical edges that need filtering
for (c = 0; c < MAX_MIB_SIZE && mi_col + c < cm->mi_cols; c += col_step) {
const MODE_INFO *mi = mi_8x8[c];
for (c = 0; c < cm->mib_size && mi_col + c < cm->mi_cols; c += col_step) {
const MODE_INFO *mi = mib[c];
const MB_MODE_INFO *mbmi = &mi[0].mbmi;
const BLOCK_SIZE sb_type = mbmi->sb_type;
const int skip_this = mbmi->skip && is_inter_block(mbmi);
......@@ -1378,13 +1375,13 @@ void vp10_filter_block_plane_non420(VP10_COMMON *cm,
mask_4x4_int[r],
&cm->lf_info, &lfl[r][0]);
#endif // CONFIG_VP9_HIGHBITDEPTH
dst->buf += 8 * dst->stride;
mi_8x8 += row_step_stride;
dst->buf += MI_SIZE * dst->stride;
mib += row_step * cm->mi_stride;
}
// Now do horizontal pass
dst->buf = dst0;
for (r = 0; r < MAX_MIB_SIZE && mi_row + r < cm->mi_rows; r += row_step) {
for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += row_step) {
const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;
const unsigned int mask_4x4_int_r = skip_border_4x4_r ? 0 : mask_4x4_int[r];
......@@ -1428,7 +1425,7 @@ void vp10_filter_block_plane_non420(VP10_COMMON *cm,
mask_4x4_int_r,
&cm->lf_info, &lfl[r][0]);
#endif // CONFIG_VP9_HIGHBITDEPTH
dst->buf += 8 * dst->stride;
dst->buf += MI_SIZE * dst->stride;
}
}
......@@ -1447,7 +1444,7 @@ void vp10_filter_block_plane_ss00(VP10_COMMON *const cm,
assert(plane->subsampling_x == 0 && plane->subsampling_y == 0);
// Vertical pass: do 2 rows at one time
for (r = 0; r < MAX_MIB_SIZE && mi_row + r < cm->mi_rows; r += 2) {
for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += 2) {
unsigned int mask_16x16_l = mask_16x16 & 0xffff;
unsigned int mask_8x8_l = mask_8x8 & 0xffff;
unsigned int mask_4x4_l = mask_4x4 & 0xffff;
......@@ -1472,11 +1469,11 @@ void vp10_filter_block_plane_ss00(VP10_COMMON *const cm,
mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
&lfm->lfl_y[r][0]);
#endif // CONFIG_VP9_HIGHBITDEPTH
dst->buf += 16 * dst->stride;
mask_16x16 >>= 16;
mask_8x8 >>= 16;
mask_4x4 >>= 16;
mask_4x4_int >>= 16;
dst->buf += 2 * MI_SIZE * dst->stride;
mask_16x16 >>= 2 * MI_SIZE;
mask_8x8 >>= 2 * MI_SIZE;
mask_4x4 >>= 2 * MI_SIZE;
mask_4x4_int >>= 2 * MI_SIZE;
}
// Horizontal pass
......@@ -1486,7 +1483,7 @@ void vp10_filter_block_plane_ss00(VP10_COMMON *const cm,
mask_4x4 = lfm->above_y[TX_4X4];
mask_4x4_int = lfm->int_4x4_y;
for (r = 0; r < MAX_MIB_SIZE && mi_row + r < cm->mi_rows; r++) {
for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r++) {
unsigned int mask_16x16_r;
unsigned int mask_8x8_r;
unsigned int mask_4x4_r;
......@@ -1519,11 +1516,11 @@ void vp10_filter_block_plane_ss00(VP10_COMMON *const cm,
&lfm->lfl_y[r][0]);
#endif // CONFIG_VP9_HIGHBITDEPTH
dst->buf += 8 * dst->stride;
mask_16x16 >>= 8;
mask_8x8 >>= 8;
mask_4x4 >>= 8;
mask_4x4_int >>= 8;
dst->buf += MI_SIZE * dst->stride;
mask_16x16 >>= MI_SIZE;
mask_8x8 >>= MI_SIZE;
mask_4x4 >>= MI_SIZE;
mask_4x4_int >>= MI_SIZE;
}
}
......@@ -1541,14 +1538,13 @@ void vp10_filter_block_plane_ss11(VP10_COMMON *const cm,
uint16_t mask_4x4_int = lfm->left_int_4x4_uv;
assert(plane->subsampling_x == 1 && plane->subsampling_y == 1);
assert(plane->plane_type == PLANE_TYPE_UV);
// Vertical pass: do 2 rows at one time
for (r = 0; r < MAX_MIB_SIZE && mi_row + r < cm->mi_rows; r += 4) {
if (plane->plane_type == 1) {
for (c = 0; c < (MAX_MIB_SIZE >> 1); c++) {
lfm->lfl_uv[r >> 1][c] = lfm->lfl_y[r][c << 1];
lfm->lfl_uv[(r + 2) >> 1][c] = lfm->lfl_y[r + 2][c << 1];
}
for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += 4) {
for (c = 0; c < (cm->mib_size >> 1); c++) {
lfm->lfl_uv[r >> 1][c] = lfm->lfl_y[r][c << 1];
lfm->lfl_uv[(r + 2) >> 1][c] = lfm->lfl_y[r + 2][c << 1];
}
{
......@@ -1577,11 +1573,11 @@ void vp10_filter_block_plane_ss11(VP10_COMMON *const cm,
&lfm->lfl_uv[r >> 1][0]);
#endif // CONFIG_VP9_HIGHBITDEPTH
dst->buf += 16 * dst->stride;
mask_16x16 >>= 8;
mask_8x8 >>= 8;
mask_4x4 >>= 8;
mask_4x4_int >>= 8;
dst->buf += 2 * MI_SIZE * dst->stride;
mask_16x16 >>= MI_SIZE;
mask_8x8 >>= MI_SIZE;
mask_4x4 >>= MI_SIZE;
mask_4x4_int >>= MI_SIZE;
}
}
......@@ -1592,7 +1588,7 @@ void vp10_filter_block_plane_ss11(VP10_COMMON *const cm,
mask_4x4 = lfm->above_uv[TX_4X4];
mask_4x4_int = lfm->above_int_4x4_uv;
for (r = 0; r < MAX_MIB_SIZE && mi_row + r < cm->mi_rows; r += 2) {
for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += 2) {
const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1;
const unsigned int mask_4x4_int_r =
skip_border_4x4_r ? 0 : (mask_4x4_int & 0xf);
......@@ -1628,11 +1624,11 @@ void vp10_filter_block_plane_ss11(VP10_COMMON *const cm,
&lfm->lfl_uv[r >> 1][0]);
#endif // CONFIG_VP9_HIGHBITDEPTH
dst->buf += 8 * dst->stride;
mask_16x16 >>= 4;
mask_8x8 >>= 4;
mask_4x4 >>= 4;
mask_4x4_int >>= 4;
dst->buf += MI_SIZE * dst->stride;
mask_16x16 >>= MI_SIZE / 2;
mask_8x8 >>= MI_SIZE / 2;
mask_4x4 >>= MI_SIZE / 2;
mask_4x4_int >>= MI_SIZE / 2;
}
}
......@@ -1647,12 +1643,12 @@ void vp10_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer,
# if CONFIG_VAR_TX
memset(cm->above_txfm_context, TX_SIZES, cm->mi_cols);
# endif // CONFIG_VAR_TX
for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
for (mi_row = start; mi_row < stop; mi_row += cm->mib_size) {
MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
# if CONFIG_VAR_TX
memset(cm->left_txfm_context, TX_SIZES, MAX_MIB_SIZE);
# endif // CONFIG_VAR_TX
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) {
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += cm->mib_size) {
int plane;
vp10_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
......
......@@ -312,7 +312,7 @@ typedef struct VP10Common {
int log2_tile_cols, log2_tile_rows;
#endif // !CONFIG_EXT_TILE
int tile_cols, tile_rows;
int tile_width, tile_height;
int tile_width, tile_height; // In MI units
int byte_alignment;
int skip_loop_filter;
......@@ -343,6 +343,10 @@ typedef struct VP10Common {
#if CONFIG_ANS
rans_dec_lut token_tab[COEFF_PROB_MODELS];
#endif // CONFIG_ANS
BLOCK_SIZE sb_size; // Size of the superblock used for this frame
int mib_size; // Size of the superblock in units of MI blocks
int mib_size_log2; // Log 2 of above.
} VP10_COMMON;
// TODO(hkuang): Don't need to lock the whole pool after implementing atomic
......@@ -408,8 +412,12 @@ static INLINE void ref_cnt_fb(RefCntBuffer *bufs, int *idx, int new_idx) {
bufs[new_idx].ref_count++;
}
static INLINE int mi_cols_aligned_to_sb(int n_mis) {
return ALIGN_POWER_OF_TWO(n_mis, MAX_MIB_SIZE_LOG2);
// Aligns the frame width in MI units (cm->mi_cols) to the superblock size
// currently selected for this frame, using cm->mib_size_log2 as the
// power-of-two alignment exponent.
static INLINE int mi_cols_aligned_to_sb(const VP10_COMMON *cm) {
  const int sb_log2 = cm->mib_size_log2;
  const int cols = cm->mi_cols;
  return ALIGN_POWER_OF_TWO(cols, sb_log2);
}
// Aligns the frame height in MI units (cm->mi_rows) to the superblock size
// currently selected for this frame, using cm->mib_size_log2 as the
// power-of-two alignment exponent.
static INLINE int mi_rows_aligned_to_sb(const VP10_COMMON *cm) {
  const int sb_log2 = cm->mib_size_log2;
  const int rows = cm->mi_rows;
  return ALIGN_POWER_OF_TWO(rows, sb_log2);
}
static INLINE int frame_is_intra_only(const VP10_COMMON *const cm) {
......@@ -697,6 +705,13 @@ static INLINE PARTITION_TYPE get_partition(const VP10_COMMON *const cm,
}
}
// Selects the superblock size for the current frame and refreshes the two
// derived fields so they always agree with cm->sb_size:
//   cm->mib_size      - superblock size in units of MI blocks
//   cm->mib_size_log2 - base-2 logarithm of cm->mib_size
// Both derived values come from the per-BLOCK_SIZE lookup tables.
static INLINE void set_sb_size(VP10_COMMON *const cm,
                               const BLOCK_SIZE sb_size) {
  cm->mib_size_log2 = mi_width_log2_lookup[sb_size];
  cm->mib_size = num_8x8_blocks_wide_lookup[sb_size];
  cm->sb_size = sb_size;
}
#ifdef __cplusplus
} // extern "C"
#endif
......
......@@ -94,7 +94,7 @@ void thread_loop_filter_rows(const YV12_BUFFER_CONFIG *const frame_buffer,
int start, int stop, int y_only,
VP9LfSync *const lf_sync) {
const int num_planes = y_only ? 1 : MAX_MB_PLANE;
const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MAX_MIB_SIZE_LOG2;
const int sb_cols = mi_cols_aligned_to_sb(cm) >> cm->mib_size_log2;
int mi_row, mi_col;
#if !CONFIG_EXT_PARTITION_TYPES
enum lf_path path;
......@@ -116,12 +116,12 @@ void thread_loop_filter_rows(const YV12_BUFFER_CONFIG *const frame_buffer,
#endif // CONFIG_EXT_PARTITION
for (mi_row = start; mi_row < stop;
mi_row += lf_sync->num_workers * MAX_MIB_SIZE) {
mi_row += lf_sync->num_workers * cm->mib_size) {
MODE_INFO **const mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) {
const int r = mi_row >> MAX_MIB_SIZE_LOG2;
const int c = mi_col >> MAX_MIB_SIZE_LOG2;
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += cm->mib_size) {
const int r = mi_row >> cm->mib_size_log2;
const int c = mi_col >> cm->mib_size_log2;
int plane;
sync_read(lf_sync, r, c);
......@@ -175,7 +175,7 @@ static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame,
VP9LfSync *lf_sync) {
const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
// Number of superblock rows and cols
const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MAX_MIB_SIZE_LOG2;
const int sb_rows = mi_rows_aligned_to_sb(cm) >> cm->mib_size_log2;
// Decoder may allocate more threads than number of tiles based on user's
// input.
const int tile_cols = cm->tile_cols;
......@@ -215,7 +215,7 @@ static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame,
// Loopfilter data
vp10_loop_filter_data_reset(lf_data, frame, cm, planes);
lf_data->start = start + i * MAX_MIB_SIZE;
lf_data->start = start + i * cm->mib_size;
lf_data->stop = stop;
lf_data->y_only = y_only;
......
......@@ -12,9 +12,6 @@
#include "vp10/common/onyxc_int.h"
#include "vpx_dsp/vpx_dsp_common.h"
#define MIN_TILE_WIDTH_B64 4
#define MAX_TILE_WIDTH_B64 64
void vp10_tile_set_row(TileInfo *tile, const VP10_COMMON *cm, int row) {
tile->mi_row_start = row * cm->tile_height;
tile->mi_row_end = VPXMIN(tile->mi_row_start + cm->tile_height,
......@@ -33,26 +30,35 @@ void vp10_tile_init(TileInfo *tile, const VP10_COMMON *cm, int row, int col) {
}
#if !CONFIG_EXT_TILE
// TODO(geza.lore): CU_SIZE dependent.
static int get_min_log2_tile_cols(const int sb64_cols) {
# if CONFIG_EXT_PARTITION
# define MIN_TILE_WIDTH_MAX_SB 2
# define MAX_TILE_WIDTH_MAX_SB 32
# else
# define MIN_TILE_WIDTH_MAX_SB 4
# define MAX_TILE_WIDTH_MAX_SB 64
# endif // CONFIG_EXT_PARTITION
static int get_min_log2_tile_cols(const int max_sb_cols) {
int min_log2 = 0;
while ((MAX_TILE_WIDTH_B64 << min_log2) < sb64_cols)
while ((MAX_TILE_WIDTH_MAX_SB << min_log2) < max_sb_cols)
++min_log2;
return min_log2;
}
static int get_max_log2_tile_cols(const int sb64_cols) {
static int get_max_log2_tile_cols(const int max_sb_cols) {
int max_log2 = 1;
while ((sb64_cols >> max_log2) >= MIN_TILE_WIDTH_B64)
while ((max_sb_cols >> max_log2) >= MIN_TILE_WIDTH_MAX_SB)
++max_log2;
return max_log2 - 1;
}
void vp10_get_tile_n_bits(int mi_cols,
void vp10_get_tile_n_bits(const int mi_cols,
int *min_log2_tile_cols, int *max_log2_tile_cols) {
const int sb64_cols = mi_cols_aligned_to_sb(mi_cols) >> MAX_MIB_SIZE_LOG2;
*min_log2_tile_cols = get_min_log2_tile_cols(sb64_cols);
*max_log2_tile_cols = get_max_log2_tile_cols(sb64_cols);
const int max_sb_cols =
ALIGN_POWER_OF_TWO(mi_cols, MAX_MIB_SIZE_LOG2) >> MAX_MIB_SIZE_LOG2;
*min_log2_tile_cols = get_min_log2_tile_cols(max_sb_cols);
*max_log2_tile_cols = get_max_log2_tile_cols(max_sb_cols);
assert(*min_log2_tile_cols <= *max_log2_tile_cols);
}
#endif // !CONFIG_EXT_TILE
......@@ -30,8 +30,8 @@ void vp10_tile_init(TileInfo *tile, const struct VP10Common *cm,
void vp10_tile_set_row(TileInfo *tile, const struct VP10Common *cm, int row);
void vp10_tile_set_col(TileInfo *tile, const struct VP10Common *cm, int col);
void vp10_get_tile_n_bits(int mi_cols,
int *min_log2_tile_cols, int *max_log2_tile_cols);
void vp10_get_tile_n_bits(const int mi_cols,
int *min_log2_tile_cols, int *max_log2_tile_cols);
#ifdef __cplusplus
} // extern "C"
......
......@@ -2817,16 +2817,24 @@ static void setup_frame_size_with_refs(VP10_COMMON *cm,
pool->frame_bufs[cm->new_fb_idx].buf.render_height = cm->render_height;
}
static void setup_tile_info(VP10Decoder *const pbi,
static void read_tile_info(VP10Decoder *const pbi,
struct vpx_read_bit_buffer *const rb) {
VP10_COMMON *const cm = &pbi->common;
#if CONFIG_EXT_TILE
// Read the tile width/height
cm->tile_width = vpx_rb_read_literal(rb, 6) + 1; // in [1, 64]
cm->tile_height = vpx_rb_read_literal(rb, 6) + 1; // in [1, 64]
#if CONFIG_EXT_PARTITION
if (cm->sb_size == BLOCK_128X128) {
cm->tile_width = vpx_rb_read_literal(rb, 5) + 1;
cm->tile_height = vpx_rb_read_literal(rb, 5) + 1;
} else
#endif // CONFIG_EXT_PARTITION
{
cm->tile_width = vpx_rb_read_literal(rb, 6) + 1;
cm->tile_height = vpx_rb_read_literal(rb, 6) + 1;
}
cm->tile_width = cm->tile_width << MAX_MIB_SIZE_LOG2;
cm->tile_height = cm->tile_height << MAX_MIB_SIZE_LOG2;
cm->tile_width <<= cm->mib_size_log2;
cm->tile_height <<= cm->mib_size_log2;
cm->tile_width = VPXMIN(cm->tile_width, cm->mi_cols);
cm->tile_height = VPXMIN(cm->tile_height, cm->mi_rows);
......@@ -2867,12 +2875,14 @@ static void setup_tile_info(VP10Decoder *const pbi,
cm->tile_cols = 1 << cm->log2_tile_cols;
cm->tile_rows = 1 << cm->log2_tile_rows;
cm->tile_width = (mi_cols_aligned_to_sb(cm->mi_cols) >> cm->log2_tile_cols);
cm->tile_height = (mi_cols_aligned_to_sb(cm->mi_rows) >> cm->log2_tile_rows);
cm->tile_width = ALIGN_POWER_OF_TWO(cm->mi_cols, MAX_MIB_SIZE_LOG2);
cm->tile_width >>= cm->log2_tile_cols;
cm->tile_height = ALIGN_POWER_OF_TWO(cm->mi_rows, MAX_MIB_SIZE_LOG2);
cm->tile_height >>= cm->log2_tile_rows;
// round to integer multiples of 8
cm->tile_width = mi_cols_aligned_to_sb(cm->tile_width);
cm->tile_height = mi_cols_aligned_to_sb(cm->tile_height);
// round to integer multiples of superblock size
cm->tile_width = ALIGN_POWER_OF_TWO(cm->tile_width, MAX_MIB_SIZE_LOG2);
cm->tile_height = ALIGN_POWER_OF_TWO(cm->tile_height, MAX_MIB_SIZE_LOG2);
// tile size magnitude
if (cm->tile_rows > 1 || cm->tile_cols > 1) {
......@@ -3107,8 +3117,7 @@ static const uint8_t *decode_tiles(VP10Decoder *pbi,
int tile_row, tile_col;
#if CONFIG_ENTROPY
cm->do_subframe_update =
cm->log2_tile_cols == 0 && cm->log2_tile_rows == 0;
cm->do_subframe_update = cm->tile_cols == 1 && cm->tile_rows == 1;
#endif // CONFIG_ENTROPY
if (cm->lf.filter_level && !cm->skip_loop_filter &&
......@@ -3192,19 +3201,19 @@ static const uint8_t *decode_tiles(VP10Decoder *pbi,
vp10_zero_above_context(cm, tile_info.mi_col_start, tile_info.mi_col_end);
for (mi_row = tile_info.mi_row_start; mi_row < tile_info.mi_row_end;
mi_row += MAX_MIB_SIZE) {
mi_row += cm->mib_size) {
int mi_col;
vp10_zero_left_context(&td->xd);
for (mi_col = tile_info.mi_col_start; mi_col < tile_info.mi_col_end;
mi_col += MAX_MIB_SIZE) {
mi_col += cm->mib_size) {
decode_partition(pbi, &td->xd,
#if CONFIG_SUPERTX
0,
#endif // CONFIG_SUPERTX
mi_row, mi_col, &td->bit_reader,
BLOCK_LARGEST, MAX_SB_SIZE_LOG2 - 2);
cm->sb_size, b_width_log2_lookup[cm->sb_size]);
}
pbi->mb.corrupted |= td->xd.corrupted;
if (pbi->mb.corrupted)
......@@ -3231,8 +3240,8 @@ static const uint8_t *decode_tiles(VP10Decoder *pbi,
// Loopfilter one tile row.
if (cm->lf.filter_level && !cm->skip_loop_filter) {
LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
const int lf_start = VPXMAX(0, tile_info.mi_row_start - MAX_MIB_SIZE);
const int lf_end = tile_info.mi_row_end - MAX_MIB_SIZE;
const int lf_start = VPXMAX(0, tile_info.mi_row_start - cm->mib_size);
const int lf_end = tile_info.mi_row_end - cm->mib_size;
// Delay the loopfilter if the first tile row is only
// a single superblock high.
......@@ -3256,7 +3265,7 @@ static const uint8_t *decode_tiles(VP10Decoder *pbi,
// After loopfiltering, the last 7 row pixels in each superblock row may
// still be changed by the longest loopfilter of the next superblock row.
if (cm->frame_parallel_decode)
vp10_frameworker_broadcast(pbi->cur_buf, mi_row << MAX_MIB_SIZE_LOG2);
vp10_frameworker_broadcast(pbi->cur_buf, mi_row << cm->mib_size_log2);
#endif // !CONFIG_VAR_TX
}
......@@ -3292,6 +3301,7 @@ static const uint8_t *decode_tiles(VP10Decoder *pbi,
static int tile_worker_hook(TileWorkerData *const tile_data,
const TileInfo *const tile) {
VP10Decoder *const pbi = tile_data->pbi;
const VP10_COMMON *const cm = &pbi->common;
int mi_row, mi_col;
if (setjmp(tile_data->error_info.jmp)) {
......@@ -3306,17 +3316,17 @@ static int tile_worker_hook(TileWorkerData *const tile_data,
vp10_zero_above_context(&pbi->common, tile->mi_col_start, tile->mi_col_end);
for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end;
mi_row += MAX_MIB_SIZE) {
mi_row += cm->mib_size) {
vp10_zero_left_context(&tile_data->xd);
for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
mi_col += MAX_MIB_SIZE) {
mi_col += cm->mib_size) {
decode_partition(pbi, &tile_data->xd,
#if CONFIG_SUPERTX
0,
#endif
mi_row, mi_col, &tile_data->bit_reader,
BLOCK_LARGEST, MAX_SB_SIZE_LOG2 - 2);
cm->sb_size, b_width_log2_lookup[cm->sb_size]);
}
}
return !tile_data->xd.corrupted;
......@@ -3769,6 +3779,12 @@ static size_t read_uncompressed_header(VP10Decoder *pbi,
if (frame_is_intra_only(cm) || cm->error_resilient_mode)
vp10_setup_past_independence(cm);
#if CONFIG_EXT_PARTITION
set_sb_size(cm, vpx_rb_read_bit(rb) ? BLOCK_128X128 : BLOCK_64X64);
#else
set_sb_size(cm, BLOCK_64X64);
#endif // CONFIG_EXT_PARTITION
setup_loopfilter(cm, rb);
#if CONFIG_LOOP_RESTORATION
setup_restoration(cm, rb);
......@@ -3808,7 +3824,7 @@ static size_t read_uncompressed_header(VP10Decoder *pbi,
: read_tx_mode(rb);
cm->reference_mode = read_frame_reference_mode(cm, rb);
setup_tile_info(pbi, rb);
read_tile_info(pbi, rb);
sz = vpx_rb_read_literal(rb, 16);
if (sz == 0)
......
......@@ -1170,7 +1170,7 @@ static int read_is_inter_block(VP10_COMMON *const cm, MACROBLOCKD *const xd,
static void fpm_sync(void *const data, int mi_row) {
VP10Decoder *const pbi = (VP10Decoder *)data;
vp10_frameworker_wait(pbi->frame_worker_owner, pbi->common.prev_frame,
mi_row << MAX_MIB_SIZE_LOG2);
mi_row << pbi->common.mib_size_log2);
}
static void read_inter_block_mode_info(VP10Decoder *const pbi,
......
......@@ -116,8 +116,6 @@ void vp10_caq_select_segment(VP10_COMP *cpi, MACROBLOCK *mb, BLOCK_SIZE bs,
VP10_COMMON *const cm = &cpi->common;
const int mi_offset = mi_row * cm->mi_cols + mi_col;
const int bw = num_8x8_blocks_wide_lookup[BLOCK_LARGEST];
const int bh = num_8x8_blocks_high_lookup[BLOCK_LARGEST];
const int xmis = VPXMIN(cm->mi_cols - mi_col, num_8x8_blocks_wide_lookup[bs]);
const int ymis = VPXMIN(cm->mi_rows - mi_row, num_8x8_blocks_high_lookup[bs]);
int x, y;
......@@ -130,7 +128,7 @@ void vp10_caq_select_segment(VP10_COMP *cpi, MACROBLOCK *mb, BLOCK_SIZE bs,
// Rate depends on fraction of a SB64 in frame (xmis * ymis / bw * bh).
// It is converted to bits * 256 units.
const int target_rate = (cpi->rc.sb64_target_rate * xmis * ymis * 256) /
(bw * bh);
(cm->mib_size * cm->mib_size);
double logvar;
double low_var_thresh;
const int aq_strength = get_aq_c_strength(cm->base_qindex, cm->bit_depth);
......
......@@ -388,8 +388,8 @@ static void cyclic_refresh_update_map(VP10_COMP *const cpi) {
int i, block_count, bl_index, sb_rows, sb_cols, sbs_in_frame;
int xmis, ymis, x, y;
memset(seg_map, CR_SEGMENT_ID_BASE, cm->mi_rows * cm->mi_cols);
sb_cols = (cm->mi_cols + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
sb_rows = (cm->mi_rows + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
sb_cols = (cm->mi_cols + cm->mib_size - 1) / cm->mib_size;
sb_rows = (cm->mi_rows + cm->mib_size - 1) / cm->mib_size;
sbs_in_frame = sb_cols * sb_rows;
// Number of target blocks to get the q delta (segment 1).
block_count = cr->percent_refresh * cm->mi_rows * cm->mi_cols / 100;
......@@ -404,8 +404,8 @@ static void cyclic_refresh_update_map(VP10_COMP *const cpi) {
// Get the mi_row/mi_col corresponding to superblock index i.
int sb_row_index = (i / sb_cols);
int sb_col_index = i - sb_row_index * sb_cols;
int mi_row = sb_row_index * MAX_MIB_SIZE;
int mi_col = sb_col_index * MAX_MIB_SIZE;
int mi_row = sb_row_index * cm->mib_size;
int mi_col = sb_col_index * cm->mib_size;
int qindex_thresh =
cpi->oxcf.content == VP9E_CONTENT_SCREEN
? vp10_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST2, cm->base_qindex)
......@@ -413,11 +413,9 @@ static void cyclic_refresh_update_map(VP10_COMP *const cpi) {
assert(mi_row >= 0 && mi_row < cm->mi_rows);
assert(mi_col >= 0 && mi_col < cm->mi_cols);
bl_index = mi_row * cm->mi_cols + mi_col;
// Loop through all 8x8 blocks in superblock and update map.
xmis =
VPXMIN(cm->mi_cols - mi_col, num_8x8_blocks_wide_lookup[BLOCK_LARGEST]);
ymis =
VPXMIN(cm->mi_rows - mi_row, num_8x8_blocks_high_lookup[BLOCK_LARGEST]);
// Loop through all MI blocks in superblock and update map.
xmis = VPXMIN(cm->mi_cols - mi_col, cm->mib_size);
ymis = VPXMIN(cm->mi_rows - mi_row, cm->mib_size);
for (y = 0; y < ymis; y++) {
for (x = 0; x < xmis; x++) {
const int bl_index2 = bl_index + y * cm->mi_cols + x;
......
......@@ -1498,6 +1498,7 @@ static void write_modes_b(VP10_COMP *cpi, const TileInfo *const tile,
MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
MODE_INFO *m;
int plane;
int bh, bw;
#if CONFIG_ANS
(void) tok;
(void) tok_end;
......@@ -1507,12 +1508,14 @@ static void write_modes_b(VP10_COMP *cpi, const TileInfo *const tile,
xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col);
m = xd->mi[0];
assert(m->mbmi.sb_type <= cm->sb_size);
bh = num_8x8_blocks_high_lookup[m->mbmi.sb_type];
bw = num_8x8_blocks_wide_lookup[m->mbmi.sb_type];
cpi->td.mb.mbmi_ext = cpi->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col);
set_mi_row_col(xd, tile,
mi_row, num_8x8_blocks_high_lookup[m->mbmi.sb_type],
mi_col, num_8x8_blocks_wide_lookup[m->mbmi.sb_type],
cm->mi_rows, cm->mi_cols);
set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols);
if (frame_is_intra_only(cm)) {
write_mb_modes_kf(cm, xd, xd->mi, w);
} else {
......@@ -1660,7 +1663,7 @@ static void write_modes_sb(VP10_COMP *const cpi,
const BLOCK_SIZE subsize = get_subsize(bsize, partition);
#if CONFIG_SUPERTX
const int mi_offset = mi_row * cm->mi_stride + mi_col;
MB_MODE_INFO *mbmi = NULL;
MB_MODE_INFO *mbmi;
const int pack_token = !supertx_enabled;
TX_SIZE supertx_size;
int plane;
......@@ -1835,12 +1838,12 @@ static void write_modes(VP10_COMP *const cpi,
vp10_zero_above_context(cm, mi_col_start, mi_col_end);
for (mi_row = mi_row_start; mi_row < mi_row_end; mi_row += MAX_MIB_SIZE) {
for (mi_row = mi_row_start; mi_row < mi_row_end; mi_row += cm->mib_size) {
vp10_zero_left_context(xd);
for (mi_col = mi_col_start; mi_col < mi_col_end; mi_col += MAX_MIB_SIZE) {
for (mi_col = mi_col_start; mi_col < mi_col_end; mi_col += cm->mib_size) {
write_modes_sb_wrapper(cpi, tile, w, tok, tok_end, 0,
mi_row, mi_col, BLOCK_LARGEST);
mi_row, mi_col, cm->sb_size);
}
}
}
......@@ -2529,21 +2532,32 @@ static void fix_interp_filter(VP10_COMMON *cm, FRAME_COUNTS *counts) {
}
}
static void write_tile_info(VP10_COMMON *const cm,
static void write_tile_info(const VP10_COMMON *const cm,
struct vpx_write_bit_buffer *wb) {