Commit 0bd3bf69 authored by Jingning Han

Make adaptive scan order support multi-threading

Count non-zero coefficient occurrences per tile and merge the per-tile
counts once per frame. This resolves an encoder/decoder mismatch in
multi-threaded coding.

BUG=aomedia:1073

Change-Id: I474f05913e6c3f75ca3fa1057bd4809ef51af164
parent 0e4a9a75
...@@ -72,10 +72,30 @@ struct seg_counts { ...@@ -72,10 +72,30 @@ struct seg_counts {
unsigned int pred[PREDICTION_PROBS][2]; unsigned int pred[PREDICTION_PROBS][2];
}; };
#if CONFIG_ADAPT_SCAN
// Non-zero coefficient statistics used by the adaptive scan order
// (CONFIG_ADAPT_SCAN).
//
// An instance is embedded in the frame context (see the non_zero_count member
// of struct frame_contexts). av1_update_scan_count_facade() accumulates into
// the copy held by the current tile context (xd->tile_ctx), and
// av1_adapt_scan_order() sums the per-tile copies into cm->fc before the scan
// probabilities are updated.
typedef struct NON_ZERO_COUNT {
// One table per transform size, indexed as [tx_type][i]. The second dimension
// equals the transform's width * height; presumably i is a coefficient
// position within the transform block -- TODO confirm against
// update_scan_count().
unsigned int non_zero_count_4X4[TX_TYPES][16];
unsigned int non_zero_count_8X8[TX_TYPES][64];
unsigned int non_zero_count_16X16[TX_TYPES][256];
unsigned int non_zero_count_32X32[TX_TYPES][1024];
// Rectangular transform sizes.
unsigned int non_zero_count_4x8[TX_TYPES][32];
unsigned int non_zero_count_8x4[TX_TYPES][32];
unsigned int non_zero_count_8x16[TX_TYPES][128];
unsigned int non_zero_count_16x8[TX_TYPES][128];
unsigned int non_zero_count_16x32[TX_TYPES][512];
unsigned int non_zero_count_32x16[TX_TYPES][512];
// Number of transform blocks counted for each (tx_size, tx_type) pair.
// Incremented once per counted block in av1_update_scan_count_facade() and
// read as block_num in update_scan_prob().
unsigned int txb_count[TX_SIZES_ALL][TX_TYPES];
} NON_ZERO_COUNT;
#endif
typedef struct frame_contexts { typedef struct frame_contexts {
coeff_cdf_model coef_tail_cdfs[TX_SIZES][PLANE_TYPES]; coeff_cdf_model coef_tail_cdfs[TX_SIZES][PLANE_TYPES];
coeff_cdf_model coef_head_cdfs[TX_SIZES][PLANE_TYPES]; coeff_cdf_model coef_head_cdfs[TX_SIZES][PLANE_TYPES];
#if CONFIG_ADAPT_SCAN #if CONFIG_ADAPT_SCAN
struct NON_ZERO_COUNT non_zero_count;
// TODO(angiebird): try aom_prob // TODO(angiebird): try aom_prob
uint32_t non_zero_prob_4X4[TX_TYPES][16]; uint32_t non_zero_prob_4X4[TX_TYPES][16];
uint32_t non_zero_prob_8X8[TX_TYPES][64]; uint32_t non_zero_prob_8X8[TX_TYPES][64];
...@@ -309,21 +329,6 @@ typedef struct FRAME_COUNTS { ...@@ -309,21 +329,6 @@ typedef struct FRAME_COUNTS {
#endif #endif
unsigned int switchable_interp[SWITCHABLE_FILTER_CONTEXTS] unsigned int switchable_interp[SWITCHABLE_FILTER_CONTEXTS]
[SWITCHABLE_FILTERS]; [SWITCHABLE_FILTERS];
#if CONFIG_ADAPT_SCAN
unsigned int non_zero_count_4X4[TX_TYPES][16];
unsigned int non_zero_count_8X8[TX_TYPES][64];
unsigned int non_zero_count_16X16[TX_TYPES][256];
unsigned int non_zero_count_32X32[TX_TYPES][1024];
unsigned int non_zero_count_4x8[TX_TYPES][32];
unsigned int non_zero_count_8x4[TX_TYPES][32];
unsigned int non_zero_count_8x16[TX_TYPES][128];
unsigned int non_zero_count_16x8[TX_TYPES][128];
unsigned int non_zero_count_16x32[TX_TYPES][512];
unsigned int non_zero_count_32x16[TX_TYPES][512];
unsigned int txb_count[TX_SIZES_ALL][TX_TYPES];
#endif // CONFIG_ADAPT_SCAN
#if CONFIG_LV_MAP #if CONFIG_LV_MAP
unsigned int txb_skip[TX_SIZES][TXB_SKIP_CONTEXTS][2]; unsigned int txb_skip[TX_SIZES][TXB_SKIP_CONTEXTS][2];
......
...@@ -5288,8 +5288,8 @@ static int16_t *get_adapt_nb(FRAME_CONTEXT *fc, TX_SIZE tx_size, ...@@ -5288,8 +5288,8 @@ static int16_t *get_adapt_nb(FRAME_CONTEXT *fc, TX_SIZE tx_size,
} }
} }
static uint32_t *get_non_zero_counts(FRAME_COUNTS *counts, TX_SIZE tx_size, static uint32_t *get_non_zero_counts(struct NON_ZERO_COUNT *counts,
TX_TYPE tx_type) { TX_SIZE tx_size, TX_TYPE tx_type) {
switch (tx_size) { switch (tx_size) {
case TX_4X4: return counts->non_zero_count_4X4[tx_type]; case TX_4X4: return counts->non_zero_count_4X4[tx_type];
case TX_8X8: return counts->non_zero_count_8X8[tx_type]; case TX_8X8: return counts->non_zero_count_8X8[tx_type];
...@@ -5459,9 +5459,10 @@ static void update_scan_prob(AV1_COMMON *cm, TX_SIZE tx_size, TX_TYPE tx_type, ...@@ -5459,9 +5459,10 @@ static void update_scan_prob(AV1_COMMON *cm, TX_SIZE tx_size, TX_TYPE tx_type,
FRAME_CONTEXT *pre_fc = cm->pre_fc; FRAME_CONTEXT *pre_fc = cm->pre_fc;
uint32_t *prev_non_zero_prob = get_non_zero_prob(pre_fc, tx_size, tx_type); uint32_t *prev_non_zero_prob = get_non_zero_prob(pre_fc, tx_size, tx_type);
uint32_t *non_zero_prob = get_non_zero_prob(cm->fc, tx_size, tx_type); uint32_t *non_zero_prob = get_non_zero_prob(cm->fc, tx_size, tx_type);
uint32_t *non_zero_count = get_non_zero_counts(&cm->counts, tx_size, tx_type); uint32_t *non_zero_count =
get_non_zero_counts(&cm->fc->non_zero_count, tx_size, tx_type);
const int tx2d_size = tx_size_2d[tx_size]; const int tx2d_size = tx_size_2d[tx_size];
unsigned int block_num = cm->counts.txb_count[tx_size][tx_type]; unsigned int block_num = cm->fc->non_zero_count.txb_count[tx_size][tx_type];
#if USE_2X2_PROB #if USE_2X2_PROB
#if CONFIG_TX64X64 #if CONFIG_TX64X64
DECLARE_ALIGNED(16, uint32_t, non_zero_count_ds[1024]); DECLARE_ALIGNED(16, uint32_t, non_zero_count_ds[1024]);
...@@ -5505,10 +5506,9 @@ static void update_scan_count(int16_t *scan, int max_scan, ...@@ -5505,10 +5506,9 @@ static void update_scan_count(int16_t *scan, int max_scan,
} }
} }
void av1_update_scan_count_facade(AV1_COMMON *cm, int mi_row, void av1_update_scan_count_facade(const AV1_COMMON *const cm, MACROBLOCKD *xd,
FRAME_COUNTS *counts, TX_SIZE tx_size, int mi_row, TX_SIZE tx_size, TX_TYPE tx_type,
TX_TYPE tx_type, const tran_low_t *dqcoeffs, const tran_low_t *dqcoeffs, int max_scan) {
int max_scan) {
#if SUB_FRAME_COUNT #if SUB_FRAME_COUNT
if (((mi_row >> 5) << 5) + 32 >= cm->mi_rows) return; if (((mi_row >> 5) << 5) + 32 >= cm->mi_rows) return;
#else #else
...@@ -5516,13 +5516,15 @@ void av1_update_scan_count_facade(AV1_COMMON *cm, int mi_row, ...@@ -5516,13 +5516,15 @@ void av1_update_scan_count_facade(AV1_COMMON *cm, int mi_row,
#endif #endif
if (cm->use_adapt_scan && do_adapt_scan(tx_size, tx_type) && max_scan) { if (cm->use_adapt_scan && do_adapt_scan(tx_size, tx_type) && max_scan) {
FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
#if SUB_REGION_COUNT #if SUB_REGION_COUNT
if (counts->txb_count[tx_size][tx_type] >= UINT8_MAX) return; if (ec_ctx->non_zero_count.txb_count[tx_size][tx_type] >= UINT8_MAX) return;
#endif #endif
int16_t *scan = get_adapt_scan(cm->fc, tx_size, tx_type); int16_t *scan = get_adapt_scan(cm->fc, tx_size, tx_type);
uint32_t *non_zero_count = get_non_zero_counts(counts, tx_size, tx_type); uint32_t *non_zero_count =
get_non_zero_counts(&ec_ctx->non_zero_count, tx_size, tx_type);
update_scan_count(scan, max_scan, dqcoeffs, non_zero_count); update_scan_count(scan, max_scan, dqcoeffs, non_zero_count);
++counts->txb_count[tx_size][tx_type]; ++ec_ctx->non_zero_count.txb_count[tx_size][tx_type];
} }
} }
...@@ -5840,7 +5842,8 @@ void av1_init_scan_order(AV1_COMMON *cm) { ...@@ -5840,7 +5842,8 @@ void av1_init_scan_order(AV1_COMMON *cm) {
#if UNI_RECT #if UNI_RECT
void unify_rect_tx_count(AV1_COMMON *cm, TX_SIZE tx_size, TX_TYPE tx_type) { void unify_rect_tx_count(AV1_COMMON *cm, TX_SIZE tx_size, TX_TYPE tx_type) {
uint32_t tmp_non_zero_counts[MAX_TX_SQUARE] = { 0 }; uint32_t tmp_non_zero_counts[MAX_TX_SQUARE] = { 0 };
uint32_t *non_zero_count = get_non_zero_counts(&cm->counts, tx_size, tx_type); uint32_t *non_zero_count =
get_non_zero_counts(&cm->fc->non_zero_count, tx_size, tx_type);
int bw = tx_size_wide[tx_size]; int bw = tx_size_wide[tx_size];
int bh = tx_size_high[tx_size]; int bh = tx_size_high[tx_size];
int tx_size_length = bw * bh; int tx_size_length = bw * bh;
...@@ -5854,16 +5857,19 @@ void unify_rect_tx_count(AV1_COMMON *cm, TX_SIZE tx_size, TX_TYPE tx_type) { ...@@ -5854,16 +5857,19 @@ void unify_rect_tx_count(AV1_COMMON *cm, TX_SIZE tx_size, TX_TYPE tx_type) {
bw = tx_size_wide[stx_size]; bw = tx_size_wide[stx_size];
bh = tx_size_high[stx_size]; bh = tx_size_high[stx_size];
non_zero_count = get_non_zero_counts(&cm->counts, stx_size, tx_type); non_zero_count =
get_non_zero_counts(&cm->fc->non_zero_count, stx_size, tx_type);
for (int idy = 0; idy < bh; ++idy) for (int idy = 0; idy < bh; ++idy)
for (int idx = 0; idx < bw; ++idx) for (int idx = 0; idx < bw; ++idx)
tmp_non_zero_counts[idx * bh + idy] += non_zero_count[idy * bw + idx]; tmp_non_zero_counts[idx * bh + idy] += non_zero_count[idy * bw + idx];
non_zero_count = get_non_zero_counts(&cm->counts, tx_size, tx_type); non_zero_count =
get_non_zero_counts(&cm->fc->non_zero_count, tx_size, tx_type);
for (int idx = 0; idx < tx_size_length; ++idx) for (int idx = 0; idx < tx_size_length; ++idx)
non_zero_count[idx] = tmp_non_zero_counts[idx]; non_zero_count[idx] = tmp_non_zero_counts[idx];
non_zero_count = get_non_zero_counts(&cm->counts, stx_size, tx_type); non_zero_count =
get_non_zero_counts(&cm->fc->non_zero_count, stx_size, tx_type);
for (int idy = 0; idy < bh; ++idy) for (int idy = 0; idy < bh; ++idy)
for (int idx = 0; idx < bw; ++idx) for (int idx = 0; idx < bw; ++idx)
non_zero_count[idy * bw + idx] = tmp_non_zero_counts[idx * bh + idy]; non_zero_count[idy * bw + idx] = tmp_non_zero_counts[idx * bh + idy];
...@@ -5878,7 +5884,8 @@ void unify_rect_tx_count_facade(AV1_COMMON *cm) { ...@@ -5878,7 +5884,8 @@ void unify_rect_tx_count_facade(AV1_COMMON *cm) {
} }
#endif #endif
void av1_adapt_scan_order(AV1_COMMON *cm) { void av1_adapt_scan_order(AV1_COMMON *cm, FRAME_CONTEXT *ec_ctxs[],
int num_tiles) {
if (cm->use_adapt_scan) { if (cm->use_adapt_scan) {
TX_SIZE tx_size; TX_SIZE tx_size;
#if CACHE_SCAN_PROB #if CACHE_SCAN_PROB
...@@ -5887,6 +5894,27 @@ void av1_adapt_scan_order(AV1_COMMON *cm) { ...@@ -5887,6 +5894,27 @@ void av1_adapt_scan_order(AV1_COMMON *cm) {
int use_curr_frame = 1; int use_curr_frame = 1;
#endif // CACHE_SCAN_PROB #endif // CACHE_SCAN_PROB
for (tx_size = 0; tx_size < TX_SIZES_ALL; ++tx_size) {
const int length = tx_size_2d[tx_size];
TX_TYPE tx_type;
for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
if (!do_adapt_scan(tx_size, tx_type)) continue;
uint32_t *non_zero_count =
get_non_zero_counts(&cm->fc->non_zero_count, tx_size, tx_type);
for (int i = 0; i < length; ++i) non_zero_count[i] = 0;
cm->fc->non_zero_count.txb_count[tx_size][tx_type] = 0;
for (int i = 0; i < num_tiles; ++i) {
uint32_t *tile_count = get_non_zero_counts(
&ec_ctxs[i]->non_zero_count, tx_size, tx_type);
for (int idx = 0; idx < length; ++idx)
non_zero_count[idx] += tile_count[idx];
cm->fc->non_zero_count.txb_count[tx_size][tx_type] +=
ec_ctxs[i]->non_zero_count.txb_count[tx_size][tx_type];
}
}
}
#if UNI_RECT #if UNI_RECT
unify_rect_tx_count_facade(cm); unify_rect_tx_count_facade(cm);
#endif #endif
......
...@@ -39,10 +39,9 @@ extern const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES]; ...@@ -39,10 +39,9 @@ extern const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES];
#define UNI_RECT 1 #define UNI_RECT 1
#define USE_TOPOLOGICAL_SORT 0 #define USE_TOPOLOGICAL_SORT 0
#define USE_LIMIT_SCAN_DISTANCE 0 #define USE_LIMIT_SCAN_DISTANCE 0
void av1_update_scan_count_facade(AV1_COMMON *cm, int mi_row, void av1_update_scan_count_facade(const AV1_COMMON *const cm, MACROBLOCKD *xd,
FRAME_COUNTS *counts, TX_SIZE tx_size, int mi_row, TX_SIZE tx_size, TX_TYPE tx_type,
TX_TYPE tx_type, const tran_low_t *dqcoeffs, const tran_low_t *dqcoeffs, int max_scan);
int max_scan);
// embed r + c and coeff_idx info with nonzero probabilities. When sorting the // embed r + c and coeff_idx info with nonzero probabilities. When sorting the
// nonzero probabilities, if there is a tie, the coefficient with smaller r + c // nonzero probabilities, if there is a tie, the coefficient with smaller r + c
...@@ -70,7 +69,8 @@ void av1_update_scan_order(TX_SIZE tx_size, TX_TYPE tx_type, ...@@ -70,7 +69,8 @@ void av1_update_scan_order(TX_SIZE tx_size, TX_TYPE tx_type,
void av1_update_neighbors(TX_SIZE tx_size, const int16_t *scan, void av1_update_neighbors(TX_SIZE tx_size, const int16_t *scan,
const int16_t *iscan, int16_t *neighbors); const int16_t *iscan, int16_t *neighbors);
void av1_init_scan_order(AV1_COMMON *cm); void av1_init_scan_order(AV1_COMMON *cm);
void av1_adapt_scan_order(AV1_COMMON *cm); void av1_adapt_scan_order(AV1_COMMON *cm, FRAME_CONTEXT *ec_ctxs[],
int num_tiles);
#if USE_2X2_PROB #if USE_2X2_PROB
void av1_down_sample_scan_count(uint32_t *non_zero_count_ds, void av1_down_sample_scan_count(uint32_t *non_zero_count_ds,
const uint32_t *non_zero_count, const uint32_t *non_zero_count,
......
...@@ -3703,7 +3703,7 @@ void av1_decode_tg_tiles_and_wrapup(AV1Decoder *pbi, const uint8_t *data, ...@@ -3703,7 +3703,7 @@ void av1_decode_tg_tiles_and_wrapup(AV1Decoder *pbi, const uint8_t *data,
av1_average_tile_loopfilter_cdfs(pbi->common.fc, tile_ctxs, cdf_ptrs, av1_average_tile_loopfilter_cdfs(pbi->common.fc, tile_ctxs, cdf_ptrs,
num_bwd_ctxs); num_bwd_ctxs);
#if CONFIG_ADAPT_SCAN #if CONFIG_ADAPT_SCAN
av1_adapt_scan_order(cm); av1_adapt_scan_order(cm, tile_ctxs, num_bwd_ctxs);
#endif // CONFIG_ADAPT_SCAN #endif // CONFIG_ADAPT_SCAN
if (!frame_is_intra_only(cm)) { if (!frame_is_intra_only(cm)) {
......
...@@ -415,8 +415,8 @@ uint8_t av1_read_coeffs_txb_facade(AV1_COMMON *cm, MACROBLOCKD *xd, ...@@ -415,8 +415,8 @@ uint8_t av1_read_coeffs_txb_facade(AV1_COMMON *cm, MACROBLOCKD *xd,
const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2); const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
if (xd->counts && *eob > 0) if (xd->counts && *eob > 0)
av1_update_scan_count_facade(cm, mi_row, xd->counts, tx_size, tx_type, av1_update_scan_count_facade(cm, xd, mi_row, tx_size, tx_type, pd->dqcoeff,
pd->dqcoeff, *eob); *eob);
#endif #endif
av1_set_contexts(xd, pd, plane, tx_size, cul_level, col, row); av1_set_contexts(xd, pd, plane, tx_size, cul_level, col, row);
return cul_level; return cul_level;
......
...@@ -341,8 +341,8 @@ int av1_decode_block_tokens(AV1_COMMON *cm, MACROBLOCKD *const xd, int plane, ...@@ -341,8 +341,8 @@ int av1_decode_block_tokens(AV1_COMMON *cm, MACROBLOCKD *const xd, int plane,
const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2); const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
if (xd->counts) if (xd->counts)
av1_update_scan_count_facade(cm, mi_row, xd->counts, tx_size, tx_type, av1_update_scan_count_facade(cm, xd, mi_row, tx_size, tx_type, pd->dqcoeff,
pd->dqcoeff, eob); eob);
#else #else
(void)cm; (void)cm;
#endif #endif
......
...@@ -5747,7 +5747,7 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size, ...@@ -5747,7 +5747,7 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
av1_average_tile_loopfilter_cdfs(cpi->common.fc, tile_ctxs, cdf_ptrs, av1_average_tile_loopfilter_cdfs(cpi->common.fc, tile_ctxs, cdf_ptrs,
num_bwd_ctxs); num_bwd_ctxs);
#if CONFIG_ADAPT_SCAN #if CONFIG_ADAPT_SCAN
av1_adapt_scan_order(cm); av1_adapt_scan_order(cm, tile_ctxs, num_bwd_ctxs);
#endif // CONFIG_ADAPT_SCAN #endif // CONFIG_ADAPT_SCAN
} }
......
...@@ -546,6 +546,11 @@ void av1_write_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd, ...@@ -546,6 +546,11 @@ void av1_write_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd,
w, abs(tcoeff[scan[c]]) - COEFF_BASE_RANGE - 1 - NUM_BASE_LEVELS); w, abs(tcoeff[scan[c]]) - COEFF_BASE_RANGE - 1 - NUM_BASE_LEVELS);
} }
} }
#if CONFIG_ADAPT_SCAN
const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
av1_update_scan_count_facade(cm, xd, mi_row, tx_size, tx_type, tcoeff, eob);
#endif
} }
void av1_write_coeffs_mb(const AV1_COMMON *const cm, MACROBLOCK *x, void av1_write_coeffs_mb(const AV1_COMMON *const cm, MACROBLOCK *x,
...@@ -2403,8 +2408,8 @@ void av1_update_and_record_txb_context(int plane, int block, int blk_row, ...@@ -2403,8 +2408,8 @@ void av1_update_and_record_txb_context(int plane, int block, int blk_row,
// because av1_update_scan_count_facade() only cares if coefficients are zero // because av1_update_scan_count_facade() only cares if coefficients are zero
// or not. // or not.
const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2); const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
av1_update_scan_count_facade((AV1_COMMON *)cm, mi_row, td->counts, tx_size, av1_update_scan_count_facade((AV1_COMMON *)cm, xd, mi_row, tx_size, tx_type,
tx_type, qcoeff, eob); qcoeff, eob);
#endif #endif
} }
......
...@@ -479,8 +479,8 @@ static void tokenize_b(int plane, int block, int blk_row, int blk_col, ...@@ -479,8 +479,8 @@ static void tokenize_b(int plane, int block, int blk_row, int blk_col,
// because av1_update_scan_count_facade() only cares if coefficients are zero // because av1_update_scan_count_facade() only cares if coefficients are zero
// or not. // or not.
const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2); const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
av1_update_scan_count_facade((AV1_COMMON *)cm, mi_row, td->counts, tx_size, av1_update_scan_count_facade((AV1_COMMON *)cm, xd, mi_row, tx_size, tx_type,
tx_type, qcoeff, c); qcoeff, c);
#endif #endif
av1_set_contexts(xd, pd, plane, tx_size, c > 0, blk_col, blk_row); av1_set_contexts(xd, pd, plane, tx_size, c > 0, blk_col, blk_row);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment