Commit 4a05a58c authored by Monty Montgomery's avatar Monty Montgomery Committed by Christopher Montgomery

Modify RDO for use with Daala TX constant-depth coeffs

Modify the portions of RDO using TX-domain coeff calculations to deal
with TX_COEFF_DEPTH and constant-depth coefficient scaling.  At
present, this represents no functional change.

subset-1:
monty-rest-of-stack-quant-s1@2017-11-13T14:38:43.774Z ->
 monty-rest-of-stack-RDO-s1@2017-11-13T14:39:17.093Z

  PSNR | PSNR Cb | PSNR Cr | PSNR HVS |   SSIM | MS SSIM | CIEDE 2000
0.0000 |  0.0000 |  0.0000 |   0.0000 | 0.0000 |  0.0000 |     0.0000

objective-1-fast --limit=4:
monty-rest-of-stack-quant-o1f4@2017-11-13T14:38:28.828Z ->
 monty-rest-of-stack-RDO-o1f4@2017-11-13T14:38:57.951Z

  PSNR | PSNR Cb | PSNR Cr | PSNR HVS |   SSIM | MS SSIM | CIEDE 2000
0.0000 |  0.0000 |  0.0000 |   0.0000 | 0.0000 |  0.0000 |     0.0000

Change-Id: I0fbc45e018f565f48e1fc8fdeabfcd6cb6fa62fe
parent 3fe369c8
...@@ -445,7 +445,9 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") { ...@@ -445,7 +445,9 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
} }
add_proto qw/int64_t av1_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd"; add_proto qw/int64_t av1_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd";
specialize qw/av1_highbd_block_error sse2/; if (aom_config("CONFIG_DAALA_TX") ne "yes") {
specialize qw/av1_highbd_block_error sse2/;
}
add_proto qw/void av1_highbd_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count"; add_proto qw/void av1_highbd_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
......
...@@ -147,7 +147,14 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, ...@@ -147,7 +147,14 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
get_scan(cm, tx_size, tx_type, &xd->mi[0]->mbmi); get_scan(cm, tx_size, tx_type, &xd->mi[0]->mbmi);
const int16_t *const scan = scan_order->scan; const int16_t *const scan = scan_order->scan;
const int16_t *const nb = scan_order->neighbors; const int16_t *const nb = scan_order->neighbors;
#if CONFIG_DAALA_TX
// This is one of the few places where RDO is done on coeffs; it
// expects the coeffs to be in Q3/D11, so we need to scale them.
int depth_shift = (TX_COEFF_DEPTH - 11) * 2;
int depth_round = depth_shift > 1 ? (1 << depth_shift >> 1) : 0;
#else
const int shift = av1_get_tx_scale(tx_size); const int shift = av1_get_tx_scale(tx_size);
#endif
#if CONFIG_AOM_QM #if CONFIG_AOM_QM
int seg_id = xd->mi[0]->mbmi.segment_id; int seg_id = xd->mi[0]->mbmi.segment_id;
// Use a flat matrix (i.e. no weighting) for 1D and Identity transforms // Use a flat matrix (i.e. no weighting) for 1D and Identity transforms
...@@ -212,14 +219,19 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, ...@@ -212,14 +219,19 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
tail_token_costs[band_cur][ctx_cur]); tail_token_costs[band_cur][ctx_cur]);
// accu_error does not change when x==0 // accu_error does not change when x==0
} else { } else {
/* Computing distortion /* Computing distortion
*/ */
// compute the distortion for the first candidate // compute the distortion for the first candidate
// and the distortion for quantizing to 0. // and the distortion for quantizing to 0.
#if CONFIG_DAALA_TX
int dx0 = coeff[rc];
const int64_t d0 = ((int64_t)dx0 * dx0 + depth_round) >> depth_shift;
#else
int dx0 = abs(coeff[rc]) * (1 << shift); int dx0 = abs(coeff[rc]) * (1 << shift);
dx0 >>= xd->bd - 8; dx0 >>= xd->bd - 8;
const int64_t d0 = (int64_t)dx0 * dx0; const int64_t d0 = (int64_t)dx0 * dx0;
#endif
const int x_a = x - 2 * sz - 1; const int x_a = x - 2 * sz - 1;
int dqv; int dqv;
#if CONFIG_AOM_QM #if CONFIG_AOM_QM
...@@ -233,15 +245,29 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, ...@@ -233,15 +245,29 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
dqv = dequant_ptr[rc != 0]; dqv = dequant_ptr[rc != 0];
#endif #endif
#if CONFIG_DAALA_TX
int dx = dqcoeff[rc] - coeff[rc];
const int64_t d2 = ((int64_t)dx * dx + depth_round) >> depth_shift;
#else
int dx = (dqcoeff[rc] - coeff[rc]) * (1 << shift); int dx = (dqcoeff[rc] - coeff[rc]) * (1 << shift);
dx = signed_shift_right(dx, xd->bd - 8); dx = signed_shift_right(dx, xd->bd - 8);
const int64_t d2 = (int64_t)dx * dx; const int64_t d2 = (int64_t)dx * dx;
#endif
/* compute the distortion for the second candidate /* compute the distortion for the second candidate
* x_a = x - 2 * sz + 1; * x_a = x - 2 * sz + 1;
*/ */
int64_t d2_a; int64_t d2_a;
if (x_a != 0) { if (x_a != 0) {
#if CONFIG_DAALA_TX
#if CONFIG_NEW_QUANT
dx = av1_dequant_coeff_nuq(x, dqv, dequant_val[band_translate[i]]) -
coeff[rc];
#else // CONFIG_NEW_QUANT
dx -= (dqv + sz) ^ sz;
#endif // CONFIG_NEW_QUANT
d2_a = ((int64_t)dx * dx + depth_round) >> depth_shift;
#else // CONFIG_DAALA_TX
#if CONFIG_NEW_QUANT #if CONFIG_NEW_QUANT
dx = av1_dequant_coeff_nuq(x, dqv, dequant_val[band_translate[i]]) - dx = av1_dequant_coeff_nuq(x, dqv, dequant_val[band_translate[i]]) -
(coeff[rc] * (1 << shift)); (coeff[rc] * (1 << shift));
...@@ -250,9 +276,11 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, ...@@ -250,9 +276,11 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
dx -= ((dqv >> (xd->bd - 8)) + sz) ^ sz; dx -= ((dqv >> (xd->bd - 8)) + sz) ^ sz;
#endif // CONFIG_NEW_QUANT #endif // CONFIG_NEW_QUANT
d2_a = (int64_t)dx * dx; d2_a = (int64_t)dx * dx;
#endif // CONFIG_DAALA_TX
} else { } else {
d2_a = d0; d2_a = d0;
} }
// Computing RD cost // Computing RD cost
int64_t base_bits; int64_t base_bits;
// rate cost of x // rate cost of x
...@@ -321,6 +349,15 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, ...@@ -321,6 +349,15 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
int dqc_a = 0; int dqc_a = 0;
if (best_x || best_eob_x) { if (best_x || best_eob_x) {
if (x_a != 0) { if (x_a != 0) {
#if CONFIG_DAALA_TX
#if CONFIG_NEW_QUANT
dqc_a = av1_dequant_abscoeff_nuq(abs(x_a), dqv,
dequant_val[band_translate[i]]);
if (sz) dqc_a = -dqc_a;
#else
dqc_a = x_a * dqv;
#endif // CONFIG_NEW_QUANT
#else // CONFIG_DAALA_TX
#if CONFIG_NEW_QUANT #if CONFIG_NEW_QUANT
dqc_a = av1_dequant_abscoeff_nuq(abs(x_a), dqv, dqc_a = av1_dequant_abscoeff_nuq(abs(x_a), dqv,
dequant_val[band_translate[i]]); dequant_val[band_translate[i]]);
...@@ -332,9 +369,10 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, ...@@ -332,9 +369,10 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
else else
dqc_a = (x_a * dqv) >> shift; dqc_a = (x_a * dqv) >> shift;
#endif // CONFIG_NEW_QUANT #endif // CONFIG_NEW_QUANT
#endif // CONFIG_DAALA_TX
} else { } else {
dqc_a = 0; dqc_a = 0;
} // if (x_a != 0) }
} }
// record the better quantized value // record the better quantized value
......
...@@ -126,8 +126,16 @@ static INLINE tran_low_t qcoeff_to_dqcoeff(tran_low_t qc, int dqv, int shift) { ...@@ -126,8 +126,16 @@ static INLINE tran_low_t qcoeff_to_dqcoeff(tran_low_t qc, int dqv, int shift) {
// Squared TX-domain distortion between an original coefficient (tcoeff)
// and its dequantized reconstruction (dqcoeff).
static INLINE int64_t get_coeff_dist(tran_low_t tcoeff, tran_low_t dqcoeff,
                                     int shift) {
#if CONFIG_DAALA_TX
  // Daala TX coeffs are kept at a constant depth (TX_COEFF_DEPTH); scale
  // the squared error back down to the Q3/D11 reference depth, rounding.
  int depth_shift = (TX_COEFF_DEPTH - 11) * 2;
  // Guard with '> 0' (not '> 1') so a one-bit shift still rounds; this
  // matches the rounding convention in av1_highbd_block_error_c().
  int depth_round = depth_shift > 0 ? (1 << (depth_shift - 1)) : 0;
  const int64_t diff = tcoeff - dqcoeff;
  // '+' binds tighter than '>>' in C, but parenthesize explicitly for
  // clarity (and to silence -Wparentheses).
  const int64_t error = (diff * diff + depth_round) >> depth_shift;
  (void)shift;  // per-TX-size scale is unused in the constant-depth path
#else
  // Legacy path: undo the per-TX-size down-scaling before squaring.
  const int64_t diff = (tcoeff - dqcoeff) * (1 << shift);
  const int64_t error = diff * diff;
#endif
  return error;
}
......
...@@ -1588,7 +1588,12 @@ int64_t av1_highbd_block_error_c(const tran_low_t *coeff, ...@@ -1588,7 +1588,12 @@ int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
int64_t *ssz, int bd) { int64_t *ssz, int bd) {
int i; int i;
int64_t error = 0, sqcoeff = 0; int64_t error = 0, sqcoeff = 0;
#if CONFIG_DAALA_TX
(void)bd;
int shift = 2 * (TX_COEFF_DEPTH - 11);
#else
int shift = 2 * (bd - 8); int shift = 2 * (bd - 8);
#endif
int rounding = shift > 0 ? 1 << (shift - 1) : 0; int rounding = shift > 0 ? 1 << (shift - 1) : 0;
for (i = 0; i < block_size; i++) { for (i = 0; i < block_size; i++) {
...@@ -1926,7 +1931,13 @@ void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane, ...@@ -1926,7 +1931,13 @@ void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
// not involve an inverse transform, but it is less accurate. // not involve an inverse transform, but it is less accurate.
const int buffer_length = tx_size_2d[tx_size]; const int buffer_length = tx_size_2d[tx_size];
int64_t this_sse; int64_t this_sse;
// TX-domain results need to shift down to Q2/D10 to match pixel
// domain distortion values which are in Q2^2
#if CONFIG_DAALA_TX
int shift = (TX_COEFF_DEPTH - 10) * 2;
#else
int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2; int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2;
#endif
tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
...@@ -2106,7 +2117,13 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, ...@@ -2106,7 +2117,13 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
coeff_ctx, AV1_XFORM_QUANT_FP); coeff_ctx, AV1_XFORM_QUANT_FP);
// TX-domain results need to shift down to Q2/D10 to match pixel
// domain distortion values which are in Q2^2
#if CONFIG_DAALA_TX
const int shift = (TX_COEFF_DEPTH - 10) * 2;
#else
const int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2; const int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2;
#endif
tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block); tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block); tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
const int buffer_length = tx_size_2d[tx_size]; const int buffer_length = tx_size_2d[tx_size];
...@@ -3658,6 +3675,7 @@ void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size, ...@@ -3658,6 +3675,7 @@ void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
const int coeff_ctx_one_byte = combine_entropy_contexts(*a, *l); const int coeff_ctx_one_byte = combine_entropy_contexts(*a, *l);
const uint8_t cur_joint_ctx = (coeff_ctx << 2) + coeff_ctx_one_byte; const uint8_t cur_joint_ctx = (coeff_ctx << 2) + coeff_ctx_one_byte;
// Note: tmp below is pixel distortion, not TX domain
tmp = pixel_diff_dist(x, plane, diff, diff_stride, blk_row, blk_col, tmp = pixel_diff_dist(x, plane, diff, diff_stride, blk_row, blk_col,
plane_bsize, txm_bsize); plane_bsize, txm_bsize);
...@@ -3714,7 +3732,13 @@ void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size, ...@@ -3714,7 +3732,13 @@ void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
coeff_ctx, AV1_XFORM_QUANT_FP); coeff_ctx, AV1_XFORM_QUANT_FP);
// TX-domain results need to shift down to Q2/D10 to match pixel
// domain distortion values which are in Q2^2
#if CONFIG_DAALA_TX
const int shift = (TX_COEFF_DEPTH - 10) * 2;
#else
const int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2; const int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2;
#endif
tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
const int buffer_length = tx_size_2d[tx_size]; const int buffer_length = tx_size_2d[tx_size];
int64_t tmp_dist, tmp_sse; int64_t tmp_dist, tmp_sse;
......
...@@ -156,7 +156,7 @@ TEST_P(ErrorBlockTest, ExtremeValues) { ...@@ -156,7 +156,7 @@ TEST_P(ErrorBlockTest, ExtremeValues) {
<< "First failed at test case " << first_failure; << "First failed at test case " << first_failure;
} }
#if HAVE_SSE2 || HAVE_AVX #if (HAVE_SSE2 || HAVE_AVX) && !CONFIG_DAALA_TX
using std::tr1::make_tuple; using std::tr1::make_tuple;
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment