Commit 4a05a58c, authored by Monty Montgomery and committed by Christopher Montgomery

Modify RDO for use with Daala TX constant-depth coeffs

Modify the portions of RDO using TX-domain coeff calculations to deal
with TX_COEFF_DEPTH and constant-depth coefficient scaling.  At
present, this represents no functional change.

subset-1:
monty-rest-of-stack-quant-s1@2017-11-13T14:38:43.774Z ->
 monty-rest-of-stack-RDO-s1@2017-11-13T14:39:17.093Z

  PSNR | PSNR Cb | PSNR Cr | PSNR HVS |   SSIM | MS SSIM | CIEDE 2000
0.0000 |  0.0000 |  0.0000 |   0.0000 | 0.0000 |  0.0000 |     0.0000

objective-1-fast --limit=4:
monty-rest-of-stack-quant-o1f4@2017-11-13T14:38:28.828Z ->
 monty-rest-of-stack-RDO-o1f4@2017-11-13T14:38:57.951Z

  PSNR | PSNR Cb | PSNR Cr | PSNR HVS |   SSIM | MS SSIM | CIEDE 2000
0.0000 |  0.0000 |  0.0000 |   0.0000 | 0.0000 |  0.0000 |     0.0000

Change-Id: I0fbc45e018f565f48e1fc8fdeabfcd6cb6fa62fe
parent 3fe369c8
......@@ -445,7 +445,9 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
}
add_proto qw/int64_t av1_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd";
specialize qw/av1_highbd_block_error sse2/;
if (aom_config("CONFIG_DAALA_TX") ne "yes") {
specialize qw/av1_highbd_block_error sse2/;
}
add_proto qw/void av1_highbd_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
......
......@@ -147,7 +147,14 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
get_scan(cm, tx_size, tx_type, &xd->mi[0]->mbmi);
const int16_t *const scan = scan_order->scan;
const int16_t *const nb = scan_order->neighbors;
#if CONFIG_DAALA_TX
// This is one of the few places where RDO is done on coeffs; it
// expects the coeffs to be in Q3/D11, so we need to scale them.
int depth_shift = (TX_COEFF_DEPTH - 11) * 2;
int depth_round = depth_shift > 1 ? (1 << depth_shift >> 1) : 0;
#else
const int shift = av1_get_tx_scale(tx_size);
#endif
#if CONFIG_AOM_QM
int seg_id = xd->mi[0]->mbmi.segment_id;
// Use a flat matrix (i.e. no weighting) for 1D and Identity transforms
......@@ -212,14 +219,19 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
tail_token_costs[band_cur][ctx_cur]);
// accu_error does not change when x==0
} else {
/* Computing distortion
*/
// compute the distortion for the first candidate
// and the distortion for quantizing to 0.
/* Computing distortion
*/
// compute the distortion for the first candidate
// and the distortion for quantizing to 0.
#if CONFIG_DAALA_TX
int dx0 = coeff[rc];
const int64_t d0 = ((int64_t)dx0 * dx0 + depth_round) >> depth_shift;
#else
int dx0 = abs(coeff[rc]) * (1 << shift);
dx0 >>= xd->bd - 8;
const int64_t d0 = (int64_t)dx0 * dx0;
#endif
const int x_a = x - 2 * sz - 1;
int dqv;
#if CONFIG_AOM_QM
......@@ -233,15 +245,29 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
dqv = dequant_ptr[rc != 0];
#endif
#if CONFIG_DAALA_TX
int dx = dqcoeff[rc] - coeff[rc];
const int64_t d2 = ((int64_t)dx * dx + depth_round) >> depth_shift;
#else
int dx = (dqcoeff[rc] - coeff[rc]) * (1 << shift);
dx = signed_shift_right(dx, xd->bd - 8);
const int64_t d2 = (int64_t)dx * dx;
#endif
/* compute the distortion for the second candidate
* x_a = x - 2 * sz - 1;
*/
int64_t d2_a;
if (x_a != 0) {
#if CONFIG_DAALA_TX
#if CONFIG_NEW_QUANT
dx = av1_dequant_coeff_nuq(x, dqv, dequant_val[band_translate[i]]) -
coeff[rc];
#else // CONFIG_NEW_QUANT
dx -= (dqv + sz) ^ sz;
#endif // CONFIG_NEW_QUANT
d2_a = ((int64_t)dx * dx + depth_round) >> depth_shift;
#else // CONFIG_DAALA_TX
#if CONFIG_NEW_QUANT
dx = av1_dequant_coeff_nuq(x, dqv, dequant_val[band_translate[i]]) -
(coeff[rc] * (1 << shift));
......@@ -250,9 +276,11 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
dx -= ((dqv >> (xd->bd - 8)) + sz) ^ sz;
#endif // CONFIG_NEW_QUANT
d2_a = (int64_t)dx * dx;
#endif // CONFIG_DAALA_TX
} else {
d2_a = d0;
}
// Computing RD cost
int64_t base_bits;
// rate cost of x
......@@ -321,6 +349,15 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
int dqc_a = 0;
if (best_x || best_eob_x) {
if (x_a != 0) {
#if CONFIG_DAALA_TX
#if CONFIG_NEW_QUANT
dqc_a = av1_dequant_abscoeff_nuq(abs(x_a), dqv,
dequant_val[band_translate[i]]);
if (sz) dqc_a = -dqc_a;
#else
dqc_a = x_a * dqv;
#endif // CONFIG_NEW_QUANT
#else // CONFIG_DAALA_TX
#if CONFIG_NEW_QUANT
dqc_a = av1_dequant_abscoeff_nuq(abs(x_a), dqv,
dequant_val[band_translate[i]]);
......@@ -332,9 +369,10 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
else
dqc_a = (x_a * dqv) >> shift;
#endif // CONFIG_NEW_QUANT
#endif // CONFIG_DAALA_TX
} else {
dqc_a = 0;
} // if (x_a != 0)
}
}
// record the better quantized value
......
......@@ -126,8 +126,16 @@ static INLINE tran_low_t qcoeff_to_dqcoeff(tran_low_t qc, int dqv, int shift) {
/*
 * Squared transform-domain error between one original coefficient and its
 * dequantized reconstruction.
 *
 * tcoeff   original transform coefficient
 * dqcoeff  dequantized (reconstructed) coefficient
 * shift    left shift applied to the difference before squaring; ignored
 *          when CONFIG_DAALA_TX is enabled
 *
 * With CONFIG_DAALA_TX the squared difference is instead shifted right by
 * 2 * (TX_COEFF_DEPTH - 11) with round-to-nearest, and no per-transform
 * scale is applied.
 *
 * Returns the (scaled) squared error as a 64-bit value.
 */
static INLINE int64_t get_coeff_dist(tran_low_t tcoeff, tran_low_t dqcoeff,
                                     int shift) {
  // Widen before subtracting so the difference is computed in 64 bits and
  // cannot overflow the narrower coefficient type.
  const int64_t diff = (int64_t)tcoeff - dqcoeff;
#if CONFIG_DAALA_TX
  // The error is squared, hence the doubled depth difference.
  const int depth_shift = (TX_COEFF_DEPTH - 11) * 2;
  // Half of the divisor, for round-to-nearest; nothing to round when the
  // shift is zero.
  const int64_t depth_round =
      depth_shift > 0 ? ((int64_t)1 << (depth_shift - 1)) : 0;
  (void)shift;
  return (diff * diff + depth_round) >> depth_shift;
#else
  // Multiply rather than left-shift so a negative difference is well defined.
  const int64_t scaled_diff = diff * ((int64_t)1 << shift);
  return scaled_diff * scaled_diff;
#endif
}
......
......@@ -1588,7 +1588,12 @@ int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
int64_t *ssz, int bd) {
int i;
int64_t error = 0, sqcoeff = 0;
#if CONFIG_DAALA_TX
(void)bd;
int shift = 2 * (TX_COEFF_DEPTH - 11);
#else
int shift = 2 * (bd - 8);
#endif
int rounding = shift > 0 ? 1 << (shift - 1) : 0;
for (i = 0; i < block_size; i++) {
......@@ -1926,7 +1931,13 @@ void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
// not involve an inverse transform, but it is less accurate.
const int buffer_length = tx_size_2d[tx_size];
int64_t this_sse;
// TX-domain results need to shift down to Q2/D10 to match pixel
// domain distortion values which are in Q2^2
#if CONFIG_DAALA_TX
int shift = (TX_COEFF_DEPTH - 10) * 2;
#else
int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2;
#endif
tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
......@@ -2106,7 +2117,13 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
coeff_ctx, AV1_XFORM_QUANT_FP);
// TX-domain results need to shift down to Q2/D10 to match pixel
// domain distortion values which are in Q2^2
#if CONFIG_DAALA_TX
const int shift = (TX_COEFF_DEPTH - 10) * 2;
#else
const int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2;
#endif
tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
const int buffer_length = tx_size_2d[tx_size];
......@@ -3658,6 +3675,7 @@ void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
const int coeff_ctx_one_byte = combine_entropy_contexts(*a, *l);
const uint8_t cur_joint_ctx = (coeff_ctx << 2) + coeff_ctx_one_byte;
// Note: tmp below is pixel distortion, not TX domain
tmp = pixel_diff_dist(x, plane, diff, diff_stride, blk_row, blk_col,
plane_bsize, txm_bsize);
......@@ -3714,7 +3732,13 @@ void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
coeff_ctx, AV1_XFORM_QUANT_FP);
// TX-domain results need to shift down to Q2/D10 to match pixel
// domain distortion values which are in Q2^2
#if CONFIG_DAALA_TX
const int shift = (TX_COEFF_DEPTH - 10) * 2;
#else
const int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2;
#endif
tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
const int buffer_length = tx_size_2d[tx_size];
int64_t tmp_dist, tmp_sse;
......
......@@ -156,7 +156,7 @@ TEST_P(ErrorBlockTest, ExtremeValues) {
<< "First failed at test case " << first_failure;
}
#if HAVE_SSE2 || HAVE_AVX
#if (HAVE_SSE2 || HAVE_AVX) && !CONFIG_DAALA_TX
using std::tr1::make_tuple;
INSTANTIATE_TEST_CASE_P(
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment