Commit 60f2a229 authored by Monty Montgomery's avatar Monty Montgomery Committed by Christopher Montgomery
Browse files

Add Daala TX fixed-coeff-depth capability to quantization

This patch completes the work to add fixed-depth TX domain support to
the quantization and dequantization code.  At present, it is active but
configured to behave identically to current AV1 master as RDO and TX
have not yet been updated to also support this functionality.

subset-1:
monty-rest-of-stack-noshift-s1@2017-11-13T14:37:42.541Z ->
 monty-rest-of-stack-quant-s1@2017-11-13T14:38:43.774Z

  PSNR | PSNR Cb | PSNR Cr | PSNR HVS |   SSIM | MS SSIM | CIEDE 2000
0.0000 |  0.0000 |  0.0000 |   0.0000 | 0.0000 |  0.0000 |     0.0000

objective-1-fast --limit=4:
monty-rest-of-stack-noshift-o1f4@2017-11-13T14:37:16.992Z ->
 monty-rest-of-stack-quant-o1f4@2017-11-13T14:38:28.828Z

  PSNR | PSNR Cb | PSNR Cr | PSNR HVS |   SSIM | MS SSIM | CIEDE 2000
0.0000 |  0.0000 |  0.0000 |   0.0000 | 0.0000 |  0.0000 |     0.0000

Change-Id: I3773a1fc128136c9fea227f4b547576a8aa6efa3
parent 605d63f3
......@@ -65,7 +65,11 @@ extern "C" {
/**The maximum number of color planes allowed in a single frame.*/
# define OD_NPLANES_MAX (3)
# define OD_COEFF_SHIFT (4)
/* Native coefficient 'bitdepth'; TX is scaled up by (TX_COEFF_DEPTH-bitdepth)
such that the real coefficient depth precision is always TX_COEFF_DEPTH
regardless of bitdepth or transform size.
*/
# define TX_COEFF_DEPTH (11)
# define OD_DISABLE_CFL (1)
# define OD_DISABLE_FILTER (1)
......
......@@ -120,7 +120,7 @@ tran_low_t av1_dequant_coeff_nuq(int v, int q, const tran_low_t *dq) {
}
#endif // CONFIG_NEW_QUANT
 
static const int16_t dc_qlookup[QINDEX_RANGE] = {
static const int16_t dc_qlookup_Q3[QINDEX_RANGE] = {
4, 8, 8, 9, 10, 11, 12, 12, 13, 14, 15, 16, 17, 18,
19, 19, 20, 21, 22, 23, 24, 25, 26, 26, 27, 28, 29, 30,
31, 32, 32, 33, 34, 35, 36, 37, 38, 38, 39, 40, 41, 42,
......@@ -143,7 +143,7 @@ static const int16_t dc_qlookup[QINDEX_RANGE] = {
};
 
#if CONFIG_HIGHBITDEPTH
static const int16_t dc_qlookup_10[QINDEX_RANGE] = {
static const int16_t dc_qlookup_10_Q3[QINDEX_RANGE] = {
4, 9, 10, 13, 15, 17, 20, 22, 25, 28, 31, 34, 37,
40, 43, 47, 50, 53, 57, 60, 64, 68, 71, 75, 78, 82,
86, 90, 93, 97, 101, 105, 109, 113, 116, 120, 124, 128, 132,
......@@ -166,7 +166,7 @@ static const int16_t dc_qlookup_10[QINDEX_RANGE] = {
3953, 4089, 4236, 4394, 4559, 4737, 4929, 5130, 5347,
};
 
static const int16_t dc_qlookup_12[QINDEX_RANGE] = {
static const int16_t dc_qlookup_12_Q3[QINDEX_RANGE] = {
4, 12, 18, 25, 33, 41, 50, 60, 70, 80, 91,
103, 115, 127, 140, 153, 166, 180, 194, 208, 222, 237,
251, 266, 281, 296, 312, 327, 343, 358, 374, 390, 405,
......@@ -194,7 +194,7 @@ static const int16_t dc_qlookup_12[QINDEX_RANGE] = {
};
#endif
 
static const int16_t ac_qlookup[QINDEX_RANGE] = {
static const int16_t ac_qlookup_Q3[QINDEX_RANGE] = {
4, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
......@@ -218,7 +218,7 @@ static const int16_t ac_qlookup[QINDEX_RANGE] = {
};
 
#if CONFIG_HIGHBITDEPTH
static const int16_t ac_qlookup_10[QINDEX_RANGE] = {
static const int16_t ac_qlookup_10_Q3[QINDEX_RANGE] = {
4, 9, 11, 13, 16, 18, 21, 24, 27, 30, 33, 37, 40,
44, 48, 51, 55, 59, 63, 67, 71, 75, 79, 83, 88, 92,
96, 100, 105, 109, 114, 118, 122, 127, 131, 136, 140, 145, 149,
......@@ -241,7 +241,7 @@ static const int16_t ac_qlookup_10[QINDEX_RANGE] = {
6268, 6388, 6512, 6640, 6768, 6900, 7036, 7172, 7312,
};
 
static const int16_t ac_qlookup_12[QINDEX_RANGE] = {
static const int16_t ac_qlookup_12_Q3[QINDEX_RANGE] = {
4, 13, 19, 27, 35, 44, 54, 64, 75, 87, 99,
112, 126, 139, 154, 168, 183, 199, 214, 230, 247, 263,
280, 297, 314, 331, 349, 366, 384, 402, 420, 438, 456,
......@@ -269,52 +269,90 @@ static const int16_t ac_qlookup_12[QINDEX_RANGE] = {
};
#endif
 
int16_t av1_dc_quant(int qindex, int delta, aom_bit_depth_t bit_depth) {
#if !CONFIG_DAALA_TX
// Coefficient scaling and quantization with AV1 TX are tailored to
// the AV1 TX transforms. Regardless of the bit-depth of the input,
// the transform stages scale the coefficient values up by a factor of
// 8 (3 bits) over the scale of the pixel values. Thus, for 8-bit
// input, the coefficients have effectively 11 bits of scale depth
// (8+3), 10-bit input pixels result in 13-bit coefficient depth
// (10+3) and 12-bit pixels yield 15-bit (12+3) coefficient depth.
// All quantizers are built using this invariant of x8, 3-bit scaling,
// thus the Q3 suffix.
// A partial exception to this rule is large transforms; to avoid
// overflow, TX blocks with > 256 pels (>16x16) are scaled only
// 4-times unity (2 bits) over the pixel depth, and TX blocks with
// over 1024 pixels (>32x32) are scaled up only 2x unity (1 bit).
// This descaling is found via av1_tx_get_scale(). Thus, 16x32, 32x16
// and 32x32 transforms actually return Q2 coefficients, and 32x64,
// 64x32 and 64x64 transforms return Q1 coefficients. However, the
// quantizers are de-scaled down on-the-fly by the same amount
// (av1_tx_get_scale()) during quantization, and as such the
// quantized/coded coefficients, even for large TX blocks, are always
// effectively Q3.
// Note that encoder decision making (which uses the quantizer to
// generate several bespoke lambdas for RDO and other heuristics)
// expects quantizers to be larger for higher-bitdepth input. In
// addition, the minimum allowable quantizer is 4; smaller values will
// underflow to 0 in the actual quantization routines.
int16_t av1_dc_quant_Q3(int qindex, int delta, aom_bit_depth_t bit_depth) {
#if CONFIG_HIGHBITDEPTH
switch (bit_depth) {
case AOM_BITS_8: return dc_qlookup[clamp(qindex + delta, 0, MAXQ)];
case AOM_BITS_10: return dc_qlookup_10[clamp(qindex + delta, 0, MAXQ)];
case AOM_BITS_12: return dc_qlookup_12[clamp(qindex + delta, 0, MAXQ)];
case AOM_BITS_8: return dc_qlookup_Q3[clamp(qindex + delta, 0, MAXQ)];
case AOM_BITS_10: return dc_qlookup_10_Q3[clamp(qindex + delta, 0, MAXQ)];
case AOM_BITS_12: return dc_qlookup_12_Q3[clamp(qindex + delta, 0, MAXQ)];
default:
assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
return -1;
}
#else
(void)bit_depth;
return dc_qlookup[clamp(qindex + delta, 0, MAXQ)];
return dc_qlookup_Q3[clamp(qindex + delta, 0, MAXQ)];
#endif
}
 
int16_t av1_ac_quant(int qindex, int delta, aom_bit_depth_t bit_depth) {
int16_t av1_ac_quant_Q3(int qindex, int delta, aom_bit_depth_t bit_depth) {
#if CONFIG_HIGHBITDEPTH
switch (bit_depth) {
case AOM_BITS_8: return ac_qlookup[clamp(qindex + delta, 0, MAXQ)];
case AOM_BITS_10: return ac_qlookup_10[clamp(qindex + delta, 0, MAXQ)];
case AOM_BITS_12: return ac_qlookup_12[clamp(qindex + delta, 0, MAXQ)];
case AOM_BITS_8: return ac_qlookup_Q3[clamp(qindex + delta, 0, MAXQ)];
case AOM_BITS_10: return ac_qlookup_10_Q3[clamp(qindex + delta, 0, MAXQ)];
case AOM_BITS_12: return ac_qlookup_12_Q3[clamp(qindex + delta, 0, MAXQ)];
default:
assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
return -1;
}
#else
(void)bit_depth;
return ac_qlookup[clamp(qindex + delta, 0, MAXQ)];
return ac_qlookup_Q3[clamp(qindex + delta, 0, MAXQ)];
#endif
}
 
int16_t av1_qindex_from_ac(int ac, aom_bit_depth_t bit_depth) {
// In AV1 TX, the coefficients are always scaled up a factor of 8 (3
// bits), so QTX == Q3.
// DC quantizer in TX scale for the AV1 TX build.  It forwards its
// arguments unchanged to av1_dc_quant_Q3() and returns that result.
int16_t av1_dc_quant_QTX(int qindex, int delta, aom_bit_depth_t bit_depth) {
  const int16_t dc_q3 = av1_dc_quant_Q3(qindex, delta, bit_depth);
  return dc_q3;
}
// AC quantizer in TX scale for the AV1 TX build.  It forwards its
// arguments unchanged to av1_ac_quant_Q3() and returns that result.
int16_t av1_ac_quant_QTX(int qindex, int delta, aom_bit_depth_t bit_depth) {
  const int16_t ac_q3 = av1_ac_quant_Q3(qindex, delta, bit_depth);
  return ac_q3;
}
int16_t av1_qindex_from_ac_Q3(int ac_Q3, aom_bit_depth_t bit_depth) {
int i;
const int16_t *tab = ac_qlookup;
ac *= 4;
const int16_t *tab = ac_qlookup_Q3;
#if CONFIG_HIGHBITDEPTH
switch (bit_depth) {
case AOM_BITS_10: {
tab = ac_qlookup_10;
ac *= 4;
tab = ac_qlookup_10_Q3;
break;
}
case AOM_BITS_12: {
tab = ac_qlookup_12;
ac *= 16;
tab = ac_qlookup_12_Q3;
break;
}
default:
......@@ -324,10 +362,87 @@ int16_t av1_qindex_from_ac(int ac, aom_bit_depth_t bit_depth) {
#endif
(void)bit_depth;
for (i = 0; i < QINDEX_RANGE; i++) {
if (ac <= tab[i]) return i;
if (ac_Q3 <= tab[i]) return i;
}
return QINDEX_RANGE - 1;
}
#else // CONFIG_DAALA_TX
// Daala TX uses a constant effective coefficient depth
// (TX_COEFF_DEPTH) regardless of input pixel bitdepth or transform
// size. This means that coefficient scale and range is identical
// regardless of the bit depth of the pixel input. However, the
// existing encoder heuristics and RDO loop were built expecting a
// quantizer that scales with bitdepth, treating it more as a
// proto-lambda than a quantizer. The assumption that quantizer scale
// increases with bitdepth is spread throughout the encoder.
// For this reason, we need to be able to find an old-style 'Q3'
// quantizer that scales with pixel depth (to be used in encoder
// decision making) as well as the literal quantizer that is used in
// actual quantization/dequantization. That is centralized here.
// Right now, the existing quantization code and setup are not
// particularly well suited to Daala TX. The scale range used by, eg,
// the 12 bit lookups is intentionally larger in order to provide more
// fine control at the top end of the quality range, as 12-bit input
// would be assumed to offer a lower noise floor than an 8-bit input.
// However, the 12-bit lookups assume an effective 15-bit TX depth,
// while we intend to run Daala TX somewhere between 12 and 14. We
// can't simply scale it down, because this would violate the minimum
// allowable quantizer in the current code (4).
// As such, we do the simplest thing for the time being: Always use
// the 8-bit scale range for all inputs and scale the QTX and Q3
// returns accordingly, which will always be no-ops or upshifts. This
// might well work well enough; if not, we'll need to patch quantizer
// scaling to extend the high-bitdepth quality range upward at some
// later date.
// Daala TX build: DC quantizer at the bit-depth-dependent 'Q3' scale.
// Looks up the 8-bit DC table at clamp(qindex + delta, 0, MAXQ) and
// multiplies the entry by 2^(bit_depth - 8).  When qindex is 0 the first
// table entry is returned as-is and delta is not consulted.
int16_t av1_dc_quant_Q3(int qindex, int delta, aom_bit_depth_t bit_depth) {
  assert(bit_depth >= 8);
  // Do not scale lossless
  if (qindex == 0) return dc_qlookup_Q3[0];
  const int depth_scale = 1 << (bit_depth - 8);
  return dc_qlookup_Q3[clamp(qindex + delta, 0, MAXQ)] * depth_scale;
}
// Daala TX build: AC quantizer at the bit-depth-dependent 'Q3' scale.
// Looks up the 8-bit AC table at clamp(qindex + delta, 0, MAXQ) and
// multiplies the entry by 2^(bit_depth - 8).  When qindex is 0 the first
// table entry is returned as-is and delta is not consulted.
int16_t av1_ac_quant_Q3(int qindex, int delta, aom_bit_depth_t bit_depth) {
  assert(bit_depth >= 8);
  // Do not scale lossless
  if (qindex == 0) return ac_qlookup_Q3[0];
  const int depth_scale = 1 << (bit_depth - 8);
  return ac_qlookup_Q3[clamp(qindex + delta, 0, MAXQ)] * depth_scale;
}
// Daala TX build: DC quantizer at the fixed transform-coefficient scale.
// Looks up the 8-bit DC table at clamp(qindex + delta, 0, MAXQ) and
// multiplies the entry by 2^(TX_COEFF_DEPTH - 11).  bit_depth is unused.
// When qindex is 0 the first table entry is returned as-is and delta is
// not consulted.
int16_t av1_dc_quant_QTX(int qindex, int delta, aom_bit_depth_t bit_depth) {
  (void)bit_depth;
  // Do not scale lossless
  if (qindex == 0) return dc_qlookup_Q3[0];
  const int tx_scale = 1 << (TX_COEFF_DEPTH - 11);
  return dc_qlookup_Q3[clamp(qindex + delta, 0, MAXQ)] * tx_scale;
}
// Daala TX build: AC quantizer at the fixed transform-coefficient scale.
// Looks up the 8-bit AC table at clamp(qindex + delta, 0, MAXQ) and
// multiplies the entry by 2^(TX_COEFF_DEPTH - 11).  bit_depth is unused.
// When qindex is 0 the first table entry is returned as-is and delta is
// not consulted.
int16_t av1_ac_quant_QTX(int qindex, int delta, aom_bit_depth_t bit_depth) {
  (void)bit_depth;
  // Do not scale lossless
  if (qindex == 0) return ac_qlookup_Q3[0];
  const int tx_scale = 1 << (TX_COEFF_DEPTH - 11);
  return ac_qlookup_Q3[clamp(qindex + delta, 0, MAXQ)] * tx_scale;
}
// Daala TX build: map an AC quantizer value back to a quantizer index.
// Returns the smallest index whose 8-bit AC table entry, multiplied by
// 2^(TX_COEFF_DEPTH - 11), is >= the supplied value; if no entry
// qualifies, returns QINDEX_RANGE - 1.  bit_depth is unused.
// NOTE(review): the argument is named ac_QTX and is compared in TX scale,
// while the function name and its prototype say Q3 -- confirm which scale
// callers are expected to pass.
int16_t av1_qindex_from_ac_Q3(int ac_QTX, aom_bit_depth_t bit_depth) {
  const int16_t *const lookup = ac_qlookup_Q3;
  const int tx_scale = 1 << (TX_COEFF_DEPTH - 11);
  int idx = 0;
  (void)bit_depth;
  // Walk forward while the requested value is still above the scaled entry.
  while (idx < QINDEX_RANGE && ac_QTX > lookup[idx] * tx_scale) {
    ++idx;
  }
  return idx < QINDEX_RANGE ? idx : QINDEX_RANGE - 1;
}
#endif // !CONFIG_DAALA_TX
 
int av1_get_qindex(const struct segmentation *seg, int segment_id,
#if CONFIG_Q_SEGMENTATION
......
......@@ -37,9 +37,11 @@ extern "C" {
struct AV1Common;
int16_t av1_dc_quant(int qindex, int delta, aom_bit_depth_t bit_depth);
int16_t av1_ac_quant(int qindex, int delta, aom_bit_depth_t bit_depth);
int16_t av1_qindex_from_ac(int ac, aom_bit_depth_t bit_depth);
int16_t av1_dc_quant_Q3(int qindex, int delta, aom_bit_depth_t bit_depth);
int16_t av1_ac_quant_Q3(int qindex, int delta, aom_bit_depth_t bit_depth);
int16_t av1_dc_quant_QTX(int qindex, int delta, aom_bit_depth_t bit_depth);
int16_t av1_ac_quant_QTX(int qindex, int delta, aom_bit_depth_t bit_depth);
int16_t av1_qindex_from_ac_Q3(int ac_Q3, aom_bit_depth_t bit_depth);
int av1_get_qindex(const struct segmentation *seg, int segment_id,
#if CONFIG_Q_SEGMENTATION
......
......@@ -394,14 +394,6 @@ static void decode_mbmi_block(AV1Decoder *const pbi, MACROBLOCKD *const xd,
aom_merge_corrupted_flag(&xd->corrupted, reader_corrupted_flag);
}
// Converts a Q3 quantizer lookup from static configuration to the
// actual TX scaling in use
static int dequant_Q3_to_QTX(int q3, int bd) {
// Right now, TX scale in use is still Q3
(void)bd;
return q3;
}
static void decode_token_and_recon_block(AV1Decoder *const pbi,
MACROBLOCKD *const xd, int mi_row,
int mi_col, aom_reader *r,
......@@ -439,12 +431,10 @@ static void decode_token_and_recon_block(AV1Decoder *const pbi,
: (j == 1 ? cm->u_dc_delta_q : cm->v_dc_delta_q);
const int ac_delta_q =
j == 0 ? 0 : (j == 1 ? cm->u_ac_delta_q : cm->v_ac_delta_q);
xd->plane[j].seg_dequant_QTX[i][0] = dequant_Q3_to_QTX(
av1_dc_quant(current_qindex, dc_delta_q, cm->bit_depth),
cm->bit_depth);
xd->plane[j].seg_dequant_QTX[i][1] = dequant_Q3_to_QTX(
av1_ac_quant(current_qindex, ac_delta_q, cm->bit_depth),
cm->bit_depth);
xd->plane[j].seg_dequant_QTX[i][0] =
av1_dc_quant_QTX(current_qindex, dc_delta_q, cm->bit_depth);
xd->plane[j].seg_dequant_QTX[i][1] =
av1_ac_quant_QTX(current_qindex, ac_delta_q, cm->bit_depth);
}
}
}
......@@ -1329,18 +1319,17 @@ static void setup_segmentation_dequant(AV1_COMMON *const cm) {
#else
const int qindex = av1_get_qindex(&cm->seg, i, cm->base_qindex);
#endif
cm->y_dequant_QTX[i][0] = dequant_Q3_to_QTX(
av1_dc_quant(qindex, cm->y_dc_delta_q, cm->bit_depth), cm->bit_depth);
cm->y_dequant_QTX[i][1] = dequant_Q3_to_QTX(
av1_ac_quant(qindex, 0, cm->bit_depth), cm->bit_depth);
cm->u_dequant_QTX[i][0] = dequant_Q3_to_QTX(
av1_dc_quant(qindex, cm->u_dc_delta_q, cm->bit_depth), cm->bit_depth);
cm->u_dequant_QTX[i][1] = dequant_Q3_to_QTX(
av1_ac_quant(qindex, cm->u_ac_delta_q, cm->bit_depth), cm->bit_depth);
cm->v_dequant_QTX[i][0] = dequant_Q3_to_QTX(
av1_dc_quant(qindex, cm->v_dc_delta_q, cm->bit_depth), cm->bit_depth);
cm->v_dequant_QTX[i][1] = dequant_Q3_to_QTX(
av1_ac_quant(qindex, cm->v_ac_delta_q, cm->bit_depth), cm->bit_depth);
cm->y_dequant_QTX[i][0] =
av1_dc_quant_QTX(qindex, cm->y_dc_delta_q, cm->bit_depth);
cm->y_dequant_QTX[i][1] = av1_ac_quant_QTX(qindex, 0, cm->bit_depth);
cm->u_dequant_QTX[i][0] =
av1_dc_quant_QTX(qindex, cm->u_dc_delta_q, cm->bit_depth);
cm->u_dequant_QTX[i][1] =
av1_ac_quant_QTX(qindex, cm->u_ac_delta_q, cm->bit_depth);
cm->v_dequant_QTX[i][0] =
av1_dc_quant_QTX(qindex, cm->v_dc_delta_q, cm->bit_depth);
cm->v_dequant_QTX[i][1] =
av1_ac_quant_QTX(qindex, cm->v_ac_delta_q, cm->bit_depth);
#if CONFIG_AOM_QM
const int lossless = qindex == 0 && cm->y_dc_delta_q == 0 &&
cm->u_dc_delta_q == 0 && cm->u_ac_delta_q == 0 &&
......
......@@ -43,7 +43,7 @@ static const double aq_c_var_thresholds[AQ_C_STRENGTHS][AQ_C_SEGMENTS] = {
static int get_aq_c_strength(int q_index, aom_bit_depth_t bit_depth) {
// Approximate base quantizer (truncated to int)
const int base_quant = av1_ac_quant(q_index, 0, bit_depth) / 4;
const int base_quant = av1_ac_quant_Q3(q_index, 0, bit_depth) / 4;
return (base_quant > 10) + (base_quant > 25);
}
......
......@@ -1516,7 +1516,7 @@ static void invert_quant(int16_t *quant, int16_t *shift, int d) {
}
static int get_qzbin_factor(int q, aom_bit_depth_t bit_depth) {
const int quant = av1_dc_quant(q, 0, bit_depth);
const int quant = av1_dc_quant_Q3(q, 0, bit_depth);
#if CONFIG_HIGHBITDEPTH
switch (bit_depth) {
case AOM_BITS_8: return q == 0 ? 64 : (quant < 148 ? 84 : 80);
......@@ -1545,10 +1545,11 @@ void av1_build_quantizer(aom_bit_depth_t bit_depth, int y_dc_delta_q,
for (i = 0; i < 2; ++i) {
int qrounding_factor_fp = 64;
// y quantizer setup with original coeff shift of Q3
quant_Q3 = i == 0 ? av1_dc_quant(q, y_dc_delta_q, bit_depth)
: av1_ac_quant(q, 0, bit_depth);
// y quantizer with TX scale; right now, it's still Q3 as above;
quant_QTX = quant_Q3;
quant_Q3 = i == 0 ? av1_dc_quant_Q3(q, y_dc_delta_q, bit_depth)
: av1_ac_quant_Q3(q, 0, bit_depth);
// y quantizer with TX scale
quant_QTX = i == 0 ? av1_dc_quant_QTX(q, y_dc_delta_q, bit_depth)
: av1_ac_quant_QTX(q, 0, bit_depth);
invert_quant(&quants->y_quant[q][i], &quants->y_quant_shift[q][i],
quant_QTX);
quants->y_quant_fp[q][i] = (1 << 16) / quant_QTX;
......@@ -1559,10 +1560,11 @@ void av1_build_quantizer(aom_bit_depth_t bit_depth, int y_dc_delta_q,
deq->y_dequant_Q3[q][i] = quant_Q3;
// u quantizer setup with original coeff shift of Q3
quant_Q3 = i == 0 ? av1_dc_quant(q, u_dc_delta_q, bit_depth)
: av1_ac_quant(q, u_ac_delta_q, bit_depth);
// u quantizer with TX scale; right now, it's still Q3 as above;
quant_QTX = quant_Q3;
quant_Q3 = i == 0 ? av1_dc_quant_Q3(q, u_dc_delta_q, bit_depth)
: av1_ac_quant_Q3(q, u_ac_delta_q, bit_depth);
// u quantizer with TX scale
quant_QTX = i == 0 ? av1_dc_quant_QTX(q, u_dc_delta_q, bit_depth)
: av1_ac_quant_QTX(q, u_ac_delta_q, bit_depth);
invert_quant(&quants->u_quant[q][i], &quants->u_quant_shift[q][i],
quant_QTX);
quants->u_quant_fp[q][i] = (1 << 16) / quant_QTX;
......@@ -1573,10 +1575,11 @@ void av1_build_quantizer(aom_bit_depth_t bit_depth, int y_dc_delta_q,
deq->u_dequant_Q3[q][i] = quant_Q3;
// v quantizer setup with original coeff shift of Q3
quant_Q3 = i == 0 ? av1_dc_quant(q, v_dc_delta_q, bit_depth)
: av1_ac_quant(q, v_ac_delta_q, bit_depth);
// v quantizer with TX scale; right now, it's still Q3 as above;
quant_QTX = quant_Q3;
quant_Q3 = i == 0 ? av1_dc_quant_Q3(q, v_dc_delta_q, bit_depth)
: av1_ac_quant_Q3(q, v_ac_delta_q, bit_depth);
// v quantizer with TX scale
quant_QTX = i == 0 ? av1_dc_quant_QTX(q, v_dc_delta_q, bit_depth)
: av1_ac_quant_QTX(q, v_ac_delta_q, bit_depth);
invert_quant(&quants->v_quant[q][i], &quants->v_quant_shift[q][i],
quant_QTX);
quants->v_quant_fp[q][i] = (1 << 16) / quant_QTX;
......
......@@ -329,7 +329,7 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
int chroma_cdef = xd->plane[1].subsampling_x == xd->plane[1].subsampling_y &&
xd->plane[2].subsampling_x == xd->plane[2].subsampling_y;
quantizer =
av1_ac_quant(cm->base_qindex, 0, cm->bit_depth) >> (cm->bit_depth - 8);
av1_ac_quant_Q3(cm->base_qindex, 0, cm->bit_depth) >> (cm->bit_depth - 8);
lambda = .12 * quantizer * quantizer / 256.;
av1_setup_dst_planes(xd->plane, cm->sb_size, frame, 0, 0);
......
......@@ -414,7 +414,7 @@ void av1_pick_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
} else if (method >= LPF_PICK_FROM_Q) {
const int min_filter_level = 0;
const int max_filter_level = av1_get_max_filter_level(cpi);
const int q = av1_ac_quant(cm->base_qindex, 0, cm->bit_depth);
const int q = av1_ac_quant_Q3(cm->base_qindex, 0, cm->bit_depth);
// These values were determined by linear fitting the result of the
// searched level for 8 bit depth:
// Keyframes: filt_guess = q * 0.06699 - 1.60817
......
......@@ -157,15 +157,15 @@ double av1_convert_qindex_to_q(int qindex, aom_bit_depth_t bit_depth) {
// Convert the index to a real Q value (scaled down to match old Q values)
#if CONFIG_HIGHBITDEPTH
switch (bit_depth) {
case AOM_BITS_8: return av1_ac_quant(qindex, 0, bit_depth) / 4.0;
case AOM_BITS_10: return av1_ac_quant(qindex, 0, bit_depth) / 16.0;
case AOM_BITS_12: return av1_ac_quant(qindex, 0, bit_depth) / 64.0;
case AOM_BITS_8: return av1_ac_quant_Q3(qindex, 0, bit_depth) / 4.0;
case AOM_BITS_10: return av1_ac_quant_Q3(qindex, 0, bit_depth) / 16.0;
case AOM_BITS_12: return av1_ac_quant_Q3(qindex, 0, bit_depth) / 64.0;
default:
assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
return -1.0;
}
#else
return av1_ac_quant(qindex, 0, bit_depth) / 4.0;
return av1_ac_quant_Q3(qindex, 0, bit_depth) / 4.0;
#endif
}
......
......@@ -367,25 +367,47 @@ static void od_enc_rc_reset(od_rc_state *rc) {
/*All of these initial scale/exp values are from Theora, and have not yet
been adapted to Daala, so they're certainly wrong.
The B-frame values especially are simply copies of the P-frame values.*/
/*XXXXX: This constant initialization, apart from tuning, is very
likely also scaled incorrectly.
In Theora, where these constants come from, (bits/pixel) ==
scale*((q_Q2/4)^-(exp/64))
This can be derived from looking at the update formula in
od_enc_rc_update_state().
I.e., we have a quantizer normalized to Q0 for 8-bit pixel values,
which we exponentiate. To get the same behavior here, we need to
continue normalizing the quantizer the same way. Otherwise we'll have
to exponentiate any scaling baked into the quantizer as well (which
seems complicated and unnecessary).
If we have normalized the quantizer before exponentiation, then
the initializers for log_scale[] should not depend on bit depth or
coefficient depth in any way.
This is now restored to the initialization as it was in Theora,
and should be revisited/rederived/corrected for AV1.*/
if (ibpp < 1) {
rc->exp[OD_I_FRAME] = 59;
rc->log_scale[OD_I_FRAME] = od_blog64(1997) - OD_Q57(OD_COEFF_SHIFT);
rc->log_scale[OD_I_FRAME] = od_blog64(1997) - OD_Q57(8);
} else if (ibpp < 2) {
rc->exp[OD_I_FRAME] = 55;
rc->log_scale[OD_I_FRAME] = od_blog64(1604) - OD_Q57(OD_COEFF_SHIFT);
rc->log_scale[OD_I_FRAME] = od_blog64(1604) - OD_Q57(8);
} else {
rc->exp[OD_I_FRAME] = 48;
rc->log_scale[OD_I_FRAME] = od_blog64(834) - OD_Q57(OD_COEFF_SHIFT);
rc->log_scale[OD_I_FRAME] = od_blog64(834) - OD_Q57(8);
}
if (ibpp < 4) {
rc->exp[OD_P_FRAME] = 100;
rc->log_scale[OD_P_FRAME] = od_blog64(2249) - OD_Q57(OD_COEFF_SHIFT);
rc->log_scale[OD_P_FRAME] = od_blog64(2249) - OD_Q57(8);
} else if (ibpp < 8) {
rc->exp[OD_P_FRAME] = 95;
rc->log_scale[OD_P_FRAME] = od_blog64(1751) - OD_Q57(OD_COEFF_SHIFT);
rc->log_scale[OD_P_FRAME] = od_blog64(1751) - OD_Q57(8);
} else {
rc->exp[OD_P_FRAME] = 73;
rc->log_scale[OD_P_FRAME] = od_blog64(1260) - OD_Q57(OD_COEFF_SHIFT);
rc->log_scale[OD_P_FRAME] = od_blog64(1260) - OD_Q57(8);
}
/*Golden P-frames both use the same log_scale and exp modeling
values as regular P-frames and the same scale follower.
......@@ -810,7 +832,11 @@ int od_enc_rc_select_quantizers_and_lambdas(od_rc_state *rc,
calculation, that needs to be modulated as well.
Calculate what is, effectively, a fractional coded quantizer. */
/*Get the log2 quantizer in Q57 (normalized for coefficient shift).*/
log_quantizer = od_blog64(rc->base_quantizer) - OD_Q57(OD_COEFF_SHIFT);
/*XXXXX: See the above XXXX comment in rate control
initialization; the scaling on the log-quantizer calculation
should be the same as in quantizer scale initialization, but
OD_Q57(8) is possibly the incorrect value. */
log_quantizer = od_blog64(rc->base_quantizer) - OD_Q57(8);
/*log_quantizer to Q21.*/
log_quantizer >>= 36;
/*scale log quantizer, result is Q33.*/
......@@ -824,7 +850,7 @@ int od_enc_rc_select_quantizers_and_lambdas(od_rc_state *rc,
/*Back to log2 quantizer in Q57.*/
log_quantizer = (log_quantizer - OD_LOG_QUANTIZER_OFFSET_Q45) *
OD_LOG_QUANTIZER_EXP_Q12 +
OD_Q57(OD_COEFF_SHIFT);
OD_Q57(8);
/*Convert Q57 log2 quantizer to unclamped linear target quantizer value.*/
rc->target_quantizer = od_bexp64(log_quantizer);
}
......@@ -881,7 +907,11 @@ int od_enc_rc_select_quantizers_and_lambdas(od_rc_state *rc,
for (i = 0; i < OD_FRAME_NSUBTYPES; i++) {
/*Modulate base quantizer by frame type.*/
/*Get the log2 quantizer in Q57 (normalized for coefficient shift).*/
log_quantizer = log_base_quantizer - OD_Q57(OD_COEFF_SHIFT);
/*XXXXX: See the above XXXX comment in rate control
initialization; the scaling on the log-quantizer calculation
should be the same as in quantizer scale initialization, but
OD_Q57(8) is possibly the incorrect value. */
log_quantizer = log_base_quantizer - OD_Q57(8);
/*log_quantizer to Q21.*/
log_quantizer >>= 36;
/*scale log quantizer, result is Q33.*/
......@@ -895,7 +925,7 @@ int od_enc_rc_select_quantizers_and_lambdas(od_rc_state *rc,
/*Back to log2 quantizer in Q57.*/
log_quantizer = (log_quantizer - OD_LOG_QUANTIZER_OFFSET_Q45) *
OD_LOG_QUANTIZER_EXP_Q12 +
OD_Q57(OD_COEFF_SHIFT);
OD_Q57(8);
/*Clamp modulated quantizer values.*/
log_quantizer = OD_CLAMPI(od_blog64(lossy_quantizer_min), log_quantizer,
od_blog64(lossy_quantizer_max));
......@@ -924,7 +954,11 @@ int od_enc_rc_select_quantizers_and_lambdas(od_rc_state *rc,
/*Modulate chosen base quantizer to produce target quantizer.*/
log_quantizer = od_blog64(base_quantizer);
/*Get the log2 quantizer in Q57 (normalized for coefficient shift).*/
log_quantizer -= OD_Q57(OD_COEFF_SHIFT);
/*XXXXX: See the above XXXX comment in rate control
initialization; the scaling on the log-quantizer calculation
should be the same as in quantizer scale initialization, but
OD_Q57(8) is possibly the incorrect value. */
log_quantizer -= OD_Q57(8);
/*log_quantizer to Q21.*/
log_quantizer >>= 36;
/*scale log quantizer, result is Q33.*/
......@@ -938,7 +972,7 @@ int od_enc_rc_select_quantizers_and_lambdas(od_rc_state *rc,
/*Back to log2 quantizer in Q57.*/
log_quantizer = (log_quantizer - OD_LOG_QUANTIZER_OFFSET_Q45) *
OD_LOG_QUANTIZER_EXP_Q12 +
OD_Q57(OD_COEFF_SHIFT);
OD_Q57(8);
/*Clamp modulated quantizer values.*/
log_quantizer = OD_CLAMPI(od_blog64(lossy_quantizer_min), log_quantizer,
od_blog64(lossy_quantizer_max));
......@@ -1023,7 +1057,18 @@ int od_enc_rc_select_quantizers_and_lambdas(od_rc_state *rc,
}
*bottom_idx = lossy_quantizer_min;
*top_idx = lossy_quantizer_max;
rc->target_quantizer = av1_qindex_from_ac(
/*XXXXXX: the store back to rc->target_quantizer just seems
wrong. target_quantizer is used as an actual linear quantizer
(like base_quantizer, I think it should be scaled the same way as
a Q0 quantizer for 8-bit inputs). But av1_qindex_from_ac*()
returns a quantizer index, which is completely incomparable.
Passing rc->target_quantizer directly to av1_qindex_from_ac_Q3 is
also probably incorrect. If we move to storing a value scaled the
same way as a Q0 quantizer for 8-bit inputs, then it should just
be rc->target_quantizer << (TX_COEFF_DEPTH - 8) for DAALA_TX, and
something depending on the bit depth for !DAALA_TX. */
rc->target_quantizer = av1_qindex_from_ac_Q3(
OD_CLAMPI(lossy_quantizer_min, rc->target_quantizer, lossy_quantizer_max),
rc->bit_depth);
return rc->target_quantizer;
......
......@@ -338,7 +338,7 @@ static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = {
};
int av1_compute_rd_mult(const AV1_COMP *cpi, int qindex) {
const int64_t q = av1_dc_quant(qindex, 0, cpi->common.bit_depth);
const int64_t q = av1_dc_quant_Q3(qindex, 0, cpi->common.bit_depth);
#if CONFIG_HIGHBITDEPTH
int64_t rdmult = 0;
switch (cpi->common.bit_depth) {
......@@ -368,16 +368,16 @@ static int compute_rd_thresh_factor(int qindex, aom_bit_depth_t bit_depth) {
double q;
#if CONFIG_HIGHBITDEPTH
switch (bit_depth) {
case AOM_BITS_8: q = av1_dc_quant(qindex, 0, AOM_BITS_8) / 4.0; break;
case AOM_BITS_10: q = av1_dc_quant(qindex, 0, AOM_BITS_10) / 16.0; break;
case AOM_BITS_12: q = av1_dc_quant(qindex, 0, AOM_BITS_12) / 64.0; break;
case AOM_BITS_8: q = av1_dc_quant_Q3(qindex, 0, AOM_BITS_8) / 4.0; break;
case AOM_BITS_10: q = av1_dc_quant_Q3(qindex, 0, AOM_BITS_10) / 16.0; break;
case AOM_BITS_12: q = av1_dc_quant_Q3(qindex, 0, AOM_BITS_12) / 64.0; break;
default:
assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
return -1;
}
#else
(void)bit_depth;
q = av1_dc_quant(qindex, 0, AOM_BITS_8) / 4.0;
q = av1_dc_quant_Q3(qindex, 0, AOM_BITS_8) / 4.0;
#endif // CONFIG_HIGHBITDEPTH
// TODO(debargha): Adjust the function below.
return AOMMAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8);
......@@ -1295,7 +1295,7 @@ void av1_update_rd_thresh_fact(const AV1_COMMON *const cm,