Commit 8fa38096 authored by Timothy B. Terriberry, committed by Yaowu Xu

Add trellis quantization.

Replace the exponential search for optimal rounding during
 quantization with a linear Viterbi trellis and enable it
 by default when using --best.
Right now this operates on top of the output of the adaptive
 zero-bin quantizer in vp8_regular_quantize_b() and gives a small
 gain.
It can be tested as a replacement for that quantizer by
 enabling the call to vp8_strict_quantize_b(), which uses
 normal rounding and no zero bin offset.
Ultimately, the quantizer will have to become a function of lambda
 in order to take advantage of activity masking, since there is
 limited ability to change the quantization factor itself.
However, currently vp8_strict_quantize_b() plus the trellis
 quantizer (which is lambda-dependent) loses to
 vp8_regular_quantize_b() alone (which is not) on my test clip.

Patch Set 3:

Fix an issue related to the cost evaluation of successor
states when a coefficient is reduced to zero. With this
issue fixed, now the trellis search almost exactly matches
the exponential search.

Patch Set 2:

Overall, the goal of this patch set is to make the "trellis"
search produce encodings that match the exponential
search version. There are three main differences between
Patch Set 2 and 1:
a. Patch set 1 did not properly account for the scale of
2nd order error, so patch set 2 disables it altogether
for 2nd blocks.
b. Patch set 1 was not consistent on when to enable
the quantization optimization. Patch set 2 restores the
condition to be consistent.
c. Patch set 1 checks quantized levels L-1 and L for any
input coefficient that was quantized to L. Patch set 2 limits
the candidate coefficients to those that were rounded up
to L. It is worth noting here that a strategy to check
L and L+1 for coefficients that were truncated down to L
might work.

(a and b get trellis quant to basically match the exponential
search on all mid/low rate encodings on the cif set; without
a and b, trellis quant can hurt the psnr by 0.2 to 0.3 dB at
200kbps for some cif clips)
(c gets trellis quant to match the exponential search
at Q0 encoding; without c, trellis quant can be
1.5 to 2 dB lower for encodings with fixed Q at 0 on most
derf cif clips)

Change-Id:	Ib1a043b665d75fbf00cb0257b7c18e90eebab95e
parent e4fe8669
......@@ -109,7 +109,6 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
#if !(CONFIG_REALTIME_ONLY)
#if 1
if (x->optimize && x->rddiv > 1)
vp8_optimize_mby(x, rtcd);
......
This diff is collapsed.
......@@ -215,6 +215,65 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
}
#endif
/*
 * Quantize one block using plain round-to-nearest and no zero-bin offset.
 *
 * For each of the 16 scan positions (visited in vp8_default_zig_zag1d order)
 * half of the dequantizer step is added to the coefficient magnitude; if the
 * result reaches a full step, it is quantized with the multiply/shift pair
 * taken from b->quant / b->quant_shift and the sign is restored.
 *
 * b: source block; reads b->coeff, b->quant[0] and b->quant_shift[0].
 * d: destination block; reads d->dequant[0], writes d->qcoeff, d->dqcoeff
 *    and d->eob.
 *
 * On return d->eob is one past the scan position of the last coefficient
 * whose quantized magnitude is non-zero, or 0 when there is none.
 */
void vp8_strict_quantize_b(BLOCK *b, BLOCKD *d)
{
    short *coeffs       = &b->coeff[0];
    short *quant        = &b->quant[0][0];
    short *quant_shift  = &b->quant_shift[0][0];
    short *dequant      = &d->dequant[0][0];
    short *qcoeffs      = d->qcoeff;
    short *dqcoeffs     = d->dqcoeff;
    int    end_of_block = 0;
    int    pos;

    /* Clear both outputs up front so skipped coefficients stay zero.
     * NOTE(review): 32 bytes presumably covers 16 short entries - confirm. */
    vpx_memset(qcoeffs, 0, 32);
    vpx_memset(dqcoeffs, 0, 32);

    for (pos = 0; pos < 16; pos++)
    {
        /*TODO: These arrays should be stored in zig-zag order.*/
        const int idx  = vp8_default_zig_zag1d[pos];
        const int coef = coeffs[idx];
        const int step = dequant[idx];
        /* All-ones mask (-1) for a negative coefficient, 0 otherwise. */
        const int sign = -(coef < 0);
        /* Magnitude of the coefficient plus the rounding term (half a step). */
        const int magnitude = ((coef + sign) ^ sign) + (step >> 1);
        int level;
        int signed_level;

        /* Less than one full step after rounding: leave the zeros in place. */
        if (magnitude < step)
            continue;

        /* Quantize the rounded magnitude. */
        level = (((magnitude * quant[idx]) >> 16) + magnitude)
                >> quant_shift[idx];

        /* Re-apply the sign using the same add/xor trick. */
        signed_level = (level + sign) ^ sign;

        /* Store the quantized coefficient and its reconstruction. */
        qcoeffs[idx]  = signed_level;
        dqcoeffs[idx] = signed_level * step;

        /* Track one past the last position holding a non-zero level. */
        if (level)
            end_of_block = pos + 1;
    }

    d->eob = end_of_block;
}
void vp8_quantize_mby(MACROBLOCK *x)
{
int i;
......
......@@ -47,6 +47,8 @@ typedef struct
#define QUANTIZE_INVOKE(ctx,fn) vp8_quantize_##fn
#endif
extern void vp8_strict_quantize_b(BLOCK *b,BLOCKD *d);
extern void vp8_quantize_mb(MACROBLOCK *x);
extern void vp8_quantize_mbuv(MACROBLOCK *x);
extern void vp8_quantize_mby(MACROBLOCK *x);
......
......@@ -64,11 +64,6 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x);
#define MAXF(a,b) (((a) > (b)) ? (a) : (b))
extern const TOKENEXTRA vp8_dct_value_tokens[DCT_MAX_VALUE*2];
extern const TOKENEXTRA *vp8_dct_value_tokens_ptr;
extern int vp8_dct_value_cost[DCT_MAX_VALUE*2];
extern int *vp8_dct_value_cost_ptr;
const int vp8_auto_speed_thresh[17] =
{
......
......@@ -27,9 +27,9 @@ void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ;
void vp8_fix_contexts(VP8_COMP *cpi, MACROBLOCKD *x);
TOKENEXTRA vp8_dct_value_tokens[DCT_MAX_VALUE*2];
TOKENEXTRA *vp8_dct_value_tokens_ptr;
const TOKENEXTRA *vp8_dct_value_tokens_ptr;
int vp8_dct_value_cost[DCT_MAX_VALUE*2];
int *vp8_dct_value_cost_ptr;
const int *vp8_dct_value_cost_ptr;
#if 0
int skip_true_count = 0;
int skip_false_count = 0;
......
......@@ -35,5 +35,11 @@ void print_context_counters();
extern _int64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens];
#endif
extern const int *vp8_dct_value_cost_ptr;
/* TODO: The Token field should be broken out into a separate char array to
* improve cache locality, since it's needed for costing when the rest of the
* fields are not.
*/
extern const TOKENEXTRA *vp8_dct_value_tokens_ptr;
#endif /* tokenize_h */
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment