Commit 9e9f5f3d authored by Scott LaVarnway
Browse files

New vp8_decode_mb_tokens()

This new vp8_decode_mb_tokens() uses a modified version of
WebP's GetCoeffs function.  For now, the dequant does not
occur in GetCoeffs.
Tests showed performance improvements up to 2.5% depending
on material.

Change-Id: Ia24d78627e16ffee5eb4d777ee8379a9270f07c5
parent 06dc2f61
......@@ -15,58 +15,6 @@
#include "vpx_ports/mem.h"
#include "detokenize.h"
#define BOOL_DATA unsigned char
#define OCB_X PREV_COEF_CONTEXTS * ENTROPY_NODES
DECLARE_ALIGNED(16, static const unsigned char, coef_bands_x[16]) =
{
0 * OCB_X, 1 * OCB_X, 2 * OCB_X, 3 * OCB_X,
6 * OCB_X, 4 * OCB_X, 5 * OCB_X, 6 * OCB_X,
6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X,
6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 7 * OCB_X
};
#define EOB_CONTEXT_NODE 0
#define ZERO_CONTEXT_NODE 1
#define ONE_CONTEXT_NODE 2
#define LOW_VAL_CONTEXT_NODE 3
#define TWO_CONTEXT_NODE 4
#define THREE_CONTEXT_NODE 5
#define HIGH_LOW_CONTEXT_NODE 6
#define CAT_ONE_CONTEXT_NODE 7
#define CAT_THREEFOUR_CONTEXT_NODE 8
#define CAT_THREE_CONTEXT_NODE 9
#define CAT_FIVE_CONTEXT_NODE 10
#define CAT1_MIN_VAL 5
#define CAT2_MIN_VAL 7
#define CAT3_MIN_VAL 11
#define CAT4_MIN_VAL 19
#define CAT5_MIN_VAL 35
#define CAT6_MIN_VAL 67
#define CAT1_PROB0 159
#define CAT2_PROB0 145
#define CAT2_PROB1 165
#define CAT3_PROB0 140
#define CAT3_PROB1 148
#define CAT3_PROB2 173
#define CAT4_PROB0 135
#define CAT4_PROB1 140
#define CAT4_PROB2 155
#define CAT4_PROB3 176
#define CAT5_PROB0 130
#define CAT5_PROB1 134
#define CAT5_PROB2 141
#define CAT5_PROB3 157
#define CAT5_PROB4 180
static const unsigned char cat6_prob[12] =
{ 129, 130, 133, 140, 153, 177, 196, 230, 243, 254, 254, 0 };
void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
{
/* Clear entropy contexts for Y2 blocks */
......@@ -83,302 +31,216 @@ void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
}
}
DECLARE_ALIGNED(16, extern const unsigned char, vp8_norm[256]);
/*
 * Invokes VP8DX_BOOL_DECODER_FILL when `count` has dropped below zero.
 * Expands in place, so `count`, `value`, `bufptr` and `bufend` must be
 * variables of the enclosing function.
 * NOTE(review): VP8DX_BOOL_DECODER_FILL is defined elsewhere; presumably it
 * loads more input bytes into `value` -- confirm.
 */
#define FILL \
if(count < 0) \
VP8DX_BOOL_DECODER_FILL(count, value, bufptr, bufend);
/*
------------------------------------------------------------------------------
Residual decoding (Paragraph 13.2 / 13.3)
*/
/*
 * Band index for each coefficient position. GetCoeffs reads kBands[n] to pick
 * the probability row for the next token; n can reach 16 there, which is why
 * the table carries a 17th entry.
 */
static const uint8_t kBands[16 + 1] = {
0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7,
0 /* extra entry as sentinel */
};
/*
 * Renormalizes the local decoder state: looks up a shift amount in
 * vp8_norm[range], shifts `range` and `value` left by it and subtracts it
 * from `count`. Operates on the enclosing function's `shift`, `range`,
 * `value` and `count` variables. The range check is commented out, so the
 * block runs unconditionally.
 */
#define NORMALIZE \
/*if(range < 0x80)*/ \
{ \
shift = vp8_norm[range]; \
range <<= shift; \
value <<= shift; \
count -= shift; \
}
/*
 * Probabilities for the extra magnitude bits of the four largest token
 * categories. GetCoeffs walks a list front to back, doubling the accumulated
 * value before adding each decoded bit, so the first entry belongs to the most
 * significant extra bit. Every list ends with 0, which terminates that walk.
 */
static const uint8_t kCat3[] = { 173, 148, 140, 0 };
static const uint8_t kCat4[] = { 176, 155, 140, 135, 0 };
static const uint8_t kCat5[] = { 180, 157, 141, 134, 130, 0 };
static const uint8_t kCat6[] =
{ 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 };
/* Indexed by the two-bit category number (0..3) decoded in GetCoeffs. */
static const uint8_t* const kCat3456[] = { kCat3, kCat4, kCat5, kCat6 };
/*
 * Maps the decode-order index of a coefficient (n - 1 in GetCoeffs) to the
 * slot of the 16-entry output block that receives it.
 */
static const uint8_t kZigzag[16] = {
0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
};
/*
 * Decodes one bit with the split placed at (range + 1) >> 1 and uses it as a
 * sign: `v` receives value_to_sign when value < bigsplit, otherwise its
 * negation (after range and value have been reduced by the split). Afterwards
 * range and value are doubled and count is decremented.
 * Operates on the enclosing function's split, bigsplit, range, value, count
 * and v. `value_to_sign` is pasted unparenthesized, so pass a simple operand.
 */
#define DECODE_AND_APPLYSIGN(value_to_sign) \
split = (range + 1) >> 1; \
bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
FILL \
if ( value < bigsplit ) \
{ \
range = split; \
v= value_to_sign; \
} \
else \
{ \
range = range-split; \
value = value-bigsplit; \
v = -value_to_sign; \
} \
range +=range; \
value +=value; \
count--;
#define VP8GetBit vp8dx_decode_bool
#define NUM_PROBAS 11
#define NUM_CTX 3
/*
 * Decodes one bit using `probability`. When value < bigsplit (a zero bit) the
 * range becomes the split, the state is normalized and control jumps to the
 * label `branch`. Otherwise value and range are reduced by the split, the
 * state is normalized and execution falls through to the next statement.
 * Operates on the enclosing function's split, bigsplit, range and value.
 */
#define DECODE_AND_BRANCH_IF_ZERO(probability,branch) \
{ \
split = 1 + ((( probability*(range-1) ) )>> 8); \
bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
FILL \
if ( value < bigsplit ) \
{ \
range = split; \
NORMALIZE \
goto branch; \
} \
value -= bigsplit; \
range = range - split; \
NORMALIZE \
}
/*
 * Pointer to rows of [NUM_CTX][NUM_PROBAS] probability bytes. GetCoeffs
 * indexes it as prob[band][ctx] to obtain one list of NUM_PROBAS entries.
 */
typedef const uint8_t (*ProbaArray)[NUM_CTX][NUM_PROBAS]; // for const-casting
/*
 * Decodes one bit using `probability`. On a zero bit (value < bigsplit) the
 * state is normalized and Prob is reset to coef_probs; then, if c < 15, c is
 * incremented, Prob is advanced by coef_bands_x[c] and control jumps to the
 * label `branch`, otherwise control jumps to BLOCK_FINISHED. On a one bit,
 * value and range are reduced by the split, the state is normalized and
 * execution falls through.
 * Operates on the enclosing function's split, bigsplit, range, value, c,
 * Prob and coef_probs, and needs a BLOCK_FINISHED label in scope.
 */
#define DECODE_AND_LOOP_IF_ZERO(probability,branch) \
{ \
split = 1 + ((( probability*(range-1) ) ) >> 8); \
bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
FILL \
if ( value < bigsplit ) \
{ \
range = split; \
NORMALIZE \
Prob = coef_probs; \
if(c<15) {\
++c; \
Prob += coef_bands_x[c]; \
goto branch; \
} goto BLOCK_FINISHED; /*for malformed input */\
} \
value -= bigsplit; \
range = range - split; \
NORMALIZE \
}
/*
 * Decodes one sign bit from `br` with the split at (range + 1) >> 1.
 *
 * br            - bool decoder whose range, value and count are updated.
 * value_to_sign - magnitude to which the sign is applied.
 *
 * Returns value_to_sign when br->value is below the split, otherwise
 * -value_to_sign.
 *
 * NOTE(review): two macro definitions sit lexically inside this body in this
 * view. They are preprocessor text only and add no statements here; they
 * appear to belong to the previous macro-based decoder -- confirm.
 */
static int GetSigned(BOOL_DECODER *br, int value_to_sign)
{
int split = (br->range + 1) >> 1;
VP8_BD_VALUE bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8);
int v;
#define DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val) \
DECODE_AND_APPLYSIGN(val) \
Prob = coef_probs + (ENTROPY_NODES*2); \
if(c < 15){\
qcoeff_ptr [ scan[c] ] = (int16_t) v; \
++c; \
goto DO_WHILE; }\
qcoeff_ptr [ 15 ] = (int16_t) v; \
goto BLOCK_FINISHED;
/* Ask the decoder for more input once its bit count has gone negative. */
if(br->count < 0)
vp8dx_bool_decoder_fill(br);
if ( br->value < bigsplit )
{
br->range = split;
v= value_to_sign;
}
else
{
br->range = br->range-split;
br->value = br->value-bigsplit;
v = -value_to_sign;
}
/* Consume exactly one bit: double range and value, drop one from count. */
br->range +=br->range;
br->value +=br->value;
br->count--;
#define DECODE_EXTRABIT_AND_ADJUST_VAL(prob, bits_count)\
split = 1 + (((range-1) * prob) >> 8); \
bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
FILL \
if(value >= bigsplit)\
{\
range = range-split;\
value = value-bigsplit;\
val += ((uint16_t)1<<bits_count);\
}\
else\
{\
range = split;\
}\
NORMALIZE
return v;
}
/*
 * Decodes the coefficient tokens of one block into `out`.
 *
 * br   - bool decoder to read from.
 * prob - probability rows, indexed as prob[band][ctx].
 * ctx  - context selecting the probability list for the first token.
 * n    - index of the first coefficient to decode.
 * out  - receives each non-zero coefficient at out[kZigzag[position]];
 *        slots for zero coefficients are left untouched.
 *
 * Returns 0 when the leading end-of-block bit says the block is empty,
 * otherwise the coefficient count at which decoding stopped (at most 16).
 */
static int GetCoeffs(BOOL_DECODER *br, ProbaArray prob,
                     int ctx, int n, int16_t* out)
{
    const uint8_t *probs = prob[n][ctx];

    /* The very first end-of-block bit acts like a coded-block flag. */
    if (!VP8GetBit(br, probs[0]))
        return 0;

    for (;;)
    {
        int magnitude;

        ++n;

        if (!VP8GetBit(br, probs[1]))
        {
            /* Zero coefficient: nothing is stored, move to the next one. */
            probs = prob[kBands[n]][0];
            if (n == 16)
                return 16;
            continue;
        }

        if (!VP8GetBit(br, probs[2]))
        {
            probs = prob[kBands[n]][1];
            magnitude = 1;
        }
        else
        {
            if (!VP8GetBit(br, probs[3]))
            {
                /* Small values 2..4. */
                if (!VP8GetBit(br, probs[4]))
                    magnitude = 2;
                else
                    magnitude = 3 + VP8GetBit(br, probs[5]);
            }
            else if (!VP8GetBit(br, probs[6]))
            {
                /* The two smallest categories use fixed probabilities. */
                if (!VP8GetBit(br, probs[7]))
                {
                    magnitude = 5 + VP8GetBit(br, 159);
                }
                else
                {
                    magnitude = 7 + 2 * VP8GetBit(br, 165);
                    magnitude += VP8GetBit(br, 145);
                }
            }
            else
            {
                /* Larger categories: two bits pick the extra-bit table. */
                const int hi = VP8GetBit(br, probs[8]);
                const int lo = VP8GetBit(br, probs[9 + hi]);
                const int category = 2 * hi + lo;
                const uint8_t *extra = kCat3456[category];

                magnitude = 0;
                while (*extra)
                {
                    magnitude = 2 * magnitude + VP8GetBit(br, *extra);
                    ++extra;
                }
                magnitude += 3 + (8 << category);
            }
            probs = prob[kBands[n]][2];
        }

        out[kZigzag[n - 1]] = GetSigned(br, magnitude);

        /* Stop after the last position or on an explicit end-of-block. */
        if (n == 16 || !VP8GetBit(br, probs[0]))
            return n;
    }
}
/*
 * Decodes the residual tokens of one macroblock and returns the accumulated
 * end-of-block total (eobtotal).
 *
 * NOTE(review): the body below appears to interleave lines of the previous
 * goto/macro-based decoder with lines of the new GetCoeffs-based one (for
 * example `fc`, `a`, `l` and `coef_probs` are each declared twice, and a
 * `do` has no matching body), so it does not compile as shown. Confirm
 * against the real file before editing any statement here.
 */
int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
{
ENTROPY_CONTEXT *A = (ENTROPY_CONTEXT *)x->above_context;
ENTROPY_CONTEXT *L = (ENTROPY_CONTEXT *)x->left_context;
const FRAME_CONTEXT * const fc = &dx->common.fc;
BOOL_DECODER *bc = x->current_bc;
const FRAME_CONTEXT * const fc = &dx->common.fc;
char *eobs = x->eobs;
ENTROPY_CONTEXT *a;
ENTROPY_CONTEXT *l;
int i;
int nonzeros;
int eobtotal = 0;
register int count;
const BOOL_DATA *bufptr;
const BOOL_DATA *bufend;
register unsigned int range;
VP8_BD_VALUE value;
const int *scan;
register unsigned int shift;
unsigned int split;
VP8_BD_VALUE bigsplit;
short *qcoeff_ptr;
ProbaArray coef_probs;
ENTROPY_CONTEXT *a_ctx = ((ENTROPY_CONTEXT *)x->above_context);
ENTROPY_CONTEXT *l_ctx = ((ENTROPY_CONTEXT *)x->left_context);
ENTROPY_CONTEXT *a;
ENTROPY_CONTEXT *l;
int skip_dc = 0;
const vp8_prob *coef_probs;
int stop;
int val, bits_count;
int c;
int v;
const vp8_prob *Prob;
int start_coeff;
i = 0;
stop = 16;
scan = vp8_default_zig_zag1d;
qcoeff_ptr = &x->qcoeff[0];
coef_probs = fc->coef_probs [3] [ 0 ] [0];
if (x->mode_info_context->mbmi.mode != B_PRED &&
x->mode_info_context->mbmi.mode != SPLITMV)
{
i = 24;
stop = 24;
qcoeff_ptr += 24*16;
eobtotal -= 16;
coef_probs = fc->coef_probs [1] [ 0 ] [0];
}
/* Copy the bool-decoder state into locals used by the decoding macros. */
bufend = bc->user_buffer_end;
bufptr = bc->user_buffer;
value = bc->value;
count = bc->count;
range = bc->range;
start_coeff = 0;
BLOCK_LOOP:
a = A + vp8_block2above[i];
l = L + vp8_block2left[i];
c = start_coeff;
a = a_ctx + 8;
l = l_ctx + 8;
VP8_COMBINEENTROPYCONTEXTS(v, *a, *l);
coef_probs = fc->coef_probs [1];
Prob = coef_probs;
Prob += v * ENTROPY_NODES;
*a = *l = 0;
/* Block stored at qcoeff_ptr + 24 * 16; its count goes to eobs[24]. */
nonzeros = GetCoeffs(bc, coef_probs, (*a + *l), 0, qcoeff_ptr + 24 * 16);
*a = *l = (nonzeros > 0);
DO_WHILE:
Prob += coef_bands_x[c];
DECODE_AND_BRANCH_IF_ZERO(Prob[EOB_CONTEXT_NODE], BLOCK_FINISHED);
*a = *l = 1;
eobs[24] = nonzeros;
eobtotal += nonzeros - 16;
CHECK_0_:
DECODE_AND_LOOP_IF_ZERO(Prob[ZERO_CONTEXT_NODE], CHECK_0_);
DECODE_AND_BRANCH_IF_ZERO(Prob[ONE_CONTEXT_NODE], ONE_CONTEXT_NODE_0_);
DECODE_AND_BRANCH_IF_ZERO(Prob[LOW_VAL_CONTEXT_NODE],
LOW_VAL_CONTEXT_NODE_0_);
DECODE_AND_BRANCH_IF_ZERO(Prob[HIGH_LOW_CONTEXT_NODE],
HIGH_LOW_CONTEXT_NODE_0_);
DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREEFOUR_CONTEXT_NODE],
CAT_THREEFOUR_CONTEXT_NODE_0_);
DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_FIVE_CONTEXT_NODE],
CAT_FIVE_CONTEXT_NODE_0_);
val = CAT6_MIN_VAL;
bits_count = 10;
do
coef_probs = fc->coef_probs [0];
skip_dc = 1;
}
else
{
DECODE_EXTRABIT_AND_ADJUST_VAL(cat6_prob[bits_count], bits_count);
bits_count -- ;
coef_probs = fc->coef_probs [3];
skip_dc = 0;
}
while (bits_count >= 0);
DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
CAT_FIVE_CONTEXT_NODE_0_:
val = CAT5_MIN_VAL;
DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB4, 4);
DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB3, 3);
DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB2, 2);
DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB1, 1);
DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB0, 0);
DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
CAT_THREEFOUR_CONTEXT_NODE_0_:
DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREE_CONTEXT_NODE],
CAT_THREE_CONTEXT_NODE_0_);
val = CAT4_MIN_VAL;
DECODE_EXTRABIT_AND_ADJUST_VAL(CAT4_PROB3, 3);
DECODE_EXTRABIT_AND_ADJUST_VAL(CAT4_PROB2, 2);
DECODE_EXTRABIT_AND_ADJUST_VAL(CAT4_PROB1, 1);
DECODE_EXTRABIT_AND_ADJUST_VAL(CAT4_PROB0, 0);
DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
CAT_THREE_CONTEXT_NODE_0_:
val = CAT3_MIN_VAL;
DECODE_EXTRABIT_AND_ADJUST_VAL(CAT3_PROB2, 2);
DECODE_EXTRABIT_AND_ADJUST_VAL(CAT3_PROB1, 1);
DECODE_EXTRABIT_AND_ADJUST_VAL(CAT3_PROB0, 0);
DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
HIGH_LOW_CONTEXT_NODE_0_:
DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_ONE_CONTEXT_NODE],
CAT_ONE_CONTEXT_NODE_0_);
val = CAT2_MIN_VAL;
DECODE_EXTRABIT_AND_ADJUST_VAL(CAT2_PROB1, 1);
DECODE_EXTRABIT_AND_ADJUST_VAL(CAT2_PROB0, 0);
DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
CAT_ONE_CONTEXT_NODE_0_:
val = CAT1_MIN_VAL;
DECODE_EXTRABIT_AND_ADJUST_VAL(CAT1_PROB0, 0);
DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
LOW_VAL_CONTEXT_NODE_0_:
DECODE_AND_BRANCH_IF_ZERO(Prob[TWO_CONTEXT_NODE], TWO_CONTEXT_NODE_0_);
DECODE_AND_BRANCH_IF_ZERO(Prob[THREE_CONTEXT_NODE], THREE_CONTEXT_NODE_0_);
DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(4);
THREE_CONTEXT_NODE_0_:
DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(3);
TWO_CONTEXT_NODE_0_:
DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(2);
ONE_CONTEXT_NODE_0_:
DECODE_AND_APPLYSIGN(1);
Prob = coef_probs + ENTROPY_NODES;
if (c < 15)
for (i = 0; i < 16; ++i)
{
qcoeff_ptr [ scan[c] ] = (int16_t) v;
++c;
goto DO_WHILE;
}
a = a_ctx + (i&3);
l = l_ctx + ((i&0xc)>>2);
qcoeff_ptr [ 15 ] = (int16_t) v;
BLOCK_FINISHED:
eobs[i] = c;
eobtotal += c;
qcoeff_ptr += 16;
nonzeros = GetCoeffs(bc, coef_probs, (*a + *l), skip_dc, qcoeff_ptr);
*a = *l = (nonzeros > 0);
i++;
nonzeros += skip_dc;
eobs[i] = nonzeros;
eobtotal += nonzeros;
qcoeff_ptr += 16;
}
if (i < stop)
goto BLOCK_LOOP;
coef_probs = fc->coef_probs [2];
if (i == 25)
a_ctx += 4;
l_ctx += 4;
for (i = 16; i < 24; ++i)
{
start_coeff = 1;
i = 0;
stop = 16;
coef_probs = fc->coef_probs [0] [ 0 ] [0];
qcoeff_ptr -= (24*16 + 16);
goto BLOCK_LOOP;
}
a = a_ctx + ((i > 19)<<1) + (i&1);
l = l_ctx + ((i > 19)<<1) + ((i&3)>1);
if (i == 16)
{
start_coeff = 0;
coef_probs = fc->coef_probs [2] [ 0 ] [0];
stop = 24;
goto BLOCK_LOOP;
nonzeros = GetCoeffs(bc, coef_probs, (*a + *l), 0, qcoeff_ptr);
*a = *l = (nonzeros > 0);
eobs[i] = nonzeros;
eobtotal += nonzeros;
qcoeff_ptr += 16;
}
/* Write the local decoder state back to the bool decoder. */
FILL
bc->user_buffer = bufptr;
bc->value = value;
bc->count = count;
bc->range = range;
return eobtotal;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment