Commit 118ccdcd authored by Ronald S. Bultje's avatar Ronald S. Bultje
Browse files

Invert dimension order in token_costs array.

This allows us to increment the position at the band level only as
we go from one band to the next; more importantly, it allows us to
use an add instead of a multiply instruction, and to omit the instruction
altogether if the band doesn't change from one coefficient to the next,
thus being slightly faster (probably more noticeable on systems where a
multiply is expensive, like ARM).

Change-Id: I4343fe35b9f9a47fa00b217bdcbf5f91ff96c381
parent 35e7e7b6
......@@ -72,6 +72,11 @@ struct macroblock_plane {
int16_t zbin_extra;
};
/* Table of coefficient token costs, indexed in this order:
* [BLOCK_TYPES][REF_TYPES][COEF_BANDS][2][PREV_COEF_CONTEXTS]
* [MAX_ENTROPY_TOKENS].
*
* The [2] dimension is for whether we skip the EOB node (i.e. if previous
* coefficient in this block was zero) or not. In fill_token_costs(), index 0
* is filled by vp9_cost_tokens() and index 1 by vp9_cost_tokens_skip().
*
* The band dimension comes before the [2] dimension, so a pointer to a
* [2][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] element can be stepped from one
* band to the next with a plain increment (as cost_coeffs() does). */
typedef unsigned int vp9_coeff_cost[BLOCK_TYPES][REF_TYPES][COEF_BANDS][2]
[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS];
typedef struct macroblock MACROBLOCK;
struct macroblock {
struct macroblock_plane plane[MAX_MB_PLANE];
......@@ -133,7 +138,7 @@ struct macroblock {
unsigned char *active_ptr;
// note that token_costs is the cost when eob node is skipped
vp9_coeff_count token_costs[TX_SIZE_MAX_SB][BLOCK_TYPES][2];
vp9_coeff_cost token_costs[TX_SIZE_MAX_SB];
int optimize;
......
......@@ -222,10 +222,10 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb,
band = get_coef_band(band_translate, i + 1);
pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
rate0 +=
mb->token_costs[tx_size][type][ref][0][band][pt]
mb->token_costs[tx_size][type][ref][band][0][pt]
[tokens[next][0].token];
rate1 +=
mb->token_costs[tx_size][type][ref][0][band][pt]
mb->token_costs[tx_size][type][ref][band][0][pt]
[tokens[next][1].token];
}
UPDATE_RD_COST();
......@@ -273,12 +273,12 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb,
band = get_coef_band(band_translate, i + 1);
if (t0 != DCT_EOB_TOKEN) {
pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
rate0 += mb->token_costs[tx_size][type][ref][!x][band][pt]
rate0 += mb->token_costs[tx_size][type][ref][band][!x][pt]
[tokens[next][0].token];
}
if (t1 != DCT_EOB_TOKEN) {
pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache);
rate1 += mb->token_costs[tx_size][type][ref][!x][band][pt]
rate1 += mb->token_costs[tx_size][type][ref][band][!x][pt]
[tokens[next][1].token];
}
}
......@@ -311,12 +311,12 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb,
/* Update the cost of each path if we're past the EOB token. */
if (t0 != DCT_EOB_TOKEN) {
tokens[next][0].rate +=
mb->token_costs[tx_size][type][ref][1][band][0][t0];
mb->token_costs[tx_size][type][ref][band][1][0][t0];
tokens[next][0].token = ZERO_TOKEN;
}
if (t1 != DCT_EOB_TOKEN) {
tokens[next][1].rate +=
mb->token_costs[tx_size][type][ref][1][band][0][t1];
mb->token_costs[tx_size][type][ref][band][1][0][t1];
tokens[next][1].token = ZERO_TOKEN;
}
best_index[i][0] = best_index[i][1] = 0;
......@@ -333,8 +333,8 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb,
error1 = tokens[next][1].error;
t0 = tokens[next][0].token;
t1 = tokens[next][1].token;
rate0 += mb->token_costs[tx_size][type][ref][0][band][pt][t0];
rate1 += mb->token_costs[tx_size][type][ref][0][band][pt][t1];
rate0 += mb->token_costs[tx_size][type][ref][band][0][pt][t0];
rate1 += mb->token_costs[tx_size][type][ref][band][0][pt][t1];
UPDATE_RD_COST();
best = rd_cost1 < rd_cost0;
final_eob = i0 - 1;
......
......@@ -109,7 +109,7 @@ static int rd_thresh_block_size_factor[BLOCK_SIZE_TYPES] =
#define MAX_RD_THRESH_FREQ_FACT 32
#define MAX_RD_THRESH_FREQ_INC 1
static void fill_token_costs(vp9_coeff_count (*c)[BLOCK_TYPES][2],
static void fill_token_costs(vp9_coeff_cost *c,
vp9_coeff_probs_model (*p)[BLOCK_TYPES]) {
int i, j, k, l;
TX_SIZE t;
......@@ -120,12 +120,12 @@ static void fill_token_costs(vp9_coeff_count (*c)[BLOCK_TYPES][2],
for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
vp9_prob probs[ENTROPY_NODES];
vp9_model_to_full_probs(p[t][i][j][k][l], probs);
vp9_cost_tokens((int *)c[t][i][j][0][k][l], probs,
vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs,
vp9_coef_tree);
vp9_cost_tokens_skip((int *)c[t][i][j][1][k][l], probs,
vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
vp9_coef_tree);
assert(c[t][i][j][0][k][l][DCT_EOB_TOKEN] ==
c[t][i][j][1][k][l][DCT_EOB_TOKEN]);
assert(c[t][i][j][k][0][l][DCT_EOB_TOKEN] ==
c[t][i][j][k][1][l][DCT_EOB_TOKEN]);
}
}
......@@ -513,11 +513,16 @@ int64_t vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff,
return error;
}
/* The trailing '0' is a terminator which is used inside cost_coeffs() to
* decide whether to include the cost of a trailing EOB node or not (i.e. we
* can skip this if the last coefficient in this transform block, e.g. the
* 16th coefficient in a 4x4 block or the 64th coefficient in an 8x8 block,
* was non-zero). */
static const int16_t band_counts[TX_SIZE_MAX_SB][8] = {
{ 1, 2, 3, 4, 3, 16 - 13 },
{ 1, 2, 3, 4, 11, 64 - 21 },
{ 1, 2, 3, 4, 11, 256 - 21 },
{ 1, 2, 3, 4, 11, 1024 - 21 },
{ 1, 2, 3, 4, 3, 16 - 13, 0 },
{ 1, 2, 3, 4, 11, 64 - 21, 0 },
{ 1, 2, 3, 4, 11, 256 - 21, 0 },
{ 1, 2, 3, 4, 11, 1024 - 21, 0 },
};
static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
......@@ -528,11 +533,11 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
MACROBLOCKD *const xd = &mb->e_mbd;
MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
int pt, c, cost;
const int16_t *band_count = band_counts[tx_size];
const int16_t *band_count = &band_counts[tx_size][1];
const int eob = xd->plane[plane].eobs[block];
const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff, block, 16);
const int ref = mbmi->ref_frame[0] != INTRA_FRAME;
unsigned int (*token_costs)[COEF_BANDS][PREV_COEF_CONTEXTS]
unsigned int (*token_costs)[2][PREV_COEF_CONTEXTS]
[MAX_ENTROPY_TOKENS] = mb->token_costs[tx_size][type][ref];
ENTROPY_CONTEXT above_ec = !!*A, left_ec = !!*L;
uint8_t token_cache[1024];
......@@ -552,13 +557,14 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
cost = token_costs[0][0][pt][DCT_EOB_TOKEN];
c = 0;
} else {
int v, prev_t, band = 1, band_left = band_count[1];
int v, prev_t, band_left = *band_count++;
// dc token
v = qcoeff_ptr[0];
prev_t = vp9_dct_value_tokens_ptr[v].token;
cost = token_costs[0][0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
token_cache[0] = vp9_pt_energy_class[prev_t];
++token_costs;
// ac tokens
for (c = 1; c < eob; c++) {
......@@ -568,18 +574,19 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
v = qcoeff_ptr[rc];
t = vp9_dct_value_tokens_ptr[v].token;
pt = get_coef_context(nb, token_cache, c);
cost += token_costs[!prev_t][band][pt][t] + vp9_dct_value_cost_ptr[v];
cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
token_cache[rc] = vp9_pt_energy_class[t];
prev_t = t;
if (!--band_left) {
band_left = band_count[++band];
band_left = *band_count++;
++token_costs;
}
}
// eob token
if (band < 6) {
if (band_left) {
pt = get_coef_context(nb, token_cache, c);
cost += token_costs[0][band][pt][DCT_EOB_TOKEN];
cost += (*token_costs)[0][pt][DCT_EOB_TOKEN];
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment