Commit c9d2fcc2 authored by Alex Converse's avatar Alex Converse

Port optimize_b cache optimizations to AV1.

Bring the following libvpx commits to aom:
e446ffd Cache optimizations in optimize_b()
50d3629 Repack vp9_token_state

Saves 24600 bytes of stack in the default configuration.

Change-Id: If9d6506cf3fe1c34ab639dedb3ef62a996293781
parent f5323760
......@@ -73,12 +73,13 @@ void av1_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
}
typedef struct av1_token_state {
int rate;
int64_t error;
int next;
int rate;
int16_t next;
int16_t token;
tran_low_t qc;
tran_low_t dqc;
uint8_t best_index;
} av1_token_state;
// These numbers are empirically obtained.
......@@ -114,7 +115,6 @@ int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block,
struct macroblockd_plane *const pd = &xd->plane[plane];
const int ref = is_inter_block(&xd->mi[0]->mbmi);
av1_token_state tokens[MAX_TX_SQUARE + 1][2];
unsigned best_index[MAX_TX_SQUARE + 1][2];
uint8_t token_cache[MAX_TX_SQUARE];
const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
......@@ -242,7 +242,7 @@ int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block,
tokens[i][0].next = next;
tokens[i][0].qc = x;
tokens[i][0].dqc = dqcoeff[rc];
best_index[i][0] = best;
tokens[i][0].best_index = best;
/* Evaluate the second possibility for this state. */
rate0 = tokens[next][0].rate;
......@@ -282,7 +282,6 @@ int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block,
x -= 2 * sz + 1;
} else {
tokens[i][1] = tokens[i][0];
best_index[i][1] = best_index[i][0];
next = i;
if (UNLIKELY(!(--band_left))) {
......@@ -392,7 +391,7 @@ int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block,
tokens[i][1].dqc = 0;
}
best_index[i][1] = best;
tokens[i][1].best_index = best;
/* Finally, make this the new head of the trellis. */
next = i;
} else {
......@@ -411,7 +410,7 @@ int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block,
tokens[next][1].rate += get_token_bit_costs(*token_costs, 1, pt, t1);
tokens[next][1].token = ZERO_TOKEN;
}
best_index[i][0] = best_index[i][1] = 0;
tokens[i][0].best_index = tokens[i][1].best_index = 0;
shortcut = (tokens[next][0].rate != tokens[next][1].rate);
/* Don't update next, because we didn't add a new node. */
}
......@@ -445,7 +444,7 @@ int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block,
dqcoeff[rc] = tokens[i][best].dqc;
next = tokens[i][best].next;
best = best_index[i][best];
best = tokens[i][best].best_index;
}
final_eob++;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment