Commit 030cea9b authored by Urvang Joshi

16x64 and 64x16 transforms: Reuse scan order, eob

16x64 reuses scan order of 16x32
64x16 reuses scan order of 32x16

Max eob is capped at 512 (instead of 1024) for both.

Change-Id: Iac2145aa5e3d090009e2a2f5715caa8d84dfb2ee
parent 6fa05dcf
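
The scheme behind the diffs below, as a minimal standalone sketch: a 64x16 forward transform can only leave non-zero coefficients in the left 32 columns of each of its 16 rows, so the commit packs those 32x16 = 512 values contiguously (matching the reused 32x16 scan order and the 512 eob cap), and the inverse side expands them back before transforming. The helper names here are hypothetical; the commit inlines equivalent loops directly in the transform functions.

/* Sketch of the packing convention (hypothetical helpers, not commit code). */
#include <stdint.h>
#include <string.h>

/* Pack the potentially non-zero left 32 columns of a 64-wide, 16-row
 * coefficient buffer into its first 32 * 16 = 512 slots in place
 * (row 0 already starts at offset 0). */
static void pack_64x16_coeffs(int32_t *coeffs) {
  for (int row = 1; row < 16; ++row)
    memcpy(coeffs + row * 32, coeffs + row * 64, 32 * sizeof(*coeffs));
}

/* Expand 512 packed coefficients back to the full 64-wide layout,
 * zeroing the right 32 columns of every row. */
static void unpack_64x16_coeffs(const int32_t *packed, int32_t *full) {
  for (int row = 0; row < 16; ++row) {
    memcpy(full + row * 64, packed + row * 32, 32 * sizeof(*full));
    memset(full + row * 64 + 32, 0, 32 * sizeof(*full));
  }
}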
@@ -446,6 +446,7 @@ void av1_fwd_txfm2d_16x64_c(const int16_t *input, int32_t *output, int stride,
 #endif
   // Zero out the bottom 16x32 area.
   memset(output + 16 * 32, 0, 16 * 32 * sizeof(*output));
+  // Note: no repacking needed here.
 }

 void av1_fwd_txfm2d_64x16_c(const int16_t *input, int32_t *output, int stride,
@@ -458,6 +459,10 @@ void av1_fwd_txfm2d_64x16_c(const int16_t *input, int32_t *output, int stride,
   for (int row = 0; row < 16; ++row) {
     memset(output + row * 64 + 32, 0, 32 * sizeof(*output));
   }
+  // Re-pack non-zero coeffs in the first 32x16 indices.
+  for (int row = 1; row < 16; ++row) {
+    memcpy(output + row * 32, output + row * 64, 32 * sizeof(*output));
+  }
 }
 #endif  // CONFIG_TX64X64
......
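Why the 16x64 forward path above needs no repacking while 64x16 does: coefficients are stored row-major with stride equal to the transform width (this matches the memset/memcpy arithmetic in the hunks), so whether the kept region is contiguous depends on that width. A sketch of the index arithmetic, for illustration only:

/* Illustrative index arithmetic, not commit code. */
static int coeff_idx(int stride, int row, int col) { return row * stride + col; }
/* 16x64 (stride 16): kept top 16x32 region = rows 0..31, cols 0..15,
 * i.e. indices 0..511 -- one contiguous run, so no repack is needed.
 * 64x16 (stride 64): kept left 32x16 region = rows 0..15, cols 0..31,
 * i.e. indices row * 64 .. row * 64 + 31 -- strided, hence the per-row
 * memcpy that packs them down to indices 0..511. */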
@@ -413,12 +413,27 @@ void av1_inv_txfm2d_add_32x64_c(const int32_t *input, uint16_t *output,
 void av1_inv_txfm2d_add_16x64_c(const int32_t *input, uint16_t *output,
                                 int stride, TX_TYPE tx_type, int bd) {
+  // Remap 16x32 input into a modified 16x64 input by:
+  // - Copying over these values in top-left 16x32 locations.
+  // - Setting the rest of the locations to 0.
+  int32_t mod_input[16 * 64];
+  memcpy(mod_input, input, 16 * 32 * sizeof(*mod_input));
+  memset(mod_input + 16 * 32, 0, 16 * 32 * sizeof(*mod_input));
   int txfm_buf[16 * 64 + 64 + 64];
-  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_16X64, bd);
+  inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_16X64,
+                        bd);
 }

 void av1_inv_txfm2d_add_64x16_c(const int32_t *input, uint16_t *output,
                                 int stride, TX_TYPE tx_type, int bd) {
+  // Remap 32x16 input into a modified 64x16 by:
+  // - Copying over these values in top-left 32x16 locations.
+  // - Setting the rest of the locations to 0.
+  int32_t mod_input[64 * 16];
+  for (int row = 0; row < 16; ++row) {
+    memcpy(mod_input + row * 64, input + row * 32, 32 * sizeof(*mod_input));
+    memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input));
+  }
 #if CONFIG_TXMG
   int txfm_buf[16 * 64 + 64 + 64];
   int32_t rinput[16 * 64];
@@ -430,13 +445,14 @@ void av1_inv_txfm2d_add_64x16_c(const int32_t *input, uint16_t *output,
   int h = tx_size_high[tx_size];
   int rw = h;
   int rh = w;
-  transpose_int32(rinput, rw, input, w, w, h);
+  transpose_int32(rinput, rw, mod_input, w, w, h);
   transpose_uint16(routput, rw, output, stride, w, h);
   inv_txfm2d_add_facade(rinput, routput, rw, txfm_buf, rtx_type, rtx_size, bd);
   transpose_uint16(output, stride, routput, rw, rw, rh);
 #else
   int txfm_buf[16 * 64 + 64 + 64];
-  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_64X16, bd);
+  inv_txfm2d_add_facade(mod_input, output, stride, txfm_buf, tx_type, TX_64X16,
+                        bd);
 #endif  // CONFIG_TXMG
 }
 #endif  // CONFIG_TX64X64
......
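A hedged usage sketch of the inverse path above: after this change the caller hands in only the 512 packed coefficients, and the function expands them into the full 64x16 layout (mod_input) internally. The include and the example arguments are assumptions, not part of the commit; the signature is taken from the hunk.

#include <stdint.h>
#include "av1/common/av1_rtcd.h" /* assumed: generated header declaring the _c variants */

static void example_inverse_64x16(const int32_t packed[32 * 16],
                                  uint16_t *recon) {
  /* 'packed' holds the 32x16 = 512 dequantized coefficients in packed
   * layout; 'recon' is a 64x16 reconstruction buffer with stride 64.
   * DCT_DCT and bd = 8 are example values. */
  av1_inv_txfm2d_add_64x16_c(packed, recon, /*stride=*/64, DCT_DCT, /*bd=*/8);
}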
@@ -1399,13 +1399,15 @@ static INLINE void transpose_int32(int32_t *dst, int dst_stride,
 }

 static INLINE int av1_get_max_eob(TX_SIZE tx_size) {
-  return
 #if CONFIG_TX64X64 && !CONFIG_DAALA_TX
-      tx_size == TX_64X64 || tx_size == TX_64X32 || tx_size == TX_32X64
-          ? 1024
-          :
+  if (tx_size == TX_64X64 || tx_size == TX_64X32 || tx_size == TX_32X64) {
+    return 1024;
+  }
+  if (tx_size == TX_16X64 || tx_size == TX_64X16) {
+    return 512;
+  }
 #endif  // CONFIG_TX64X64 && !CONFIG_DAALA_TX
-      tx_size_2d[tx_size];
+  return tx_size_2d[tx_size];
 }

 #ifdef __cplusplus
......
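Assert-style sketch of the resulting eob bounds. The values follow directly from the new function; TX_16X16 is included to show the tx_size_2d fall-through. Assumes CONFIG_TX64X64 && !CONFIG_DAALA_TX, with the header edited above included.

#include <assert.h>

static void check_max_eob_bounds(void) {
  assert(av1_get_max_eob(TX_64X64) == 1024); /* unchanged */
  assert(av1_get_max_eob(TX_64X32) == 1024);
  assert(av1_get_max_eob(TX_32X64) == 1024);
  assert(av1_get_max_eob(TX_16X64) == 512);  /* newly capped */
  assert(av1_get_max_eob(TX_64X16) == 512);  /* newly capped */
  assert(av1_get_max_eob(TX_16X16) == 256);  /* tx_size_2d[TX_16X16] */
}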
@@ -487,16 +487,14 @@ void aom_qm_init(AV1_COMMON *cm) {
       current = 0;
       for (t = 0; t < TX_SIZES_ALL; ++t) {
         const int size = tx_size_2d[t];
+        const int qm_tx_size = get_qm_tx_size(t);
         if (q == NUM_QM_LEVELS - 1) {
           cm->gqmatrix[q][c][t] = NULL;
           cm->giqmatrix[q][c][t] = NULL;
-        } else if (size > 1024) {  // Reuse matrices for TX_32X32
-          cm->gqmatrix[q][c][t] = cm->gqmatrix[q][c][TX_32X32];
-          cm->giqmatrix[q][c][t] = cm->giqmatrix[q][c][TX_32X32];
+        } else if (t != qm_tx_size) {  // Reuse matrices for 'qm_tx_size'
+          cm->gqmatrix[q][c][t] = cm->gqmatrix[q][c][qm_tx_size];
+          cm->giqmatrix[q][c][t] = cm->giqmatrix[q][c][qm_tx_size];
         } else {
-#if CONFIG_TX64X64
-          if (t == TX_16X64 || t == TX_64X16) continue;
-#endif  // CONFIG_TX64X64
           assert(current + size <= QM_TOTAL_SIZE);
           cm->gqmatrix[q][c][t] = &wt_matrix_ref[q][c >= 1][current];
           cm->giqmatrix[q][c][t] = &iwt_matrix_ref[q][c >= 1][current];
......
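Toy sketch of the pointer-reuse pattern in aom_qm_init above (real weight tables elided; qm_val_t is assumed to be an 8-bit type as in aom). The aliasing only works because the remap target precedes the remapped size in iteration order, e.g. TX_16X32 precedes TX_16X64 in TX_SIZES_ALL:

typedef unsigned char qm_val_t_toy; /* stand-in for aom's qm_val_t */

enum { T_16X32, T_32X16, T_16X64, T_64X16, T_COUNT }; /* toy subset */
static qm_val_t_toy storage[2][512];                  /* own matrices */
static const qm_val_t_toy *gqmatrix_toy[T_COUNT];

static void toy_qm_init(void) {
  static const int qm_remap[T_COUNT] = { T_16X32, T_32X16, T_16X32, T_32X16 };
  for (int t = 0; t < T_COUNT; ++t) {
    if (qm_remap[t] != t)
      gqmatrix_toy[t] = gqmatrix_toy[qm_remap[t]]; /* alias earlier entry */
    else
      gqmatrix_toy[t] = storage[t]; /* own backing store (t is 0 or 1 here) */
  }
}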
@@ -60,7 +60,22 @@ qm_val_t *aom_iqmatrix(struct AV1Common *cm, int qindex, int comp,
                        TX_SIZE tx_size);
 qm_val_t *aom_qmatrix(struct AV1Common *cm, int qindex, int comp,
                       TX_SIZE tx_size);
-#endif
+
+static inline TX_SIZE get_qm_tx_size(TX_SIZE tx_size) {
+#if CONFIG_TX64X64
+  if (tx_size == TX_64X64 || tx_size == TX_64X32 || tx_size == TX_32X64) {
+    return TX_32X32;
+  }
+  if (tx_size == TX_16X64) {
+    return TX_16X32;
+  }
+  if (tx_size == TX_64X16) {
+    return TX_32X16;
+  }
+#endif  // CONFIG_TX64X64
+  return tx_size;
+}
+#endif  // CONFIG_AOM_QM

 #if CONFIG_NEW_QUANT
......
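Assert-style sketch of the mapping introduced by get_qm_tx_size above (assumes CONFIG_TX64X64 is enabled, the header above is included, and the TX_SIZE enums come from av1/common/enums.h):

#include <assert.h>

static void check_qm_tx_size_mapping(void) {
  assert(get_qm_tx_size(TX_64X64) == TX_32X32);
  assert(get_qm_tx_size(TX_64X32) == TX_32X32);
  assert(get_qm_tx_size(TX_32X64) == TX_32X32);
  assert(get_qm_tx_size(TX_16X64) == TX_16X32); /* new in this commit */
  assert(get_qm_tx_size(TX_64X16) == TX_32X16); /* new in this commit */
  assert(get_qm_tx_size(TX_8X8) == TX_8X8);     /* other sizes map to themselves */
}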
This diff is collapsed.
@@ -2666,6 +2666,7 @@ void av1_fht16x64_c(const int16_t *input, tran_low_t *output, int stride,
   }
   // Zero out the bottom 16x32 area.
   memset(output + 2 * n * n, 0, 2 * n * n * sizeof(*output));
+  // Note: no repacking needed here.
   // Note: overall scale factor of transform is 4 times unitary
 }
@@ -2720,6 +2721,10 @@ void av1_fht64x16_c(const int16_t *input, tran_low_t *output, int stride,
   for (int row = 0; row < n; ++row) {
     memset(output + row * n4 + 2 * n, 0, 2 * n * sizeof(*output));
   }
+  // Re-pack non-zero coeffs in the first 32x16 indices.
+  for (int row = 1; row < 16; ++row) {
+    memcpy(output + row * 32, output + row * 64, 32 * sizeof(*output));
+  }
   // Note: overall scale factor of transform is 4 times unitary
 }
 #endif  // CONFIG_TX64X64
......
@@ -157,13 +157,7 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
 #endif
 #if CONFIG_AOM_QM
   int seg_id = xd->mi[0]->mbmi.segment_id;
-  const TX_SIZE qm_tx_size =
-#if CONFIG_TX64X64
-      tx_size == TX_64X64 || tx_size == TX_64X32 || tx_size == TX_32X64
-          ? TX_32X32
-          :
-#endif  // CONFIG_TX64X64
-      tx_size;
+  const TX_SIZE qm_tx_size = get_qm_tx_size(tx_size);
   // Use a flat matrix (i.e. no weighting) for 1D and Identity transforms
   const qm_val_t *iqmatrix =
       IS_2D_TRANSFORM(tx_type)
@@ -513,14 +507,8 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
   const int diff_stride = block_size_wide[plane_bsize];
 #if CONFIG_AOM_QM
   int seg_id = mbmi->segment_id;
+  const TX_SIZE qm_tx_size = get_qm_tx_size(tx_size);
   // Use a flat matrix (i.e. no weighting) for 1D and Identity transforms
-  const TX_SIZE qm_tx_size =
-#if CONFIG_TX64X64
-      tx_size == TX_64X64 || tx_size == TX_64X32 || tx_size == TX_32X64
-          ? TX_32X32
-          :
-#endif  // CONFIG_TX64X64
-      tx_size;
   const qm_val_t *qmatrix =
       IS_2D_TRANSFORM(tx_type) ? pd->seg_qmatrix[seg_id][qm_tx_size]
                                : cm->gqmatrix[NUM_QM_LEVELS - 1][0][qm_tx_size];
......