Commit 1870382c authored by Thomas Davies

AOM_QM: use SIMD for flat matrices and re-enable tests.

When AOM_QM is enabled, quantization matrices are flat by default;
non-flat matrices are used only when requested with --enable-qm=1.
Re-use the existing SIMD functions when a flat matrix is in use, so
that enabling AOM_QM carries no speed penalty.

SIMD for the non-flat case is TBC.

Change-Id: I1bb8da70d3dd5858dac15099610ddf61662e3d0d
parent f3b5ee14
...@@ -368,12 +368,17 @@ void aom_qm_init(AV1_COMMON *cm) { ...@@ -368,12 +368,17 @@ void aom_qm_init(AV1_COMMON *cm) {
current = 0; current = 0;
for (t = 0; t < TX_SIZES_ALL; ++t) { for (t = 0; t < TX_SIZES_ALL; ++t) {
size = tx_size_2d[t]; size = tx_size_2d[t];
if (q == NUM_QM_LEVELS - 1) {
cm->gqmatrix[q][c][f][t] = NULL;
cm->giqmatrix[q][c][f][t] = NULL;
} else {
cm->gqmatrix[q][c][f][t] = &wt_matrix_ref[AOMMIN( cm->gqmatrix[q][c][f][t] = &wt_matrix_ref[AOMMIN(
NUM_QM_LEVELS - 1, f == 0 ? q + DEFAULT_QM_INTER_OFFSET : q)][c] NUM_QM_LEVELS - 1, f == 0 ? q + DEFAULT_QM_INTER_OFFSET : q)][c]
[current]; [current];
cm->giqmatrix[q][c][f][t] = &iwt_matrix_ref[AOMMIN( cm->giqmatrix[q][c][f][t] = &iwt_matrix_ref[AOMMIN(
NUM_QM_LEVELS - 1, f == 0 ? q + DEFAULT_QM_INTER_OFFSET : q)][c] NUM_QM_LEVELS - 1, f == 0 ? q + DEFAULT_QM_INTER_OFFSET : q)][c]
[current]; [current];
}
current += size; current += size;
} }
} }
......
...@@ -195,7 +195,7 @@ static int decode_coefs(MACROBLOCKD *xd, PLANE_TYPE type, tran_low_t *dqcoeff, ...@@ -195,7 +195,7 @@ static int decode_coefs(MACROBLOCKD *xd, PLANE_TYPE type, tran_low_t *dqcoeff,
#else #else
#if CONFIG_AOM_QM #if CONFIG_AOM_QM
// Apply quant matrix only for 2D transforms // Apply quant matrix only for 2D transforms
if (IS_2D_TRANSFORM(tx_type)) if (IS_2D_TRANSFORM(tx_type) && iqmatrix != NULL)
dqv = ((iqmatrix[scan[c]] * (int)dqv) + (1 << (AOM_QM_BITS - 1))) >> dqv = ((iqmatrix[scan[c]] * (int)dqv) + (1 << (AOM_QM_BITS - 1))) >>
AOM_QM_BITS; AOM_QM_BITS;
#endif #endif
......
...@@ -577,7 +577,7 @@ void av1_quantize_fp_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs, ...@@ -577,7 +577,7 @@ void av1_quantize_fp_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
#if CONFIG_AOM_QM #if CONFIG_AOM_QM
const qm_val_t *qm_ptr = qparam->qmatrix; const qm_val_t *qm_ptr = qparam->qmatrix;
const qm_val_t *iqm_ptr = qparam->iqmatrix; const qm_val_t *iqm_ptr = qparam->iqmatrix;
if (1 /*qm_ptr != NULL || iqm_ptr != NULL*/) { if (qm_ptr != NULL && iqm_ptr != NULL) {
quantize_fp_helper_c(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round_fp, quantize_fp_helper_c(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round_fp,
p->quant_fp, p->quant_shift, qcoeff_ptr, dqcoeff_ptr, p->quant_fp, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
pd->dequant, eob_ptr, sc->scan, sc->iscan, qm_ptr, pd->dequant, eob_ptr, sc->scan, sc->iscan, qm_ptr,
...@@ -630,7 +630,7 @@ void av1_quantize_b_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs, ...@@ -630,7 +630,7 @@ void av1_quantize_b_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
#if CONFIG_AOM_QM #if CONFIG_AOM_QM
const qm_val_t *qm_ptr = qparam->qmatrix; const qm_val_t *qm_ptr = qparam->qmatrix;
const qm_val_t *iqm_ptr = qparam->iqmatrix; const qm_val_t *iqm_ptr = qparam->iqmatrix;
if (1 /*qm_ptr != NULL && iqm_ptr != NULL*/) { if (qm_ptr != NULL && iqm_ptr != NULL) {
quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round, quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr, p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
pd->dequant, eob_ptr, sc->scan, sc->iscan, qm_ptr, pd->dequant, eob_ptr, sc->scan, sc->iscan, qm_ptr,
...@@ -845,7 +845,7 @@ void av1_highbd_quantize_fp_facade(const tran_low_t *coeff_ptr, ...@@ -845,7 +845,7 @@ void av1_highbd_quantize_fp_facade(const tran_low_t *coeff_ptr,
#if CONFIG_AOM_QM #if CONFIG_AOM_QM
const qm_val_t *qm_ptr = qparam->qmatrix; const qm_val_t *qm_ptr = qparam->qmatrix;
const qm_val_t *iqm_ptr = qparam->iqmatrix; const qm_val_t *iqm_ptr = qparam->iqmatrix;
if (1 /*qm_ptr != NULL && iqm_ptr != NULL*/) { if (qm_ptr != NULL && iqm_ptr != NULL) {
highbd_quantize_fp_helper_c( highbd_quantize_fp_helper_c(
coeff_ptr, n_coeffs, skip_block, p->zbin, p->round_fp, p->quant_fp, coeff_ptr, n_coeffs, skip_block, p->zbin, p->round_fp, p->quant_fp,
p->quant_shift, qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr, sc->scan, p->quant_shift, qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr, sc->scan,
...@@ -884,7 +884,7 @@ void av1_highbd_quantize_b_facade(const tran_low_t *coeff_ptr, ...@@ -884,7 +884,7 @@ void av1_highbd_quantize_b_facade(const tran_low_t *coeff_ptr,
#if CONFIG_AOM_QM #if CONFIG_AOM_QM
const qm_val_t *qm_ptr = qparam->qmatrix; const qm_val_t *qm_ptr = qparam->qmatrix;
const qm_val_t *iqm_ptr = qparam->iqmatrix; const qm_val_t *iqm_ptr = qparam->iqmatrix;
if (1 /*qm_ptr != NULL && iqm_ptr != NULL*/) { if (qm_ptr != NULL && iqm_ptr != NULL) {
highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, p->zbin, highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, skip_block, p->zbin,
p->round, p->quant, p->quant_shift, qcoeff_ptr, p->round, p->quant, p->quant_shift, qcoeff_ptr,
dqcoeff_ptr, pd->dequant, eob_ptr, sc->scan, dqcoeff_ptr, pd->dequant, eob_ptr, sc->scan,
......
...@@ -214,9 +214,12 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, ...@@ -214,9 +214,12 @@ static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
const int x_a = x - 2 * sz - 1; const int x_a = x - 2 * sz - 1;
int dqv; int dqv;
#if CONFIG_AOM_QM #if CONFIG_AOM_QM
int iwt = iqmatrix[rc]; int iwt;
dqv = dequant_ptr[rc != 0]; dqv = dequant_ptr[rc != 0];
if (iqmatrix != NULL) {
iwt = iqmatrix[rc];
dqv = ((iwt * (int)dqv) + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS; dqv = ((iwt * (int)dqv) + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
}
#else #else
dqv = dequant_ptr[rc != 0]; dqv = dequant_ptr[rc != 0];
#endif #endif
......
...@@ -195,7 +195,6 @@ TEST_P(AV1QuantizeTest, BitExactCheck) { RunQuantizeTest(); } ...@@ -195,7 +195,6 @@ TEST_P(AV1QuantizeTest, BitExactCheck) { RunQuantizeTest(); }
TEST_P(AV1QuantizeTest, EobVerify) { RunEobTest(); } TEST_P(AV1QuantizeTest, EobVerify) { RunEobTest(); }
#if HAVE_SSE4_1 #if HAVE_SSE4_1
#if !CONFIG_AOM_QM
const QuantizeFuncParams qfps[4] = { const QuantizeFuncParams qfps[4] = {
QuantizeFuncParams(av1_highbd_quantize_fp_sse4_1, &av1_highbd_quantize_fp_c, QuantizeFuncParams(av1_highbd_quantize_fp_sse4_1, &av1_highbd_quantize_fp_c,
16), 16),
...@@ -208,6 +207,5 @@ const QuantizeFuncParams qfps[4] = { ...@@ -208,6 +207,5 @@ const QuantizeFuncParams qfps[4] = {
}; };
INSTANTIATE_TEST_CASE_P(SSE4_1, AV1QuantizeTest, ::testing::ValuesIn(qfps)); INSTANTIATE_TEST_CASE_P(SSE4_1, AV1QuantizeTest, ::testing::ValuesIn(qfps));
#endif // !CONFIG_AOM_QM
#endif // HAVE_SSE4_1 #endif // HAVE_SSE4_1
} // namespace } // namespace
...@@ -163,11 +163,9 @@ LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += error_block_test.cc ...@@ -163,11 +163,9 @@ LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += error_block_test.cc
#LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_quantize_test.cc #LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_quantize_test.cc
LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += subtract_test.cc LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += subtract_test.cc
LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += arf_freq_test.cc LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += arf_freq_test.cc
ifneq ($(CONFIG_AOM_QM), yes)
ifneq ($(CONFIG_NEW_QUANT), yes) ifneq ($(CONFIG_NEW_QUANT), yes)
LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += quantize_func_test.cc LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += quantize_func_test.cc
endif endif
endif
LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += block_error_test.cc LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += block_error_test.cc
LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_inv_txfm_test.cc LIBAOM_TEST_SRCS-$(CONFIG_AV1_ENCODER) += av1_inv_txfm_test.cc
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment