From 1c122c24a1fdbe140f40d08b0373e4b6324b1210 Mon Sep 17 00:00:00 2001 From: Thomas <thdavies@cisco.com> Date: Fri, 19 Feb 2016 09:06:12 +0000 Subject: [PATCH] Add quant and dequant functions for new quant matrices. Change-Id: If0ba62428216fa343b9a37a3b349edba4103c00a --- vp10/common/vp10_rtcd_defs.pl | 87 +++++--- vp10/decoder/decodeframe.c | 36 +++- vp10/decoder/decoder.c | 4 + vp10/decoder/detokenize.c | 25 ++- vp10/encoder/dct.c | 30 ++- vp10/encoder/encodemb.c | 348 +++++++++++++++++++++++++++----- vp10/encoder/encoder.c | 3 + vp10/encoder/quantize.c | 160 +++++++++++++-- vpx_dsp/quantize.c | 366 ++++++++++++++++++++++++++++++++++ vpx_dsp/quantize.h | 57 +++++- vpx_dsp/vpx_dsp_rtcd_defs.pl | 38 ++-- 11 files changed, 1034 insertions(+), 120 deletions(-) diff --git a/vp10/common/vp10_rtcd_defs.pl b/vp10/common/vp10_rtcd_defs.pl index 9831bdd580..6f8900ae69 100644 --- a/vp10/common/vp10_rtcd_defs.pl +++ b/vp10/common/vp10_rtcd_defs.pl @@ -309,37 +309,67 @@ if (vpx_config("CONFIG_VP10_ENCODER") eq "yes") { # ENCODEMB INVOKE -if (vpx_config("CONFIG_VPX_HIGHBITDEPTH") eq "yes") { -# the transform coefficients are held in 32-bit -# values, so the assembler code for vp10_block_error can no longer be used. - add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz"; - specialize qw/vp10_block_error/; +if (vpx_config("CONFIG_AOM_QM") eq "yes") { + if (vpx_config("CONFIG_VPX_HIGHBITDEPTH") eq "yes") { + # the transform coefficients are held in 32-bit + # values, so the assembler code for vp10_block_error can no longer be used. 
+ add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz"; + specialize qw/vp10_block_error/; + + add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr"; + + add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr"; + + add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr"; + specialize qw/vp10_fdct8x8_quant/; + } else { + add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz"; + specialize qw/vp10_block_error avx2 msa/, "$sse2_x86inc"; - add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t 
*iscan"; - specialize qw/vp10_quantize_fp/; + add_proto qw/int64_t vp10_block_error_fp/, "const int16_t *coeff, const int16_t *dqcoeff, int block_size"; + specialize qw/vp10_block_error_fp neon/, "$sse2_x86inc"; - add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; - specialize qw/vp10_quantize_fp_32x32/; + add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr"; - add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; - specialize qw/vp10_fdct8x8_quant/; + add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr"; + + add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t 
*zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr"; + } } else { - add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz"; - specialize qw/vp10_block_error avx2 msa/, "$sse2_x86inc"; + if (vpx_config("CONFIG_VPX_HIGHBITDEPTH") eq "yes") { + # the transform coefficients are held in 32-bit + # values, so the assembler code for vp10_block_error can no longer be used. + add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz"; + specialize qw/vp10_block_error/; + + add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + specialize qw/vp10_quantize_fp/; + + add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + specialize qw/vp10_quantize_fp_32x32/; - add_proto qw/int64_t vp10_block_error_fp/, "const int16_t *coeff, const int16_t *dqcoeff, int block_size"; - specialize qw/vp10_block_error_fp neon/, "$sse2_x86inc"; + add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t 
*quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + specialize qw/vp10_fdct8x8_quant/; + } else { + add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz"; + specialize qw/vp10_block_error avx2 msa/, "$sse2_x86inc"; + + add_proto qw/int64_t vp10_block_error_fp/, "const int16_t *coeff, const int16_t *dqcoeff, int block_size"; + specialize qw/vp10_block_error_fp neon/, "$sse2_x86inc"; - add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; - specialize qw/vp10_quantize_fp neon sse2/, "$ssse3_x86_64_x86inc"; + add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + specialize qw/vp10_quantize_fp neon sse2/, "$ssse3_x86_64_x86inc"; - add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; - specialize qw/vp10_quantize_fp_32x32/, "$ssse3_x86_64_x86inc"; + add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, 
const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + specialize qw/vp10_quantize_fp_32x32/, "$ssse3_x86_64_x86inc"; + + add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + specialize qw/vp10_fdct8x8_quant sse2 ssse3 neon/; + } - add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; - specialize qw/vp10_fdct8x8_quant sse2 ssse3 neon/; } + # fdct functions if (vpx_config("CONFIG_VPX_HIGHBITDEPTH") eq "yes") { @@ -574,11 +604,18 @@ if (vpx_config("CONFIG_VPX_HIGHBITDEPTH") eq "yes") { add_proto qw/int64_t vp10_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd"; specialize qw/vp10_highbd_block_error sse2/; - add_proto qw/void vp10_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; - specialize qw/vp10_highbd_quantize_fp/; + if (vpx_config("CONFIG_AOM_QM") eq "yes") { + add_proto qw/void 
vp10_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr"; - add_proto qw/void vp10_highbd_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; - specialize qw/vp10_highbd_quantize_fp_32x32/; + add_proto qw/void vp10_highbd_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr"; + } else { + add_proto qw/void vp10_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + specialize qw/vp10_highbd_quantize_fp/; + + add_proto qw/void vp10_highbd_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t 
*iscan"; + specialize qw/vp10_highbd_quantize_fp_32x32/; + + } # fdct functions add_proto qw/void vp10_highbd_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type"; diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c index 59e0c9be0b..3f7f098fac 100644 --- a/vp10/decoder/decodeframe.c +++ b/vp10/decoder/decodeframe.c @@ -712,8 +712,8 @@ static void dec_build_inter_predictors_sb(VP10Decoder *const pbi, } } -static INLINE TX_SIZE dec_get_uv_tx_size(const MB_MODE_INFO *mbmi, int n4_wl, - int n4_hl) { +static INLINE TX_SIZE + dec_get_uv_tx_size(const MB_MODE_INFO *mbmi, int n4_wl, int n4_hl) { // get minimum log2 num4x4s dimension const int x = VPXMIN(n4_wl, n4_hl); return VPXMIN(mbmi->tx_size, x); @@ -1120,8 +1120,13 @@ static void setup_quantization(VP10_COMMON *const cm, static void setup_segmentation_dequant(VP10_COMMON *const cm) { // Build y/uv dequant values based on segmentation. + int i = 0; +#if CONFIG_AOM_QM + int lossless; + int j = 0; + int qmindex; +#endif if (cm->seg.enabled) { - int i; for (i = 0; i < MAX_SEGMENTS; ++i) { const int qindex = vp10_get_qindex(&cm->seg, i, cm->base_qindex); cm->y_dequant[i][0] = @@ -1131,6 +1136,19 @@ static void setup_segmentation_dequant(VP10_COMMON *const cm) { vp10_dc_quant(qindex, cm->uv_dc_delta_q, cm->bit_depth); cm->uv_dequant[i][1] = vp10_ac_quant(qindex, cm->uv_ac_delta_q, cm->bit_depth); +#if CONFIG_AOM_QM + lossless = qindex == 0 && cm->y_dc_delta_q == 0 && + cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0; + // NB: depends on base index so there is only 1 set per frame + // No quant weighting when lossless + qmindex = lossless ? 
QINDEX_RANGE - 1 : cm->base_qindex; + for (j = 0; j < TX_SIZES; ++j) { + cm->y_iqmatrix[i][1][j] = aom_iqmatrix(cm, qmindex, 0, j, 1); + cm->y_iqmatrix[i][0][j] = aom_iqmatrix(cm, qmindex, 0, j, 0); + cm->uv_iqmatrix[i][1][j] = aom_iqmatrix(cm, qmindex, 1, j, 1); + cm->uv_iqmatrix[i][0][j] = aom_iqmatrix(cm, qmindex, 1, j, 0); + } +#endif } } else { const int qindex = cm->base_qindex; @@ -1143,6 +1161,18 @@ static void setup_segmentation_dequant(VP10_COMMON *const cm) { vp10_dc_quant(qindex, cm->uv_dc_delta_q, cm->bit_depth); cm->uv_dequant[0][1] = vp10_ac_quant(qindex, cm->uv_ac_delta_q, cm->bit_depth); +#if CONFIG_AOM_QM + lossless = qindex == 0 && cm->y_dc_delta_q == 0 && cm->uv_dc_delta_q == 0 && + cm->uv_ac_delta_q == 0; + // No quant weighting when lossless + qmindex = lossless ? QINDEX_RANGE - 1 : cm->base_qindex; + for (j = 0; j < TX_SIZES; ++j) { + cm->y_iqmatrix[i][1][j] = aom_iqmatrix(cm, qmindex, 0, j, 1); + cm->y_iqmatrix[i][0][j] = aom_iqmatrix(cm, qmindex, 0, j, 0); + cm->uv_iqmatrix[i][1][j] = aom_iqmatrix(cm, qmindex, 1, j, 1); + cm->uv_iqmatrix[i][0][j] = aom_iqmatrix(cm, qmindex, 1, j, 0); + } +#endif } } diff --git a/vp10/decoder/decoder.c b/vp10/decoder/decoder.c index f979abf2f2..438c2da9d4 100644 --- a/vp10/decoder/decoder.c +++ b/vp10/decoder/decoder.c @@ -109,6 +109,10 @@ VP10Decoder *vp10_decoder_create(BufferPool *const pool) { vp10_loop_filter_init(cm); +#if CONFIG_AOM_QM + aom_qm_init(cm); +#endif + cm->error.setjmp = 0; vpx_get_worker_interface()->init(&pbi->lf_worker); diff --git a/vp10/decoder/detokenize.c b/vp10/decoder/detokenize.c index e57d1efde8..d4da4bb8d4 100644 --- a/vp10/decoder/detokenize.c +++ b/vp10/decoder/detokenize.c @@ -43,14 +43,25 @@ static INLINE int read_coeff(const vpx_prob *probs, int n, vpx_reader *r) { return val; } +#if CONFIG_AOM_QM static int decode_coefs(const MACROBLOCKD *xd, PLANE_TYPE type, tran_low_t *dqcoeff, TX_SIZE tx_size, const int16_t *dq, int ctx, const int16_t *scan, const int16_t *nb, - 
vpx_reader *r) { + vpx_reader *r, const qm_val_t *iqm[2][TX_SIZES]) +#else +static int decode_coefs(const MACROBLOCKD *xd, PLANE_TYPE type, + tran_low_t *dqcoeff, TX_SIZE tx_size, const int16_t *dq, + int ctx, const int16_t *scan, const int16_t *nb, + vpx_reader *r) +#endif +{ FRAME_COUNTS *counts = xd->counts; const int max_eob = 16 << (tx_size << 1); const FRAME_CONTEXT *const fc = xd->fc; const int ref = is_inter_block(&xd->mi[0]->mbmi); +#if CONFIG_AOM_QM + const qm_val_t *iqmatrix = iqm[!ref][tx_size]; +#endif int band, c = 0; const vpx_prob(*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] = fc->coef_probs[tx_size][type][ref]; @@ -183,6 +194,10 @@ static int decode_coefs(const MACROBLOCKD *xd, PLANE_TYPE type, } } } +#if CONFIG_AOM_QM + dqv = ((iqmatrix[scan[c]] * (int)dqv) + (1 << (AOM_QM_BITS - 1))) >> + AOM_QM_BITS; +#endif v = (val * dqv) >> dq_shift; #if CONFIG_COEFFICIENT_RANGE_CHECKING #if CONFIG_VPX_HIGHBITDEPTH @@ -249,8 +264,16 @@ int vp10_decode_block_tokens(MACROBLOCKD *xd, int plane, const scan_order *sc, const int16_t *const dequant = pd->seg_dequant[seg_id]; const int ctx = get_entropy_context(tx_size, pd->above_context + x, pd->left_context + y); +#if CONFIG_AOM_QM + const int eob = + decode_coefs(xd, pd->plane_type, pd->dqcoeff, tx_size, dequant, ctx, + sc->scan, sc->neighbors, r, pd->seg_iqmatrix[seg_id]); +#else const int eob = decode_coefs(xd, pd->plane_type, pd->dqcoeff, tx_size, dequant, ctx, sc->scan, sc->neighbors, r); +#endif dec_set_contexts(xd, pd, tx_size, eob > 0, x, y); return eob; } + + diff --git a/vp10/encoder/dct.c b/vp10/encoder/dct.c index 2a7ba7ef4e..300a742518 100644 --- a/vp10/encoder/dct.c +++ b/vp10/encoder/dct.c @@ -1021,7 +1021,12 @@ void vp10_fdct8x8_quant_c(const int16_t *input, int stride, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, - const int16_t *scan, const int16_t *iscan) { + const int16_t *scan, const int16_t *iscan +#if 
CONFIG_AOM_QM + , + const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr +#endif + ) { int eob = -1; int i, j; @@ -1107,16 +1112,29 @@ void vp10_fdct8x8_quant_c(const int16_t *input, int stride, for (i = 0; i < n_coeffs; i++) { const int rc = scan[i]; const int coeff = coeff_ptr[rc]; +#if CONFIG_AOM_QM + const qm_val_t wt = qm_ptr[rc]; + const qm_val_t iwt = iqm_ptr[rc]; + const int dequant = + (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> + AOM_QM_BITS; +#endif const int coeff_sign = (coeff >> 31); const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - int tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX); - tmp = (tmp * quant_ptr[rc != 0]) >> 16; - - qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign; + int64_t tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX); + int tmp32; +#if CONFIG_AOM_QM + tmp32 = (tmp * quant_ptr[rc != 0] * wt) >> (16 + AOM_QM_BITS); + qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign; + dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant; +#else + tmp32 = (tmp * quant_ptr[rc != 0]) >> 16; + qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign; dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0]; +#endif - if (tmp) eob = i; + if (tmp32) eob = i; } } *eob_ptr = eob + 1; diff --git a/vp10/encoder/encodemb.c b/vp10/encoder/encodemb.c index b2fbf13a9d..afd38a3272 100644 --- a/vp10/encoder/encodemb.c +++ b/vp10/encoder/encodemb.c @@ -99,6 +99,11 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size, const PLANE_TYPE type = pd->plane_type; const int default_eob = 16 << (tx_size << 1); const int mul = 1 + (tx_size == TX_32X32); +#if CONFIG_AOM_QM + int seg_id = xd->mi[0]->mbmi.segment_id; + int is_intra = !is_inter_block(&xd->mi[0]->mbmi); + const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][is_intra][tx_size]; +#endif const int16_t *dequant_ptr = pd->dequant; const uint8_t *const band_translate = get_band_translate(tx_size); TX_TYPE tx_type = get_tx_type(type, xd, block); @@ -138,7 
+143,11 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size, for (i = eob; i-- > 0;) { int base_bits, d2, dx; + const int rc = scan[i]; +#if CONFIG_AOM_QM + int iwt = iqmatrix[rc]; +#endif int x = qcoeff[rc]; /* Only add a trellis state for non-zero coefficients. */ if (x) { @@ -182,9 +191,16 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size, rate0 = tokens[next][0].rate; rate1 = tokens[next][1].rate; +#if CONFIG_AOM_QM + if ((abs(x) * dequant_ptr[rc != 0] * iwt > + ((abs(coeff[rc]) * mul) << AOM_QM_BITS)) && + (abs(x) * dequant_ptr[rc != 0] * iwt < + ((abs(coeff[rc]) * mul + dequant_ptr[rc != 0]) << AOM_QM_BITS))) +#else if ((abs(x) * dequant_ptr[rc != 0] > abs(coeff[rc]) * mul) && (abs(x) * dequant_ptr[rc != 0] < abs(coeff[rc]) * mul + dequant_ptr[rc != 0])) +#endif shortcut = 1; else shortcut = 0; @@ -239,6 +255,7 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size, #endif // CONFIG_VPX_HIGHBITDEPTH d2 = dx * dx; } + tokens[i][1].rate = base_bits + (best ? rate1 : rate0); tokens[i][1].error = d2 + (best ? 
error1 : error0); tokens[i][1].next = next; @@ -288,12 +305,21 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size, for (i = next; i < eob; i = next) { const int x = tokens[i][best].qc; const int rc = scan[i]; +#if CONFIG_AOM_QM + const int iwt = iqmatrix[rc]; + const int dequant = + (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS; +#endif if (x) { final_eob = i; } qcoeff[rc] = x; +#if CONFIG_AOM_QM + dqcoeff[rc] = (x * dequant) / mul; +#else dqcoeff[rc] = (x * dequant_ptr[rc != 0]) / mul; +#endif next = tokens[i][best].next; best = best_index[i][best]; @@ -328,11 +354,17 @@ void vp10_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff, vp10_fwht4x4(src_diff, coeff, diff_stride); } else { switch (tx_type) { - case DCT_DCT: vpx_fdct4x4(src_diff, coeff, diff_stride); break; + case DCT_DCT: + vpx_fdct4x4(src_diff, coeff, diff_stride); + break; case ADST_DCT: case DCT_ADST: - case ADST_ADST: vp10_fht4x4(src_diff, coeff, diff_stride, tx_type); break; - default: assert(0); break; + case ADST_ADST: + vp10_fht4x4(src_diff, coeff, diff_stride, tx_type); + break; + default: + assert(0); + break; } } } @@ -343,8 +375,12 @@ static void fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff, case DCT_DCT: case ADST_DCT: case DCT_ADST: - case ADST_ADST: vp10_fht8x8(src_diff, coeff, diff_stride, tx_type); break; - default: assert(0); break; + case ADST_ADST: + vp10_fht8x8(src_diff, coeff, diff_stride, tx_type); + break; + default: + assert(0); + break; } } @@ -354,8 +390,12 @@ static void fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff, case DCT_DCT: case ADST_DCT: case DCT_ADST: - case ADST_ADST: vp10_fht16x16(src_diff, coeff, diff_stride, tx_type); break; - default: assert(0); break; + case ADST_ADST: + vp10_fht16x16(src_diff, coeff, diff_stride, tx_type); + break; + default: + assert(0); + break; } } @@ -363,11 +403,17 @@ static void fwd_txfm_32x32(int rd_transform, const int16_t *src_diff, tran_low_t *coeff, int 
diff_stride, TX_TYPE tx_type) { switch (tx_type) { - case DCT_DCT: fdct32x32(rd_transform, src_diff, coeff, diff_stride); break; + case DCT_DCT: + fdct32x32(rd_transform, src_diff, coeff, diff_stride); + break; case ADST_DCT: case DCT_ADST: - case ADST_ADST: assert(0); break; - default: assert(0); break; + case ADST_ADST: + assert(0); + break; + default: + assert(0); + break; } } @@ -379,13 +425,17 @@ void vp10_highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff, vp10_highbd_fwht4x4(src_diff, coeff, diff_stride); } else { switch (tx_type) { - case DCT_DCT: vpx_highbd_fdct4x4(src_diff, coeff, diff_stride); break; + case DCT_DCT: + vpx_highbd_fdct4x4(src_diff, coeff, diff_stride); + break; case ADST_DCT: case DCT_ADST: case ADST_ADST: vp10_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type); break; - default: assert(0); break; + default: + assert(0); + break; } } } @@ -393,26 +443,34 @@ void vp10_highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff, static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff, int diff_stride, TX_TYPE tx_type) { switch (tx_type) { - case DCT_DCT: vpx_highbd_fdct8x8(src_diff, coeff, diff_stride); break; + case DCT_DCT: + vpx_highbd_fdct8x8(src_diff, coeff, diff_stride); + break; case ADST_DCT: case DCT_ADST: case ADST_ADST: vp10_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type); break; - default: assert(0); break; + default: + assert(0); + break; } } static void highbd_fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff, int diff_stride, TX_TYPE tx_type) { switch (tx_type) { - case DCT_DCT: vpx_highbd_fdct16x16(src_diff, coeff, diff_stride); break; + case DCT_DCT: + vpx_highbd_fdct16x16(src_diff, coeff, diff_stride); + break; case ADST_DCT: case DCT_ADST: case ADST_ADST: vp10_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type); break; - default: assert(0); break; + default: + assert(0); + break; } } @@ -425,8 +483,12 @@ static void highbd_fwd_txfm_32x32(int rd_transform, const int16_t 
*src_diff, break; case ADST_DCT: case DCT_ADST: - case ADST_ADST: assert(0); break; - default: assert(0); break; + case ADST_ADST: + assert(0); + break; + default: + assert(0); + break; } } #endif // CONFIG_VPX_HIGHBITDEPTH @@ -444,6 +506,12 @@ void vp10_xform_quant_fp(MACROBLOCK *x, int plane, int block, int blk_row, tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); uint16_t *const eob = &p->eobs[block]; const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; +#if CONFIG_AOM_QM + int seg_id = xd->mi[0]->mbmi.segment_id; + int is_intra = !is_inter_block(&xd->mi[0]->mbmi); + const qm_val_t *qmatrix = pd->seg_qmatrix[seg_id][is_intra][tx_size]; + const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][is_intra][tx_size]; +#endif const int16_t *src_diff; src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)]; @@ -455,21 +523,34 @@ void vp10_xform_quant_fp(MACROBLOCK *x, int plane, int block, int blk_row, vp10_highbd_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin, p->round_fp, p->quant_fp, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, - scan_order->scan, scan_order->iscan); + scan_order->scan, +#if !CONFIG_AOM_QM + scan_order->iscan); +#else + scan_order->iscan, qmatrix, iqmatrix); +#endif break; case TX_16X16: vpx_highbd_fdct16x16(src_diff, coeff, diff_stride); vp10_highbd_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp, p->quant_fp, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, +#if !CONFIG_AOM_QM scan_order->iscan); +#else + scan_order->iscan, qmatrix, iqmatrix); +#endif break; case TX_8X8: vpx_highbd_fdct8x8(src_diff, coeff, diff_stride); vp10_highbd_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp, p->quant_fp, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, +#if !CONFIG_AOM_QM scan_order->iscan); +#else + scan_order->iscan, qmatrix, iqmatrix); +#endif break; case TX_4X4: if (xd->lossless[xd->mi[0]->mbmi.segment_id]) { @@ -480,9 +561,14 @@ void 
vp10_xform_quant_fp(MACROBLOCK *x, int plane, int block, int blk_row, vp10_highbd_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp, p->quant_fp, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, +#if !CONFIG_AOM_QM scan_order->iscan); +#else + scan_order->iscan, qmatrix, iqmatrix); +#endif break; - default: assert(0); + default: + assert(0); } return; } @@ -494,19 +580,32 @@ void vp10_xform_quant_fp(MACROBLOCK *x, int plane, int block, int blk_row, vp10_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin, p->round_fp, p->quant_fp, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, +#if !CONFIG_AOM_QM scan_order->iscan); +#else + scan_order->iscan, qmatrix, iqmatrix); +#endif break; case TX_16X16: vpx_fdct16x16(src_diff, coeff, diff_stride); vp10_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp, p->quant_fp, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, scan_order->scan, scan_order->iscan); + pd->dequant, eob, scan_order->scan, +#if !CONFIG_AOM_QM + scan_order->iscan); +#else + scan_order->iscan, qmatrix, iqmatrix); +#endif break; case TX_8X8: vp10_fdct8x8_quant(src_diff, diff_stride, coeff, 64, x->skip_block, p->zbin, p->round_fp, p->quant_fp, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, +#if !CONFIG_AOM_QM scan_order->iscan); +#else + scan_order->iscan, qmatrix, iqmatrix); +#endif break; case TX_4X4: if (xd->lossless[xd->mi[0]->mbmi.segment_id]) { @@ -516,9 +615,16 @@ void vp10_xform_quant_fp(MACROBLOCK *x, int plane, int block, int blk_row, } vp10_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp, p->quant_fp, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, scan_order->scan, scan_order->iscan); + pd->dequant, eob, scan_order->scan, +#if !CONFIG_AOM_QM + scan_order->iscan); +#else + scan_order->iscan, qmatrix, iqmatrix); +#endif + break; + default: + assert(0); break; - default: assert(0); break; } } @@ -532,6 +638,12 @@ void vp10_xform_quant_dc(MACROBLOCK *x, 
int plane, int block, int blk_row, tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); uint16_t *const eob = &p->eobs[block]; const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; + int seg_id = xd->mi[0]->mbmi.segment_id; +#if CONFIG_AOM_QM + int is_intra = !is_inter_block(&xd->mi[0]->mbmi); + const qm_val_t *qmatrix = pd->seg_qmatrix[seg_id][is_intra][tx_size]; + const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][is_intra][tx_size]; +#endif const int16_t *src_diff; src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)]; @@ -542,31 +654,49 @@ void vp10_xform_quant_dc(MACROBLOCK *x, int plane, int block, int blk_row, vpx_highbd_fdct32x32_1(src_diff, coeff, diff_stride); vpx_highbd_quantize_dc_32x32(coeff, x->skip_block, p->round, p->quant_fp[0], qcoeff, dqcoeff, - pd->dequant[0], eob); + pd->dequant[0], +#if !CONFIG_AOM_QM + eob); +#else + eob, qmatrix, iqmatrix); +#endif break; case TX_16X16: vpx_highbd_fdct16x16_1(src_diff, coeff, diff_stride); vpx_highbd_quantize_dc(coeff, 256, x->skip_block, p->round, p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0], +#if !CONFIG_AOM_QM eob); +#else + eob, qmatrix, iqmatrix); +#endif break; case TX_8X8: vpx_highbd_fdct8x8_1(src_diff, coeff, diff_stride); vpx_highbd_quantize_dc(coeff, 64, x->skip_block, p->round, p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0], +#if !CONFIG_AOM_QM eob); +#else + eob, qmatrix, iqmatrix); +#endif break; case TX_4X4: - if (xd->lossless[xd->mi[0]->mbmi.segment_id]) { + if (xd->lossless[seg_id]) { vp10_highbd_fwht4x4(src_diff, coeff, diff_stride); } else { vpx_highbd_fdct4x4(src_diff, coeff, diff_stride); } vpx_highbd_quantize_dc(coeff, 16, x->skip_block, p->round, p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0], +#if !CONFIG_AOM_QM eob); +#else + eob, qmatrix, iqmatrix); +#endif break; - default: assert(0); + default: + assert(0); } return; } @@ -576,28 +706,50 @@ void vp10_xform_quant_dc(MACROBLOCK *x, int plane, int block, int blk_row, case TX_32X32: 
vpx_fdct32x32_1(src_diff, coeff, diff_stride); vpx_quantize_dc_32x32(coeff, x->skip_block, p->round, p->quant_fp[0], - qcoeff, dqcoeff, pd->dequant[0], eob); + qcoeff, dqcoeff, pd->dequant[0], +#if !CONFIG_AOM_QM + eob); +#else + eob, qmatrix, iqmatrix); +#endif break; case TX_16X16: vpx_fdct16x16_1(src_diff, coeff, diff_stride); vpx_quantize_dc(coeff, 256, x->skip_block, p->round, p->quant_fp[0], - qcoeff, dqcoeff, pd->dequant[0], eob); + qcoeff, dqcoeff, pd->dequant[0], +#if !CONFIG_AOM_QM + eob); +#else + eob, qmatrix, iqmatrix); +#endif break; case TX_8X8: vpx_fdct8x8_1(src_diff, coeff, diff_stride); vpx_quantize_dc(coeff, 64, x->skip_block, p->round, p->quant_fp[0], - qcoeff, dqcoeff, pd->dequant[0], eob); + qcoeff, dqcoeff, pd->dequant[0], +#if !CONFIG_AOM_QM + eob); +#else + eob, qmatrix, iqmatrix); +#endif break; case TX_4X4: - if (xd->lossless[xd->mi[0]->mbmi.segment_id]) { + if (xd->lossless[seg_id]) { vp10_fwht4x4(src_diff, coeff, diff_stride); } else { vpx_fdct4x4(src_diff, coeff, diff_stride); } vpx_quantize_dc(coeff, 16, x->skip_block, p->round, p->quant_fp[0], - qcoeff, dqcoeff, pd->dequant[0], eob); + qcoeff, dqcoeff, pd->dequant[0], +#if !CONFIG_AOM_QM + eob); +#else + eob, qmatrix, iqmatrix); +#endif + break; + default: + assert(0); break; - default: assert(0); break; } } @@ -614,6 +766,12 @@ void vp10_xform_quant(MACROBLOCK *x, int plane, int block, int blk_row, tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); uint16_t *const eob = &p->eobs[block]; const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; + int seg_id = xd->mi[0]->mbmi.segment_id; +#if CONFIG_AOM_QM + int is_intra = !is_inter_block(&xd->mi[0]->mbmi); + const qm_val_t *qmatrix = pd->seg_qmatrix[seg_id][is_intra][tx_size]; + const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][is_intra][tx_size]; +#endif const int16_t *src_diff; src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)]; @@ -626,31 +784,48 @@ void vp10_xform_quant(MACROBLOCK *x, int 
plane, int block, int blk_row, vpx_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, +#if !CONFIG_AOM_QM scan_order->iscan); +#else + scan_order->iscan, qmatrix, iqmatrix); +#endif break; case TX_16X16: highbd_fwd_txfm_16x16(src_diff, coeff, diff_stride, tx_type); vpx_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, +#if !CONFIG_AOM_QM scan_order->iscan); +#else + scan_order->iscan, qmatrix, iqmatrix); +#endif break; case TX_8X8: highbd_fwd_txfm_8x8(src_diff, coeff, diff_stride, tx_type); vpx_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, +#if !CONFIG_AOM_QM scan_order->iscan); +#else + scan_order->iscan, qmatrix, iqmatrix); +#endif break; case TX_4X4: vp10_highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, - xd->lossless[xd->mi[0]->mbmi.segment_id]); + xd->lossless[seg_id]); vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, +#if !CONFIG_AOM_QM scan_order->iscan); +#else + scan_order->iscan, qmatrix, iqmatrix); +#endif break; - default: assert(0); + default: + assert(0); } return; } @@ -662,28 +837,49 @@ void vp10_xform_quant(MACROBLOCK *x, int plane, int block, int blk_row, vpx_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, +#if !CONFIG_AOM_QM scan_order->iscan); +#else + scan_order->iscan, qmatrix, iqmatrix); +#endif break; case TX_16X16: fwd_txfm_16x16(src_diff, coeff, diff_stride, tx_type); vpx_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, - scan_order->scan, scan_order->iscan); + scan_order->scan, +#if 
!CONFIG_AOM_QM + scan_order->iscan); +#else + scan_order->iscan, qmatrix, iqmatrix); +#endif break; case TX_8X8: fwd_txfm_8x8(src_diff, coeff, diff_stride, tx_type); vpx_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, - scan_order->scan, scan_order->iscan); + scan_order->scan, +#if !CONFIG_AOM_QM + scan_order->iscan); +#else + scan_order->iscan, qmatrix, iqmatrix); +#endif break; case TX_4X4: vp10_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, - xd->lossless[xd->mi[0]->mbmi.segment_id]); + xd->lossless[seg_id]); vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, - scan_order->scan, scan_order->iscan); + scan_order->scan, +#if !CONFIG_AOM_QM + scan_order->iscan); +#else + scan_order->iscan, qmatrix, iqmatrix); +#endif + break; + default: + assert(0); break; - default: assert(0); break; } } @@ -780,7 +976,9 @@ static void encode_block(int plane, int block, int blk_row, int blk_col, p->eobs[block], xd->bd, tx_type, xd->lossless[xd->mi[0]->mbmi.segment_id]); break; - default: assert(0 && "Invalid transform size"); break; + default: + assert(0 && "Invalid transform size"); + break; } return; @@ -807,7 +1005,9 @@ static void encode_block(int plane, int block, int blk_row, int blk_col, vp10_inv_txfm_add_4x4(dqcoeff, dst, pd->dst.stride, p->eobs[block], tx_type, xd->lossless[xd->mi[0]->mbmi.segment_id]); break; - default: assert(0 && "Invalid transform size"); break; + default: + assert(0 && "Invalid transform size"); + break; } } @@ -899,6 +1099,12 @@ void vp10_encode_block_intra(int plane, int block, int blk_row, int blk_col, uint8_t *src, *dst; int16_t *src_diff; uint16_t *eob = &p->eobs[block]; + int seg_id = xd->mi[0]->mbmi.segment_id; +#if CONFIG_AOM_QM + int is_intra = !is_inter_block(&xd->mi[0]->mbmi); + const qm_val_t *qmatrix = pd->seg_qmatrix[seg_id][is_intra][tx_size]; + const qm_val_t *iqmatrix = 
pd->seg_iqmatrix[seg_id][is_intra][tx_size]; +#endif const int src_stride = p->src.stride; const int dst_stride = pd->dst.stride; dst = &pd->dst.buf[4 * (blk_row * dst_stride + blk_col)]; @@ -921,7 +1127,12 @@ void vp10_encode_block_intra(int plane, int block, int blk_row, int blk_col, vpx_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, - scan_order->scan, scan_order->iscan); + scan_order->scan, +#if !CONFIG_AOM_QM + scan_order->iscan); +#else + scan_order->iscan, qmatrix, iqmatrix); +#endif } if (*eob) vp10_highbd_inv_txfm_add_32x32(dqcoeff, dst, dst_stride, *eob, xd->bd, @@ -935,7 +1146,11 @@ void vp10_encode_block_intra(int plane, int block, int blk_row, int blk_col, vpx_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, +#if !CONFIG_AOM_QM scan_order->iscan); +#else + scan_order->iscan, qmatrix, iqmatrix); +#endif } if (*eob) vp10_highbd_inv_txfm_add_16x16(dqcoeff, dst, dst_stride, *eob, xd->bd, @@ -949,7 +1164,11 @@ void vp10_encode_block_intra(int plane, int block, int blk_row, int blk_col, vpx_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, +#if !CONFIG_AOM_QM scan_order->iscan); +#else + scan_order->iscan, qmatrix, iqmatrix); +#endif } if (*eob) vp10_highbd_inv_txfm_add_8x8(dqcoeff, dst, dst_stride, *eob, xd->bd, @@ -960,11 +1179,15 @@ void vp10_encode_block_intra(int plane, int block, int blk_row, int blk_col, vpx_highbd_subtract_block(4, 4, src_diff, diff_stride, src, src_stride, dst, dst_stride, xd->bd); vp10_highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, - xd->lossless[mbmi->segment_id]); + xd->lossless[seg_id]); vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, +#if !CONFIG_AOM_QM 
scan_order->iscan); +#else + scan_order->iscan, qmatrix, iqmatrix); +#endif } if (*eob) @@ -972,9 +1195,11 @@ void vp10_encode_block_intra(int plane, int block, int blk_row, int blk_col, // eob<=1 which is significant (not just an optimization) for the // lossless case. vp10_highbd_inv_txfm_add_4x4(dqcoeff, dst, dst_stride, *eob, xd->bd, - tx_type, xd->lossless[mbmi->segment_id]); + tx_type, xd->lossless[seg_id]); break; - default: assert(0); return; + default: + assert(0); + return; } if (*eob) *(args->skip) = 0; return; @@ -991,7 +1216,11 @@ void vp10_encode_block_intra(int plane, int block, int blk_row, int blk_col, vpx_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, +#if !CONFIG_AOM_QM scan_order->iscan); +#else + scan_order->iscan, qmatrix, iqmatrix); +#endif } if (*eob) vp10_inv_txfm_add_32x32(dqcoeff, dst, dst_stride, *eob, tx_type); @@ -1003,7 +1232,12 @@ void vp10_encode_block_intra(int plane, int block, int blk_row, int blk_col, fwd_txfm_16x16(src_diff, coeff, diff_stride, tx_type); vpx_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, - scan_order->scan, scan_order->iscan); + scan_order->scan, +#if !CONFIG_AOM_QM + scan_order->iscan); +#else + scan_order->iscan, qmatrix, iqmatrix); +#endif } if (*eob) vp10_inv_txfm_add_16x16(dqcoeff, dst, dst_stride, *eob, tx_type); @@ -1015,7 +1249,12 @@ void vp10_encode_block_intra(int plane, int block, int blk_row, int blk_col, fwd_txfm_8x8(src_diff, coeff, diff_stride, tx_type); vpx_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, - scan_order->scan, scan_order->iscan); + scan_order->scan, +#if !CONFIG_AOM_QM + scan_order->iscan); +#else + scan_order->iscan, qmatrix, iqmatrix); +#endif } if (*eob) vp10_inv_txfm_add_8x8(dqcoeff, dst, dst_stride, *eob, tx_type); break; @@ -1024,10 +1263,15 @@ 
void vp10_encode_block_intra(int plane, int block, int blk_row, int blk_col, vpx_subtract_block(4, 4, src_diff, diff_stride, src, src_stride, dst, dst_stride); vp10_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, - xd->lossless[mbmi->segment_id]); + xd->lossless[seg_id]); vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, - scan_order->scan, scan_order->iscan); + scan_order->scan, +#if !CONFIG_AOM_QM + scan_order->iscan); +#else + scan_order->iscan, qmatrix, iqmatrix); +#endif } if (*eob) { @@ -1035,10 +1279,12 @@ void vp10_encode_block_intra(int plane, int block, int blk_row, int blk_col, // which is significant (not just an optimization) for the lossless // case. vp10_inv_txfm_add_4x4(dqcoeff, dst, dst_stride, *eob, tx_type, - xd->lossless[mbmi->segment_id]); + xd->lossless[seg_id]); } break; - default: assert(0); break; + default: + assert(0); + break; } if (*eob) *(args->skip) = 0; } diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c index 4231d4bcf6..b7a18cc765 100644 --- a/vp10/encoder/encoder.c +++ b/vp10/encoder/encoder.c @@ -1613,6 +1613,9 @@ VP10_COMP *vp10_create_compressor(VP10EncoderConfig *oxcf, * vp10_init_quantizer() for every frame. */ vp10_init_quantizer(cpi); +#if CONFIG_AOM_QM + aom_qm_init(cm); +#endif vp10_loop_filter_init(cm); diff --git a/vp10/encoder/quantize.c b/vp10/encoder/quantize.c index 0688a69ca7..820dc4a020 100644 --- a/vp10/encoder/quantize.c +++ b/vp10/encoder/quantize.c @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#include <math.h> #include "./vpx_dsp_rtcd.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" @@ -26,7 +25,12 @@ void vp10_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, - const int16_t *iscan) { + const int16_t *iscan +#if CONFIG_AOM_QM + , + const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr +#endif + ) { int i, eob = -1; // TODO(jingning) Decide the need of these arguments after the // quantization process is completed. @@ -43,16 +47,29 @@ void vp10_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, for (i = 0; i < n_coeffs; i++) { const int rc = scan[i]; const int coeff = coeff_ptr[rc]; +#if CONFIG_AOM_QM + const qm_val_t wt = qm_ptr[rc]; + const qm_val_t iwt = iqm_ptr[rc]; + const int dequant = + (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> + AOM_QM_BITS; +#endif const int coeff_sign = (coeff >> 31); const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - int tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX); - tmp = (tmp * quant_ptr[rc != 0]) >> 16; - - qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign; + int64_t tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX); + int tmp32; +#if CONFIG_AOM_QM + tmp32 = (tmp * wt * quant_ptr[rc != 0]) >> (16 + AOM_QM_BITS); + qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign; + dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant; +#else + tmp32 = (tmp * quant_ptr[rc != 0]) >> 16; + qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign; dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0]; +#endif - if (tmp) eob = i; + if (tmp32) eob = i; } } *eob_ptr = eob + 1; @@ -66,7 +83,12 @@ void vp10_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t count, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, - const int16_t *scan, 
const int16_t *iscan) { + const int16_t *scan, const int16_t *iscan +#if CONFIG_AOM_QM + , + const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr +#endif + ) { int i; int eob = -1; // TODO(jingning) Decide the need of these arguments after the @@ -84,12 +106,26 @@ void vp10_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t count, for (i = 0; i < count; i++) { const int rc = scan[i]; const int coeff = coeff_ptr[rc]; +#if CONFIG_AOM_QM + const qm_val_t wt = qm_ptr[rc]; + const qm_val_t iwt = iqm_ptr[rc]; + const int dequant = + (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> + AOM_QM_BITS; +#endif const int coeff_sign = (coeff >> 31); const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; const int64_t tmp = abs_coeff + round_ptr[rc != 0]; +#if CONFIG_AOM_QM + const uint32_t abs_qcoeff = + (uint32_t)((tmp * quant_ptr[rc != 0] * wt) >> (16 + AOM_QM_BITS)); + qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); + dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant; +#else const uint32_t abs_qcoeff = (uint32_t)((tmp * quant_ptr[rc != 0]) >> 16); qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0]; +#endif if (abs_qcoeff) eob = i; } } @@ -106,7 +142,12 @@ void vp10_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, - const int16_t *scan, const int16_t *iscan) { + const int16_t *scan, const int16_t *iscan +#if CONFIG_AOM_QM + , + const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr +#endif + ) { int i, eob = -1; (void)zbin_ptr; (void)quant_shift_ptr; @@ -119,19 +160,38 @@ void vp10_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, for (i = 0; i < n_coeffs; i++) { const int rc = scan[i]; const int coeff = coeff_ptr[rc]; +#if CONFIG_AOM_QM + const qm_val_t wt = qm_ptr[rc]; + const qm_val_t iwt = iqm_ptr[rc]; + const int 
dequant = + (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> + AOM_QM_BITS; + int64_t tmp = 0; +#endif const int coeff_sign = (coeff >> 31); - int tmp = 0; + int tmp32 = 0; int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; +#if CONFIG_AOM_QM + if (abs_coeff * wt >= (dequant_ptr[rc != 0] << (AOM_QM_BITS - 2))) { +#else if (abs_coeff >= (dequant_ptr[rc != 0] >> 2)) { +#endif abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1); abs_coeff = clamp(abs_coeff, INT16_MIN, INT16_MAX); - tmp = (abs_coeff * quant_ptr[rc != 0]) >> 15; - qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign; - dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2; +#if CONFIG_AOM_QM + tmp = abs_coeff * wt; + tmp32 = (int)((tmp * quant_ptr[rc != 0]) >> (AOM_QM_BITS + 15)); + qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign; + dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / 2; +#else + tmp32 = (abs_coeff * quant_ptr[rc != 0]) >> 15; + qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign; + dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant_ptr[rc != 0]) / 2; +#endif } - if (tmp) eob = i; + if (tmp32) eob = i; } } *eob_ptr = eob + 1; @@ -143,7 +203,12 @@ void vp10_highbd_quantize_fp_32x32_c( const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, - const int16_t *scan, const int16_t *iscan) { + const int16_t *scan, const int16_t *iscan +#if CONFIG_AOM_QM + , + const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr +#endif + ) { int i, eob = -1; (void)zbin_ptr; (void)quant_shift_ptr; @@ -157,15 +222,32 @@ void vp10_highbd_quantize_fp_32x32_c( uint32_t abs_qcoeff = 0; const int rc = scan[i]; const int coeff = coeff_ptr[rc]; +#if CONFIG_AOM_QM + const qm_val_t wt = qm_ptr[rc]; + const qm_val_t iwt = iqm_ptr[rc]; + const int dequant = + (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> + AOM_QM_BITS; +#endif const int coeff_sign = (coeff >> 31);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - +#if CONFIG_AOM_QM + if (abs_coeff * wt >= (dequant_ptr[rc != 0] << (AOM_QM_BITS - 2))) { +#else if (abs_coeff >= (dequant_ptr[rc != 0] >> 2)) { +#endif const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1); +#if CONFIG_AOM_QM + abs_qcoeff = + (uint32_t)((tmp * wt * quant_ptr[rc != 0]) >> (AOM_QM_BITS + 15)); + qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); + dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / 2; +#else abs_qcoeff = (uint32_t)((tmp * quant_ptr[rc != 0]) >> 15); qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); - dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2; + dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant_ptr[rc != 0]) / 2; +#endif } if (abs_qcoeff) eob = i; @@ -180,6 +262,12 @@ void vp10_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block, MACROBLOCKD *const xd = &x->e_mbd; struct macroblock_plane *p = &x->plane[plane]; struct macroblockd_plane *pd = &xd->plane[plane]; +#if CONFIG_AOM_QM + int seg_id = xd->mi[0]->mbmi.segment_id; + int is_intra = !is_inter_block(&xd->mi[0]->mbmi); + const qm_val_t *qmatrix = pd->seg_qmatrix[seg_id][is_intra][0]; + const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][is_intra][0]; +#endif #if CONFIG_VPX_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { @@ -187,7 +275,12 @@ void vp10_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block, p->zbin, p->round, p->quant, p->quant_shift, BLOCK_OFFSET(p->qcoeff, block), BLOCK_OFFSET(pd->dqcoeff, block), pd->dequant, +#if !CONFIG_AOM_QM &p->eobs[block], scan, iscan); +#else + &p->eobs[block], scan, iscan, + qmatrix, iqmatrix); +#endif return; } #endif @@ -195,7 +288,12 @@ void vp10_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block, p->round, p->quant, p->quant_shift, BLOCK_OFFSET(p->qcoeff, block), BLOCK_OFFSET(pd->dqcoeff, block), pd->dequant, &p->eobs[block], - scan, iscan); +#if !CONFIG_AOM_QM + scan, iscan);
+#else + scan, iscan, + qmatrix, iqmatrix); +#endif } static void invert_quant(int16_t *quant, int16_t *shift, int d) { @@ -212,9 +310,12 @@ static int get_qzbin_factor(int q, vpx_bit_depth_t bit_depth) { const int quant = vp10_dc_quant(q, 0, bit_depth); #if CONFIG_VPX_HIGHBITDEPTH switch (bit_depth) { - case VPX_BITS_8: return q == 0 ? 64 : (quant < 148 ? 84 : 80); - case VPX_BITS_10: return q == 0 ? 64 : (quant < 592 ? 84 : 80); - case VPX_BITS_12: return q == 0 ? 64 : (quant < 2368 ? 84 : 80); + case VPX_BITS_8: + return q == 0 ? 64 : (quant < 148 ? 84 : 80); + case VPX_BITS_10: + return q == 0 ? 64 : (quant < 592 ? 84 : 80); + case VPX_BITS_12: + return q == 0 ? 64 : (quant < 2368 ? 84 : 80); default: assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12"); return -1; @@ -288,6 +389,11 @@ void vp10_init_plane_quantizers(VP10_COMP *cpi, MACROBLOCK *x) { const int qindex = vp10_get_qindex(&cm->seg, segment_id, cm->base_qindex); const int rdmult = vp10_compute_rd_mult(cpi, qindex + cm->y_dc_delta_q); int i; +#if CONFIG_AOM_QM + const int lossless = xd->lossless[segment_id]; + // Quant matrix only depends on the base QP so there is only one set per frame + int qmlevel = lossless ? 
NUM_QM_LEVELS - 1 : aom_get_qmlevel(cm->base_qindex); +#endif // Y x->plane[0].quant = quants->y_quant[qindex]; @@ -296,6 +402,12 @@ void vp10_init_plane_quantizers(VP10_COMP *cpi, MACROBLOCK *x) { x->plane[0].quant_shift = quants->y_quant_shift[qindex]; x->plane[0].zbin = quants->y_zbin[qindex]; x->plane[0].round = quants->y_round[qindex]; +#if CONFIG_AOM_QM + memcpy(&xd->plane[0].seg_qmatrix[segment_id], cm->gqmatrix[qmlevel][0], + sizeof(cm->gqmatrix[qmlevel][0])); + memcpy(&xd->plane[0].seg_iqmatrix[segment_id], cm->giqmatrix[qmlevel][0], + sizeof(cm->giqmatrix[qmlevel][0])); +#endif xd->plane[0].dequant = cpi->y_dequant[qindex]; x->plane[0].quant_thred[0] = x->plane[0].zbin[0] * x->plane[0].zbin[0]; @@ -309,6 +421,12 @@ void vp10_init_plane_quantizers(VP10_COMP *cpi, MACROBLOCK *x) { x->plane[i].quant_shift = quants->uv_quant_shift[qindex]; x->plane[i].zbin = quants->uv_zbin[qindex]; x->plane[i].round = quants->uv_round[qindex]; +#if CONFIG_AOM_QM + memcpy(&xd->plane[i].seg_qmatrix[segment_id], cm->gqmatrix[qmlevel][1], + sizeof(cm->gqmatrix[qmlevel][1])); + memcpy(&xd->plane[i].seg_iqmatrix[segment_id], cm->giqmatrix[qmlevel][1], + sizeof(cm->giqmatrix[qmlevel][1])); +#endif xd->plane[i].dequant = cpi->uv_dequant[qindex]; x->plane[i].quant_thred[0] = x->plane[i].zbin[0] * x->plane[i].zbin[0]; diff --git a/vpx_dsp/quantize.c b/vpx_dsp/quantize.c index e65f4f3cdd..096bc32ff5 100644 --- a/vpx_dsp/quantize.c +++ b/vpx_dsp/quantize.c @@ -11,6 +11,371 @@ #include "vpx_dsp/quantize.h" #include "vpx_mem/vpx_mem.h" +#if CONFIG_AOM_QM +void vpx_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block, + const int16_t *round_ptr, const int16_t quant, + tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, + const int16_t dequant_ptr, uint16_t *eob_ptr, + const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) { + const int rc = 0; + const int coeff = coeff_ptr[rc]; + const int coeff_sign = (coeff >> 31); + const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; + 
int64_t tmp, eob = -1; + int32_t tmp32; + int dequant = + (dequant_ptr * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS; + + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + + if (!skip_block) { + tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX); + tmp32 = (int32_t)((tmp * qm_ptr[rc] * quant) >> (16 + AOM_QM_BITS)); + qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign; + dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant; + if (tmp32) eob = 0; + } + *eob_ptr = eob + 1; +} + +#if CONFIG_VPX_HIGHBITDEPTH +void vpx_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, + int skip_block, const int16_t *round_ptr, + const int16_t quant, tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr, + uint16_t *eob_ptr, const qm_val_t *qm_ptr, + const qm_val_t *iqm_ptr) { + int eob = -1; + int dequant = + (dequant_ptr * iqm_ptr[0] + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS; + + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + + if (!skip_block) { + const int coeff = coeff_ptr[0]; + const int coeff_sign = (coeff >> 31); + const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; + const int64_t tmp = abs_coeff + round_ptr[0]; + const uint32_t abs_qcoeff = + (uint32_t)((tmp * qm_ptr[0] * quant) >> (16 + AOM_QM_BITS)); + qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); + dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant; + if (abs_qcoeff) eob = 0; + } + *eob_ptr = eob + 1; +} +#endif + +void vpx_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block, + const int16_t *round_ptr, const int16_t quant, + tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, + const int16_t dequant_ptr, uint16_t *eob_ptr, + const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) { + const int n_coeffs = 1024; + const int rc = 0; + const int coeff = coeff_ptr[rc]; + const int coeff_sign = (coeff >> 31); + const int 
abs_coeff = (coeff ^ coeff_sign) - coeff_sign; + int64_t tmp, eob = -1; + int32_t tmp32; + int dequant; + + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + + if (!skip_block) { + tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1), + INT16_MIN, INT16_MAX); + tmp32 = (int32_t)((tmp * qm_ptr[rc] * quant) >> (15 + AOM_QM_BITS)); + qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign; + dequant = + (dequant_ptr * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS; + dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / 2; + if (tmp32) eob = 0; + } + *eob_ptr = eob + 1; +} + +#if CONFIG_VPX_HIGHBITDEPTH +void vpx_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block, + const int16_t *round_ptr, const int16_t quant, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, + const int16_t dequant_ptr, uint16_t *eob_ptr, + const qm_val_t *qm_ptr, + const qm_val_t *iqm_ptr) { + const int n_coeffs = 1024; + int eob = -1; + int dequant; + + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + + if (!skip_block) { + const int coeff = coeff_ptr[0]; + const int coeff_sign = (coeff >> 31); + const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; + const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], 1); + const uint32_t abs_qcoeff = + (uint32_t)((tmp * qm_ptr[0] * quant) >> (15 + AOM_QM_BITS)); + qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); + dequant = + (dequant_ptr * iqm_ptr[0] + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS; + dqcoeff_ptr[0] = (qcoeff_ptr[0] * dequant) / 2; + if (abs_qcoeff) eob = 0; + } + *eob_ptr = eob + 1; +} +#endif + +void vpx_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + int skip_block, const int16_t *zbin_ptr, + const int16_t *round_ptr, const int16_t *quant_ptr, + const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, const 
int16_t *dequant_ptr, + uint16_t *eob_ptr, const int16_t *scan, + const int16_t *iscan, const qm_val_t *qm_ptr, + const qm_val_t *iqm_ptr) { + int i, non_zero_count = (int)n_coeffs, eob = -1; + const int zbins[2] = { zbin_ptr[0], zbin_ptr[1] }; + const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 }; + (void)iscan; + + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + + if (!skip_block) { + // Pre-scan pass + for (i = (int)n_coeffs - 1; i >= 0; i--) { + const int rc = scan[i]; + const qm_val_t wt = qm_ptr[rc]; + const int coeff = coeff_ptr[rc] * wt; + + if (coeff < (zbins[rc != 0] << AOM_QM_BITS) && + coeff > (nzbins[rc != 0] << AOM_QM_BITS)) + non_zero_count--; + else + break; + } + + // Quantization pass: All coefficients with index >= zero_flag are + // skippable. Note: zero_flag can be zero. + for (i = 0; i < non_zero_count; i++) { + const int rc = scan[i]; + const qm_val_t wt = qm_ptr[rc]; + const int coeff = coeff_ptr[rc]; + const int coeff_sign = (coeff >> 31); + const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; + int dequant; + + if (abs_coeff * wt >= (zbins[rc != 0] << AOM_QM_BITS)) { + int32_t tmp32; + int64_t tmp = + clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX); + tmp = tmp * wt; + tmp32 = ((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) * + quant_shift_ptr[rc != 0]) >> + (16 + AOM_QM_BITS); // quantization + dequant = + (dequant_ptr[rc != 0] * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >> + AOM_QM_BITS; + qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign; + dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant; + + if (tmp32) eob = i; + } + } + } + *eob_ptr = eob + 1; +} + +#if CONFIG_VPX_HIGHBITDEPTH +void vpx_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + int skip_block, const int16_t *zbin_ptr, + const int16_t *round_ptr, const int16_t *quant_ptr, + const int16_t *quant_shift_ptr, + tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, + const int16_t 
*dequant_ptr, uint16_t *eob_ptr, + const int16_t *scan, const int16_t *iscan, + const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) { + int i, non_zero_count = (int)n_coeffs, eob = -1; + const int zbins[2] = { zbin_ptr[0], zbin_ptr[1] }; + const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 }; + int dequant; + (void)iscan; + + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + + if (!skip_block) { + // Pre-scan pass + for (i = (int)n_coeffs - 1; i >= 0; i--) { + const int rc = scan[i]; + const qm_val_t wt = qm_ptr[rc]; + const int coeff = coeff_ptr[rc] * wt; + + if (coeff < (zbins[rc != 0] << AOM_QM_BITS) && + coeff > (nzbins[rc != 0] << AOM_QM_BITS)) + non_zero_count--; + else + break; + } + + // Quantization pass: All coefficients with index >= zero_flag are + // skippable. Note: zero_flag can be zero. + for (i = 0; i < non_zero_count; i++) { + const int rc = scan[i]; + const int coeff = coeff_ptr[rc]; + const qm_val_t wt = qm_ptr[rc]; + const int coeff_sign = (coeff >> 31); + const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; + + if (abs_coeff * wt >= (zbins[rc != 0] << AOM_QM_BITS)) { + const int64_t tmp1 = abs_coeff + round_ptr[rc != 0]; + const int64_t tmpw = tmp1 * wt; + const int64_t tmp2 = ((tmpw * quant_ptr[rc != 0]) >> 16) + tmpw; + const uint32_t abs_qcoeff = + (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> (16 + AOM_QM_BITS)); + qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); + dequant = + (dequant_ptr[rc != 0] * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >> + AOM_QM_BITS; + dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant; + if (abs_qcoeff) eob = i; + } + } + } + *eob_ptr = eob + 1; +} +#endif + +void vpx_quantize_b_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + int skip_block, const int16_t *zbin_ptr, + const int16_t *round_ptr, const int16_t *quant_ptr, + const int16_t *quant_shift_ptr, + tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, + const int16_t 
*dequant_ptr, uint16_t *eob_ptr,
+                            const int16_t *scan, const int16_t *iscan,
+                            const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) {
+  const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], 1),
+                         ROUND_POWER_OF_TWO(zbin_ptr[1], 1) };
+  const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
+
+  int idx = 0;
+  int idx_arr[1024];  // Kept scan positions; assumes n_coeffs <= 1024.
+  int i, eob = -1;
+  int dequant;
+  (void)iscan;  // Not used by this C implementation.
+
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+  if (!skip_block) {
+    // Pre-scan pass. Bounds use '*', not '<<': shifting a negative value is UB.
+    for (i = 0; i < n_coeffs; i++) {
+      const int rc = scan[i];
+      const qm_val_t wt = qm_ptr[rc];
+      const int coeff = coeff_ptr[rc] * wt;
+
+      // If the weighted coefficient is out of the base ZBIN range, keep it for
+      // quantization.
+      if (coeff >= (zbins[rc != 0] * (1 << AOM_QM_BITS)) ||
+          coeff <= (nzbins[rc != 0] * (1 << AOM_QM_BITS)))
+        idx_arr[idx++] = i;
+    }
+
+    // Quantization pass: only process the coefficients selected in
+    // pre-scan pass. Note: idx can be zero.
+    for (i = 0; i < idx; i++) {
+      const int rc = scan[idx_arr[i]];
+      const int coeff = coeff_ptr[rc];
+      const int coeff_sign = (coeff >> 31);
+      const qm_val_t wt = qm_ptr[rc];
+      int64_t tmp;
+      int tmp32;
+      int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+      abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
+      tmp = clamp(abs_coeff, INT16_MIN, INT16_MAX);
+      tmp = tmp * wt;
+      tmp32 = ((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) *
+               quant_shift_ptr[rc != 0]) >>
+              (15 + AOM_QM_BITS);
+
+      qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
+      dequant =
+          (dequant_ptr[rc != 0] * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >>
+          AOM_QM_BITS;
+      dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / 2;
+
+      if (tmp32) eob = idx_arr[i];
+    }
+  }
+  *eob_ptr = eob + 1;  // One past the last scan index quantized to non-zero.
+}
+
+#if CONFIG_VPX_HIGHBITDEPTH
+void vpx_highbd_quantize_b_32x32_c(
+    const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
+    const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
+    const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr,
+    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+    const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
+    const qm_val_t *iqm_ptr) {
+  const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], 1),
+                         ROUND_POWER_OF_TWO(zbin_ptr[1], 1) };
+  const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
+
+  int idx = 0;
+  int idx_arr[1024];  // Kept scan positions; assumes n_coeffs <= 1024.
+  int i, eob = -1;
+  int dequant;
+  (void)iscan;  // Not used by this C implementation.
+
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+  if (!skip_block) {
+    // Pre-scan pass. Bounds use '*', not '<<': shifting a negative value is UB.
+    for (i = 0; i < n_coeffs; i++) {
+      const int rc = scan[i];
+      const qm_val_t wt = qm_ptr[rc];
+      const int coeff = coeff_ptr[rc] * wt;
+
+      // If the weighted coefficient is out of the base ZBIN range, keep it for
+      // quantization.
+      if (coeff >= (zbins[rc != 0] * (1 << AOM_QM_BITS)) ||
+          coeff <= (nzbins[rc != 0] * (1 << AOM_QM_BITS)))
+        idx_arr[idx++] = i;
+    }
+
+    // Quantization pass: only process the coefficients selected in
+    // pre-scan pass. Note: idx can be zero.
+    for (i = 0; i < idx; i++) {
+      const int rc = scan[idx_arr[i]];
+      const int coeff = coeff_ptr[rc];
+      const int coeff_sign = (coeff >> 31);
+      const qm_val_t wt = qm_ptr[rc];
+      const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+      const int64_t tmp1 =
+          abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
+      const int64_t tmpw = tmp1 * wt;
+      const int64_t tmp2 = ((tmpw * quant_ptr[rc != 0]) >> 16) + tmpw;
+      const uint32_t abs_qcoeff =
+          (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> (15 + AOM_QM_BITS));
+      qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
+      dequant =
+          (dequant_ptr[rc != 0] * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >>
+          AOM_QM_BITS;
+      dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / 2;
+      if (abs_qcoeff) eob = idx_arr[i];
+    }
+  }
+  *eob_ptr = eob + 1;  // One past the last scan index quantized to non-zero.
+}
+#endif  // CONFIG_VPX_HIGHBITDEPTH
+#else void vpx_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t quant, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, @@ -317,3 +682,4 @@ void vpx_highbd_quantize_b_32x32_c( *eob_ptr = eob + 1; } #endif +#endif diff --git a/vpx_dsp/quantize.h b/vpx_dsp/quantize.h index 75ab9f28bb..229d4dce10 100644 --- a/vpx_dsp/quantize.h +++ b/vpx_dsp/quantize.h @@ -18,6 +18,47 @@ extern "C" { #endif +#if CONFIG_AOM_QM +void vpx_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block, + const int16_t *round_ptr, const int16_t quant_ptr, + tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, + const int16_t dequant_ptr, uint16_t *eob_ptr, + const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr); +void vpx_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block, + const int16_t *round_ptr, const int16_t quant_ptr, + tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, + const int16_t dequant_ptr, uint16_t *eob_ptr, + const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr); +void vpx_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + int skip_block, const int16_t *zbin_ptr, + const int16_t *round_ptr, const 
int16_t *quant_ptr, + const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, + uint16_t *eob_ptr, const int16_t *scan, + const int16_t *iscan, const qm_val_t *qm_ptr, + const qm_val_t *iqm_ptr); +#if CONFIG_VPX_HIGHBITDEPTH +void vpx_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, + int skip_block, const int16_t *round_ptr, + const int16_t quant_ptr, tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr, + uint16_t *eob_ptr, const qm_val_t *qm_ptr, + const qm_val_t *iqm_ptr); +void vpx_highbd_quantize_dc_32x32( + const tran_low_t *coeff_ptr, int skip_block, const int16_t *round_ptr, + const int16_t quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, + const int16_t dequant_ptr, uint16_t *eob_ptr, const qm_val_t *qm_ptr, + const qm_val_t *iqm_ptr); +void vpx_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + int skip_block, const int16_t *zbin_ptr, + const int16_t *round_ptr, const int16_t *quant_ptr, + const int16_t *quant_shift_ptr, + tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, + const int16_t *dequant_ptr, uint16_t *eob_ptr, + const int16_t *scan, const int16_t *iscan, + const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr); +#endif +#else void vpx_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, @@ -26,7 +67,13 @@ void vpx_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block, const int16_t *round_ptr, const int16_t quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr, uint16_t *eob_ptr); - +void vpx_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + int skip_block, const int16_t *zbin_ptr, + const int16_t *round_ptr, const int16_t *quant_ptr, + const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, + uint16_t 
*eob_ptr, const int16_t *scan, + const int16_t *iscan); #if CONFIG_VPX_HIGHBITDEPTH void vpx_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block, const int16_t *round_ptr, @@ -39,6 +86,14 @@ void vpx_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr, uint16_t *eob_ptr); +void vpx_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + int skip_block, const int16_t *zbin_ptr, + const int16_t *round_ptr, const int16_t *quant_ptr, + const int16_t *quant_shift_ptr, + tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, + const int16_t *dequant_ptr, uint16_t *eob_ptr, + const int16_t *scan, const int16_t *iscan); +#endif #endif #ifdef __cplusplus diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index dcdefa45e8..10e33215d6 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -931,21 +931,35 @@ if (vpx_config("CONFIG_VPX_HIGHBITDEPTH") eq "yes") { # # Quantization # -if (vpx_config("CONFIG_VP10_ENCODER") eq "yes") { - add_proto qw/void vpx_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; - specialize qw/vpx_quantize_b sse2/, "$ssse3_x86_64_x86inc", "$avx_x86_64_x86inc"; +if (vpx_config("CONFIG_AOM_QM") eq "yes") { + if (vpx_config("CONFIG_VP10_ENCODER") eq "yes") { + add_proto qw/void vpx_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const 
qm_val_t * qm_ptr, const qm_val_t * iqm_ptr"; - add_proto qw/void vpx_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; - specialize qw/vpx_quantize_b_32x32/, "$ssse3_x86_64_x86inc", "$avx_x86_64_x86inc"; + add_proto qw/void vpx_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr"; - if (vpx_config("CONFIG_VPX_HIGHBITDEPTH") eq "yes") { - add_proto qw/void vpx_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; - specialize qw/vpx_highbd_quantize_b sse2/; + if (vpx_config("CONFIG_VPX_HIGHBITDEPTH") eq "yes") { + add_proto qw/void vpx_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr"; - add_proto qw/void vpx_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t 
*quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; - specialize qw/vpx_highbd_quantize_b_32x32 sse2/; - } # CONFIG_VPX_HIGHBITDEPTH -} # CONFIG_VP10_ENCODER + add_proto qw/void vpx_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr"; + } # CONFIG_VPX_HIGHBITDEPTH + } # CONFIG_VP10_ENCODER +} else { + if (vpx_config("CONFIG_VP10_ENCODER") eq "yes") { + add_proto qw/void vpx_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + specialize qw/vpx_quantize_b sse2/, "$ssse3_x86_64_x86inc", "$avx_x86_64_x86inc"; + + add_proto qw/void vpx_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + specialize qw/vpx_quantize_b_32x32/, "$ssse3_x86_64_x86inc", "$avx_x86_64_x86inc"; + + if (vpx_config("CONFIG_VPX_HIGHBITDEPTH") eq "yes") { + add_proto qw/void vpx_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t 
*qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + specialize qw/vpx_highbd_quantize_b sse2/; + + add_proto qw/void vpx_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + specialize qw/vpx_highbd_quantize_b_32x32 sse2/; + } # CONFIG_VPX_HIGHBITDEPTH + } # CONFIG_VP10_ENCODER +} # CONFIG_AOM_QM if (vpx_config("CONFIG_ENCODERS") eq "yes") { # -- GitLab