Commit 1c122c24 authored by Thomas's avatar Thomas Committed by Yaowu Xu

Add quant and dequant functions for new quant matrices.

Change-Id: If0ba62428216fa343b9a37a3b349edba4103c00a
parent 95a89994
......@@ -309,37 +309,67 @@ if (vpx_config("CONFIG_VP10_ENCODER") eq "yes") {
# ENCODEMB INVOKE
if (vpx_config("CONFIG_VPX_HIGHBITDEPTH") eq "yes") {
# the transform coefficients are held in 32-bit
# values, so the assembler code for vp10_block_error can no longer be used.
add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
specialize qw/vp10_block_error/;
if (vpx_config("CONFIG_AOM_QM") eq "yes") {
if (vpx_config("CONFIG_VPX_HIGHBITDEPTH") eq "yes") {
# the transform coefficients are held in 32-bit
# values, so the assembler code for vp10_block_error can no longer be used.
add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
specialize qw/vp10_block_error/;
add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
specialize qw/vp10_fdct8x8_quant/;
} else {
add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
specialize qw/vp10_block_error avx2 msa/, "$sse2_x86inc";
add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_quantize_fp/;
add_proto qw/int64_t vp10_block_error_fp/, "const int16_t *coeff, const int16_t *dqcoeff, int block_size";
specialize qw/vp10_block_error_fp neon/, "$sse2_x86inc";
add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_quantize_fp_32x32/;
add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_fdct8x8_quant/;
add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
}
} else {
add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
specialize qw/vp10_block_error avx2 msa/, "$sse2_x86inc";
if (vpx_config("CONFIG_VPX_HIGHBITDEPTH") eq "yes") {
# the transform coefficients are held in 32-bit
# values, so the assembler code for vp10_block_error can no longer be used.
add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
specialize qw/vp10_block_error/;
add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_quantize_fp/;
add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_quantize_fp_32x32/;
add_proto qw/int64_t vp10_block_error_fp/, "const int16_t *coeff, const int16_t *dqcoeff, int block_size";
specialize qw/vp10_block_error_fp neon/, "$sse2_x86inc";
add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_fdct8x8_quant/;
} else {
add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
specialize qw/vp10_block_error avx2 msa/, "$sse2_x86inc";
add_proto qw/int64_t vp10_block_error_fp/, "const int16_t *coeff, const int16_t *dqcoeff, int block_size";
specialize qw/vp10_block_error_fp neon/, "$sse2_x86inc";
add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_quantize_fp neon sse2/, "$ssse3_x86_64_x86inc";
add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_quantize_fp neon sse2/, "$ssse3_x86_64_x86inc";
add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_quantize_fp_32x32/, "$ssse3_x86_64_x86inc";
add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_quantize_fp_32x32/, "$ssse3_x86_64_x86inc";
add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_fdct8x8_quant sse2 ssse3 neon/;
}
add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_fdct8x8_quant sse2 ssse3 neon/;
}
# fdct functions
if (vpx_config("CONFIG_VPX_HIGHBITDEPTH") eq "yes") {
......@@ -574,11 +604,18 @@ if (vpx_config("CONFIG_VPX_HIGHBITDEPTH") eq "yes") {
add_proto qw/int64_t vp10_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd";
specialize qw/vp10_highbd_block_error sse2/;
add_proto qw/void vp10_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_highbd_quantize_fp/;
if (vpx_config("CONFIG_AOM_QM") eq "yes") {
add_proto qw/void vp10_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
add_proto qw/void vp10_highbd_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_highbd_quantize_fp_32x32/;
add_proto qw/void vp10_highbd_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
} else {
add_proto qw/void vp10_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_highbd_quantize_fp/;
add_proto qw/void vp10_highbd_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_highbd_quantize_fp_32x32/;
}
# fdct functions
add_proto qw/void vp10_highbd_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
......
......@@ -712,8 +712,8 @@ static void dec_build_inter_predictors_sb(VP10Decoder *const pbi,
}
}
static INLINE TX_SIZE dec_get_uv_tx_size(const MB_MODE_INFO *mbmi, int n4_wl,
int n4_hl) {
static INLINE TX_SIZE
dec_get_uv_tx_size(const MB_MODE_INFO *mbmi, int n4_wl, int n4_hl) {
// get minimum log2 num4x4s dimension
const int x = VPXMIN(n4_wl, n4_hl);
return VPXMIN(mbmi->tx_size, x);
......@@ -1120,8 +1120,13 @@ static void setup_quantization(VP10_COMMON *const cm,
static void setup_segmentation_dequant(VP10_COMMON *const cm) {
// Build y/uv dequant values based on segmentation.
int i = 0;
#if CONFIG_AOM_QM
int lossless;
int j = 0;
int qmindex;
#endif
if (cm->seg.enabled) {
int i;
for (i = 0; i < MAX_SEGMENTS; ++i) {
const int qindex = vp10_get_qindex(&cm->seg, i, cm->base_qindex);
cm->y_dequant[i][0] =
......@@ -1131,6 +1136,19 @@ static void setup_segmentation_dequant(VP10_COMMON *const cm) {
vp10_dc_quant(qindex, cm->uv_dc_delta_q, cm->bit_depth);
cm->uv_dequant[i][1] =
vp10_ac_quant(qindex, cm->uv_ac_delta_q, cm->bit_depth);
#if CONFIG_AOM_QM
lossless = qindex == 0 && cm->y_dc_delta_q == 0 &&
cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;
// NB: depends on base index so there is only 1 set per frame
// No quant weighting when lossless
qmindex = lossless ? QINDEX_RANGE - 1 : cm->base_qindex;
for (j = 0; j < TX_SIZES; ++j) {
cm->y_iqmatrix[i][1][j] = aom_iqmatrix(cm, qmindex, 0, j, 1);
cm->y_iqmatrix[i][0][j] = aom_iqmatrix(cm, qmindex, 0, j, 0);
cm->uv_iqmatrix[i][1][j] = aom_iqmatrix(cm, qmindex, 1, j, 1);
cm->uv_iqmatrix[i][0][j] = aom_iqmatrix(cm, qmindex, 1, j, 0);
}
#endif
}
} else {
const int qindex = cm->base_qindex;
......@@ -1143,6 +1161,18 @@ static void setup_segmentation_dequant(VP10_COMMON *const cm) {
vp10_dc_quant(qindex, cm->uv_dc_delta_q, cm->bit_depth);
cm->uv_dequant[0][1] =
vp10_ac_quant(qindex, cm->uv_ac_delta_q, cm->bit_depth);
#if CONFIG_AOM_QM
lossless = qindex == 0 && cm->y_dc_delta_q == 0 && cm->uv_dc_delta_q == 0 &&
cm->uv_ac_delta_q == 0;
// No quant weighting when lossless
qmindex = lossless ? QINDEX_RANGE - 1 : cm->base_qindex;
for (j = 0; j < TX_SIZES; ++j) {
cm->y_iqmatrix[i][1][j] = aom_iqmatrix(cm, qmindex, 0, j, 1);
cm->y_iqmatrix[i][0][j] = aom_iqmatrix(cm, qmindex, 0, j, 0);
cm->uv_iqmatrix[i][1][j] = aom_iqmatrix(cm, qmindex, 1, j, 1);
cm->uv_iqmatrix[i][0][j] = aom_iqmatrix(cm, qmindex, 1, j, 0);
}
#endif
}
}
......
......@@ -109,6 +109,10 @@ VP10Decoder *vp10_decoder_create(BufferPool *const pool) {
vp10_loop_filter_init(cm);
#if CONFIG_AOM_QM
aom_qm_init(cm);
#endif
cm->error.setjmp = 0;
vpx_get_worker_interface()->init(&pbi->lf_worker);
......
......@@ -43,14 +43,25 @@ static INLINE int read_coeff(const vpx_prob *probs, int n, vpx_reader *r) {
return val;
}
#if CONFIG_AOM_QM
static int decode_coefs(const MACROBLOCKD *xd, PLANE_TYPE type,
tran_low_t *dqcoeff, TX_SIZE tx_size, const int16_t *dq,
int ctx, const int16_t *scan, const int16_t *nb,
vpx_reader *r) {
vpx_reader *r, const qm_val_t *iqm[2][TX_SIZES])
#else
static int decode_coefs(const MACROBLOCKD *xd, PLANE_TYPE type,
tran_low_t *dqcoeff, TX_SIZE tx_size, const int16_t *dq,
int ctx, const int16_t *scan, const int16_t *nb,
vpx_reader *r)
#endif
{
FRAME_COUNTS *counts = xd->counts;
const int max_eob = 16 << (tx_size << 1);
const FRAME_CONTEXT *const fc = xd->fc;
const int ref = is_inter_block(&xd->mi[0]->mbmi);
#if CONFIG_AOM_QM
const qm_val_t *iqmatrix = iqm[!ref][tx_size];
#endif
int band, c = 0;
const vpx_prob(*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
fc->coef_probs[tx_size][type][ref];
......@@ -183,6 +194,10 @@ static int decode_coefs(const MACROBLOCKD *xd, PLANE_TYPE type,
}
}
}
#if CONFIG_AOM_QM
dqv = ((iqmatrix[scan[c]] * (int)dqv) + (1 << (AOM_QM_BITS - 1))) >>
AOM_QM_BITS;
#endif
v = (val * dqv) >> dq_shift;
#if CONFIG_COEFFICIENT_RANGE_CHECKING
#if CONFIG_VPX_HIGHBITDEPTH
......@@ -249,8 +264,16 @@ int vp10_decode_block_tokens(MACROBLOCKD *xd, int plane, const scan_order *sc,
const int16_t *const dequant = pd->seg_dequant[seg_id];
const int ctx =
get_entropy_context(tx_size, pd->above_context + x, pd->left_context + y);
#if CONFIG_AOM_QM
const int eob =
decode_coefs(xd, pd->plane_type, pd->dqcoeff, tx_size, dequant, ctx,
sc->scan, sc->neighbors, r, pd->seg_iqmatrix[seg_id]);
#else
const int eob = decode_coefs(xd, pd->plane_type, pd->dqcoeff, tx_size,
dequant, ctx, sc->scan, sc->neighbors, r);
#endif
dec_set_contexts(xd, pd, tx_size, eob > 0, x, y);
return eob;
}
......@@ -1021,7 +1021,12 @@ void vp10_fdct8x8_quant_c(const int16_t *input, int stride,
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
const int16_t *scan, const int16_t *iscan
#if CONFIG_AOM_QM
,
const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
#endif
) {
int eob = -1;
int i, j;
......@@ -1107,16 +1112,29 @@ void vp10_fdct8x8_quant_c(const int16_t *input, int stride,
for (i = 0; i < n_coeffs; i++) {
const int rc = scan[i];
const int coeff = coeff_ptr[rc];
#if CONFIG_AOM_QM
const qm_val_t wt = qm_ptr[rc];
const qm_val_t iwt = iqm_ptr[rc];
const int dequant =
(dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
AOM_QM_BITS;
#endif
const int coeff_sign = (coeff >> 31);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
int tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
tmp = (tmp * quant_ptr[rc != 0]) >> 16;
qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
int64_t tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
int tmp32;
#if CONFIG_AOM_QM
tmp32 = (tmp * quant_ptr[rc != 0] * wt) >> (16 + AOM_QM_BITS);
qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant;
#else
tmp32 = (tmp * quant_ptr[rc != 0]) >> 16;
qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
#endif
if (tmp) eob = i;
if (tmp32) eob = i;
}
}
*eob_ptr = eob + 1;
......
......@@ -99,6 +99,11 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
const PLANE_TYPE type = pd->plane_type;
const int default_eob = 16 << (tx_size << 1);
const int mul = 1 + (tx_size == TX_32X32);
#if CONFIG_AOM_QM
int seg_id = xd->mi[0]->mbmi.segment_id;
int is_intra = !is_inter_block(&xd->mi[0]->mbmi);
const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][is_intra][tx_size];
#endif
const int16_t *dequant_ptr = pd->dequant;
const uint8_t *const band_translate = get_band_translate(tx_size);
TX_TYPE tx_type = get_tx_type(type, xd, block);
......@@ -138,7 +143,11 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
for (i = eob; i-- > 0;) {
int base_bits, d2, dx;
const int rc = scan[i];
#if CONFIG_AOM_QM
int iwt = iqmatrix[rc];
#endif
int x = qcoeff[rc];
/* Only add a trellis state for non-zero coefficients. */
if (x) {
......@@ -182,9 +191,16 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
rate0 = tokens[next][0].rate;
rate1 = tokens[next][1].rate;
#if CONFIG_AOM_QM
if ((abs(x) * dequant_ptr[rc != 0] * iwt >
((abs(coeff[rc]) * mul) << AOM_QM_BITS)) &&
(abs(x) * dequant_ptr[rc != 0] * iwt <
((abs(coeff[rc]) * mul + dequant_ptr[rc != 0]) << AOM_QM_BITS)))
#else
if ((abs(x) * dequant_ptr[rc != 0] > abs(coeff[rc]) * mul) &&
(abs(x) * dequant_ptr[rc != 0] <
abs(coeff[rc]) * mul + dequant_ptr[rc != 0]))
#endif
shortcut = 1;
else
shortcut = 0;
......@@ -239,6 +255,7 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
#endif // CONFIG_VPX_HIGHBITDEPTH
d2 = dx * dx;
}
tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
tokens[i][1].error = d2 + (best ? error1 : error0);
tokens[i][1].next = next;
......@@ -288,12 +305,21 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
for (i = next; i < eob; i = next) {
const int x = tokens[i][best].qc;
const int rc = scan[i];
#if CONFIG_AOM_QM
const int iwt = iqmatrix[rc];
const int dequant =
(dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
#endif
if (x) {
final_eob = i;
}
qcoeff[rc] = x;
#if CONFIG_AOM_QM
dqcoeff[rc] = (x * dequant) / mul;
#else
dqcoeff[rc] = (x * dequant_ptr[rc != 0]) / mul;
#endif
next = tokens[i][best].next;
best = best_index[i][best];
......@@ -328,11 +354,17 @@ void vp10_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
vp10_fwht4x4(src_diff, coeff, diff_stride);
} else {
switch (tx_type) {
case DCT_DCT: vpx_fdct4x4(src_diff, coeff, diff_stride); break;
case DCT_DCT:
vpx_fdct4x4(src_diff, coeff, diff_stride);
break;
case ADST_DCT:
case DCT_ADST:
case ADST_ADST: vp10_fht4x4(src_diff, coeff, diff_stride, tx_type); break;
default: assert(0); break;
case ADST_ADST:
vp10_fht4x4(src_diff, coeff, diff_stride, tx_type);
break;
default:
assert(0);
break;
}
}
}
......@@ -343,8 +375,12 @@ static void fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
case DCT_DCT:
case ADST_DCT:
case DCT_ADST:
case ADST_ADST: vp10_fht8x8(src_diff, coeff, diff_stride, tx_type); break;
default: assert(0); break;
case ADST_ADST:
vp10_fht8x8(src_diff, coeff, diff_stride, tx_type);
break;
default:
assert(0);
break;
}
}
......@@ -354,8 +390,12 @@ static void fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff,
case DCT_DCT:
case ADST_DCT:
case DCT_ADST:
case ADST_ADST: vp10_fht16x16(src_diff, coeff, diff_stride, tx_type); break;
default: assert(0); break;
case ADST_ADST:
vp10_fht16x16(src_diff, coeff, diff_stride, tx_type);
break;
default:
assert(0);
break;
}
}
......@@ -363,11 +403,17 @@ static void fwd_txfm_32x32(int rd_transform, const int16_t *src_diff,
tran_low_t *coeff, int diff_stride,
TX_TYPE tx_type) {
switch (tx_type) {
case DCT_DCT: fdct32x32(rd_transform, src_diff, coeff, diff_stride); break;
case DCT_DCT:
fdct32x32(rd_transform, src_diff, coeff, diff_stride);
break;
case ADST_DCT:
case DCT_ADST:
case ADST_ADST: assert(0); break;
default: assert(0); break;
case ADST_ADST:
assert(0);
break;
default:
assert(0);
break;
}
}
......@@ -379,13 +425,17 @@ void vp10_highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
vp10_highbd_fwht4x4(src_diff, coeff, diff_stride);
} else {
switch (tx_type) {
case DCT_DCT: vpx_highbd_fdct4x4(src_diff, coeff, diff_stride); break;
case DCT_DCT:
vpx_highbd_fdct4x4(src_diff, coeff, diff_stride);
break;
case ADST_DCT:
case DCT_ADST:
case ADST_ADST:
vp10_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
break;
default: assert(0); break;
default:
assert(0);
break;
}
}
}
......@@ -393,26 +443,34 @@ void vp10_highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type) {
switch (tx_type) {
case DCT_DCT: vpx_highbd_fdct8x8(src_diff, coeff, diff_stride); break;
case DCT_DCT:
vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
break;
case ADST_DCT:
case DCT_ADST:
case ADST_ADST:
vp10_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
break;
default: assert(0); break;
default:
assert(0);
break;
}
}
static void highbd_fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type) {
switch (tx_type) {
case DCT_DCT: vpx_highbd_fdct16x16(src_diff, coeff, diff_stride); break;
case DCT_DCT:
vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
break;
case ADST_DCT:
case DCT_ADST:
case ADST_ADST:
vp10_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
break;
default: assert(0); break;
default:
assert(0);
break;
}
}
......@@ -425,8 +483,12 @@ static void highbd_fwd_txfm_32x32(int rd_transform, const int16_t *src_diff,
break;
case ADST_DCT:
case DCT_ADST:
case ADST_ADST: assert(0); break;
default: assert(0); break;
case ADST_ADST:
assert(0);
break;
default:
assert(0);
break;
}
}
#endif // CONFIG_VPX_HIGHBITDEPTH
......@@ -444,6 +506,12 @@ void vp10_xform_quant_fp(MACROBLOCK *x, int plane, int block, int blk_row,
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
uint16_t *const eob = &p->eobs[block];
const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
#if CONFIG_AOM_QM
int seg_id = xd->mi[0]->mbmi.segment_id;
int is_intra = !is_inter_block(&xd->mi[0]->mbmi);
const qm_val_t *qmatrix = pd->seg_qmatrix[seg_id][is_intra][tx_size];
const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][is_intra][tx_size];
#endif
const int16_t *src_diff;
src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
......@@ -455,21 +523,34 @@ void vp10_xform_quant_fp(MACROBLOCK *x, int plane, int block, int blk_row,
vp10_highbd_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin,
p->round_fp, p->quant_fp, p->quant_shift,
qcoeff, dqcoeff, pd->dequant, eob,
scan_order->scan, scan_order->iscan);
scan_order->scan,
#if !CONFIG_AOM_QM
scan_order->iscan);
#else
scan_order->iscan, qmatrix, iqmatrix);
#endif
break;
case TX_16X16:
vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
vp10_highbd_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob, scan_order->scan,
#if !CONFIG_AOM_QM
scan_order->iscan);
#else
scan_order->iscan, qmatrix, iqmatrix);
#endif
break;
case TX_8X8:
vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
vp10_highbd_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp,
p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob, scan_order->scan,
#if !CONFIG_AOM_QM
scan_order->iscan);
#else
scan_order->iscan, qmatrix, iqmatrix);
#endif
break;
case TX_4X4:
if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
......@@ -480,9 +561,14 @@ void vp10_xform_quant_fp(MACROBLOCK *x, int plane, int block, int blk_row,
vp10_highbd_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob, scan_order->scan,
#if !CONFIG_AOM_QM
scan_order->iscan);
#else
scan_order->iscan, qmatrix, iqmatrix);
#endif
break;
default: assert(0);
default:
assert(0);
}
return;
}
......@@ -494,19 +580,32 @@ void vp10_xform_quant_fp(MACROBLOCK *x, int plane, int block, int blk_row,
vp10_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin, p->round_fp,
p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob, scan_order->scan,
#if !CONFIG_AOM_QM
scan_order->iscan);
#else
scan_order->iscan, qmatrix, iqmatrix);
#endif
break;
case TX_16X16:
vpx_fdct16x16(src_diff, coeff, diff_stride);
vp10_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob, scan_order->scan, scan_order->iscan);
pd->dequant, eob, scan_order->scan,
#if !CONFIG_AOM_QM
scan_order->iscan);
#else
scan_order->iscan, qmatrix, iqmatrix);
#endif
break;
case TX_8X8:
vp10_fdct8x8_quant(src_diff, diff_stride, coeff, 64, x->skip_block,
p->zbin, p->round_fp, p->quant_fp, p->quant_shift,
qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan,
#if !CONFIG_AOM_QM
scan_order->iscan);
#else
scan_order->iscan, qmatrix, iqmatrix);
#endif
break;
case TX_4X4:
if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
......@@ -516,9 +615,16 @@ void vp10_xform_quant_fp(MACROBLOCK *x, int plane, int block, int blk_row,
}
vp10_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob, scan_order->scan, scan_order->iscan);
pd->dequant, eob, scan_order->scan,
#if !CONFIG_AOM_QM
scan_order->iscan);
#else
scan_order->iscan, qmatrix, iqmatrix);
#endif
break;
default:
assert(0);
break;
default: assert(0); break;
}
}
......@@ -532,6 +638,12 @@ void vp10_xform_quant_dc(MACROBLOCK *x, int plane, int block, int blk_row,
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
uint16_t *const eob = &p->eobs[block];
const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
int seg_id = xd->mi[0]->mbmi.segment_id;
#if CONFIG_AOM_QM
int is_intra = !is_inter_block(&xd->mi[0]->mbmi);
const qm_val_t *qmatrix = pd->seg_qmatrix[seg_id][is_intra][tx_size];
const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][is_intra][tx_size];
#endif
const int16_t *src_diff;
src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
......@@ -542,31 +654,49 @@ void vp10_xform_quant_dc(MACROBLOCK *x, int plane, int block, int blk_row,
vpx_highbd_fdct32x32_1(src_diff, coeff, diff_stride);
vpx_highbd_quantize_dc_32x32(coeff, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff,
pd->dequant[0], eob);
pd->dequant[0],
#if !CONFIG_AOM_QM
eob);
#else
eob, qmatrix, iqmatrix);
#endif
break;
case TX_16X16:
vpx_highbd_fdct16x16_1(src_diff, coeff, diff_stride);
vpx_highbd_quantize_dc(coeff, 256, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0],
#if !CONFIG_AOM_QM
eob);
#else
eob, qmatrix, iqmatrix);
#endif
break;
case TX_8X8:
vpx_highbd_fdct8x8_1(src_diff, coeff, diff_stride);
vpx_highbd_quantize_dc(coeff, 64, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0],
#if !CONFIG_AOM_QM
eob);
#else
eob, qmatrix, iqmatrix);
#endif
break;
case TX_4X4:
if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {