Commit 1c122c24, authored by Thomas, committed by Yaowu Xu

Add quant and dequant functions for new quant matrices.

Change-Id: If0ba62428216fa343b9a37a3b349edba4103c00a
parent 95a89994
......@@ -309,37 +309,67 @@ if (vpx_config("CONFIG_VP10_ENCODER") eq "yes") {
# ENCODEMB INVOKE
if (vpx_config("CONFIG_VPX_HIGHBITDEPTH") eq "yes") {
# the transform coefficients are held in 32-bit
# values, so the assembler code for vp10_block_error can no longer be used.
add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
specialize qw/vp10_block_error/;
if (vpx_config("CONFIG_AOM_QM") eq "yes") {
if (vpx_config("CONFIG_VPX_HIGHBITDEPTH") eq "yes") {
# the transform coefficients are held in 32-bit
# values, so the assembler code for vp10_block_error can no longer be used.
add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
specialize qw/vp10_block_error/;
add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
specialize qw/vp10_fdct8x8_quant/;
} else {
add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
specialize qw/vp10_block_error avx2 msa/, "$sse2_x86inc";
add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_quantize_fp/;
add_proto qw/int64_t vp10_block_error_fp/, "const int16_t *coeff, const int16_t *dqcoeff, int block_size";
specialize qw/vp10_block_error_fp neon/, "$sse2_x86inc";
add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_quantize_fp_32x32/;
add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_fdct8x8_quant/;
add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
}
} else {
add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
specialize qw/vp10_block_error avx2 msa/, "$sse2_x86inc";
if (vpx_config("CONFIG_VPX_HIGHBITDEPTH") eq "yes") {
# the transform coefficients are held in 32-bit
# values, so the assembler code for vp10_block_error can no longer be used.
add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
specialize qw/vp10_block_error/;
add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_quantize_fp/;
add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_quantize_fp_32x32/;
add_proto qw/int64_t vp10_block_error_fp/, "const int16_t *coeff, const int16_t *dqcoeff, int block_size";
specialize qw/vp10_block_error_fp neon/, "$sse2_x86inc";
add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_fdct8x8_quant/;
} else {
add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
specialize qw/vp10_block_error avx2 msa/, "$sse2_x86inc";
add_proto qw/int64_t vp10_block_error_fp/, "const int16_t *coeff, const int16_t *dqcoeff, int block_size";
specialize qw/vp10_block_error_fp neon/, "$sse2_x86inc";
add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_quantize_fp neon sse2/, "$ssse3_x86_64_x86inc";
add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_quantize_fp neon sse2/, "$ssse3_x86_64_x86inc";
add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_quantize_fp_32x32/, "$ssse3_x86_64_x86inc";
add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_quantize_fp_32x32/, "$ssse3_x86_64_x86inc";
add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_fdct8x8_quant sse2 ssse3 neon/;
}
add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_fdct8x8_quant sse2 ssse3 neon/;
}
# fdct functions
if (vpx_config("CONFIG_VPX_HIGHBITDEPTH") eq "yes") {
......@@ -574,11 +604,18 @@ if (vpx_config("CONFIG_VPX_HIGHBITDEPTH") eq "yes") {
add_proto qw/int64_t vp10_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd";
specialize qw/vp10_highbd_block_error sse2/;
add_proto qw/void vp10_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_highbd_quantize_fp/;
if (vpx_config("CONFIG_AOM_QM") eq "yes") {
add_proto qw/void vp10_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
add_proto qw/void vp10_highbd_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_highbd_quantize_fp_32x32/;
add_proto qw/void vp10_highbd_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
} else {
add_proto qw/void vp10_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_highbd_quantize_fp/;
add_proto qw/void vp10_highbd_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_highbd_quantize_fp_32x32/;
}
# fdct functions
add_proto qw/void vp10_highbd_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
......
......@@ -712,8 +712,8 @@ static void dec_build_inter_predictors_sb(VP10Decoder *const pbi,
}
}
static INLINE TX_SIZE dec_get_uv_tx_size(const MB_MODE_INFO *mbmi, int n4_wl,
int n4_hl) {
static INLINE TX_SIZE
dec_get_uv_tx_size(const MB_MODE_INFO *mbmi, int n4_wl, int n4_hl) {
// get minimum log2 num4x4s dimension
const int x = VPXMIN(n4_wl, n4_hl);
return VPXMIN(mbmi->tx_size, x);
......@@ -1120,8 +1120,13 @@ static void setup_quantization(VP10_COMMON *const cm,
static void setup_segmentation_dequant(VP10_COMMON *const cm) {
// Build y/uv dequant values based on segmentation.
int i = 0;
#if CONFIG_AOM_QM
int lossless;
int j = 0;
int qmindex;
#endif
if (cm->seg.enabled) {
int i;
for (i = 0; i < MAX_SEGMENTS; ++i) {
const int qindex = vp10_get_qindex(&cm->seg, i, cm->base_qindex);
cm->y_dequant[i][0] =
......@@ -1131,6 +1136,19 @@ static void setup_segmentation_dequant(VP10_COMMON *const cm) {
vp10_dc_quant(qindex, cm->uv_dc_delta_q, cm->bit_depth);
cm->uv_dequant[i][1] =
vp10_ac_quant(qindex, cm->uv_ac_delta_q, cm->bit_depth);
#if CONFIG_AOM_QM
lossless = qindex == 0 && cm->y_dc_delta_q == 0 &&
cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;
// NB: depends on base index so there is only 1 set per frame
// No quant weighting when lossless
qmindex = lossless ? QINDEX_RANGE - 1 : cm->base_qindex;
for (j = 0; j < TX_SIZES; ++j) {
cm->y_iqmatrix[i][1][j] = aom_iqmatrix(cm, qmindex, 0, j, 1);
cm->y_iqmatrix[i][0][j] = aom_iqmatrix(cm, qmindex, 0, j, 0);
cm->uv_iqmatrix[i][1][j] = aom_iqmatrix(cm, qmindex, 1, j, 1);
cm->uv_iqmatrix[i][0][j] = aom_iqmatrix(cm, qmindex, 1, j, 0);
}
#endif
}
} else {
const int qindex = cm->base_qindex;
......@@ -1143,6 +1161,18 @@ static void setup_segmentation_dequant(VP10_COMMON *const cm) {
vp10_dc_quant(qindex, cm->uv_dc_delta_q, cm->bit_depth);
cm->uv_dequant[0][1] =
vp10_ac_quant(qindex, cm->uv_ac_delta_q, cm->bit_depth);
#if CONFIG_AOM_QM
lossless = qindex == 0 && cm->y_dc_delta_q == 0 && cm->uv_dc_delta_q == 0 &&
cm->uv_ac_delta_q == 0;
// No quant weighting when lossless
qmindex = lossless ? QINDEX_RANGE - 1 : cm->base_qindex;
for (j = 0; j < TX_SIZES; ++j) {
cm->y_iqmatrix[i][1][j] = aom_iqmatrix(cm, qmindex, 0, j, 1);
cm->y_iqmatrix[i][0][j] = aom_iqmatrix(cm, qmindex, 0, j, 0);
cm->uv_iqmatrix[i][1][j] = aom_iqmatrix(cm, qmindex, 1, j, 1);
cm->uv_iqmatrix[i][0][j] = aom_iqmatrix(cm, qmindex, 1, j, 0);
}
#endif
}
}
......
......@@ -109,6 +109,10 @@ VP10Decoder *vp10_decoder_create(BufferPool *const pool) {
vp10_loop_filter_init(cm);
#if CONFIG_AOM_QM
aom_qm_init(cm);
#endif
cm->error.setjmp = 0;
vpx_get_worker_interface()->init(&pbi->lf_worker);
......
......@@ -43,14 +43,25 @@ static INLINE int read_coeff(const vpx_prob *probs, int n, vpx_reader *r) {
return val;
}
#if CONFIG_AOM_QM
static int decode_coefs(const MACROBLOCKD *xd, PLANE_TYPE type,
tran_low_t *dqcoeff, TX_SIZE tx_size, const int16_t *dq,
int ctx, const int16_t *scan, const int16_t *nb,
vpx_reader *r) {
vpx_reader *r, const qm_val_t *iqm[2][TX_SIZES])
#else
static int decode_coefs(const MACROBLOCKD *xd, PLANE_TYPE type,
tran_low_t *dqcoeff, TX_SIZE tx_size, const int16_t *dq,
int ctx, const int16_t *scan, const int16_t *nb,
vpx_reader *r)
#endif
{
FRAME_COUNTS *counts = xd->counts;
const int max_eob = 16 << (tx_size << 1);
const FRAME_CONTEXT *const fc = xd->fc;
const int ref = is_inter_block(&xd->mi[0]->mbmi);
#if CONFIG_AOM_QM
const qm_val_t *iqmatrix = iqm[!ref][tx_size];
#endif
int band, c = 0;
const vpx_prob(*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
fc->coef_probs[tx_size][type][ref];
......@@ -183,6 +194,10 @@ static int decode_coefs(const MACROBLOCKD *xd, PLANE_TYPE type,
}
}
}
#if CONFIG_AOM_QM
dqv = ((iqmatrix[scan[c]] * (int)dqv) + (1 << (AOM_QM_BITS - 1))) >>
AOM_QM_BITS;
#endif
v = (val * dqv) >> dq_shift;
#if CONFIG_COEFFICIENT_RANGE_CHECKING
#if CONFIG_VPX_HIGHBITDEPTH
......@@ -249,8 +264,16 @@ int vp10_decode_block_tokens(MACROBLOCKD *xd, int plane, const scan_order *sc,
const int16_t *const dequant = pd->seg_dequant[seg_id];
const int ctx =
get_entropy_context(tx_size, pd->above_context + x, pd->left_context + y);
#if CONFIG_AOM_QM
const int eob =
decode_coefs(xd, pd->plane_type, pd->dqcoeff, tx_size, dequant, ctx,
sc->scan, sc->neighbors, r, pd->seg_iqmatrix[seg_id]);
#else
const int eob = decode_coefs(xd, pd->plane_type, pd->dqcoeff, tx_size,
dequant, ctx, sc->scan, sc->neighbors, r);
#endif
dec_set_contexts(xd, pd, tx_size, eob > 0, x, y);
return eob;
}
......@@ -1021,7 +1021,12 @@ void vp10_fdct8x8_quant_c(const int16_t *input, int stride,
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
const int16_t *scan, const int16_t *iscan
#if CONFIG_AOM_QM
,
const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
#endif
) {
int eob = -1;
int i, j;
......@@ -1107,16 +1112,29 @@ void vp10_fdct8x8_quant_c(const int16_t *input, int stride,
for (i = 0; i < n_coeffs; i++) {
const int rc = scan[i];
const int coeff = coeff_ptr[rc];
#if CONFIG_AOM_QM
const qm_val_t wt = qm_ptr[rc];
const qm_val_t iwt = iqm_ptr[rc];
const int dequant =
(dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
AOM_QM_BITS;
#endif
const int coeff_sign = (coeff >> 31);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
int tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
tmp = (tmp * quant_ptr[rc != 0]) >> 16;
qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
int64_t tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
int tmp32;
#if CONFIG_AOM_QM
tmp32 = (tmp * quant_ptr[rc != 0] * wt) >> (16 + AOM_QM_BITS);
qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant;
#else
tmp32 = (tmp * quant_ptr[rc != 0]) >> 16;
qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
#endif
if (tmp) eob = i;
if (tmp32) eob = i;
}
}
*eob_ptr = eob + 1;
......
This diff is collapsed.
......@@ -1613,6 +1613,9 @@ VP10_COMP *vp10_create_compressor(VP10EncoderConfig *oxcf,
* vp10_init_quantizer() for every frame.
*/
vp10_init_quantizer(cpi);
#if CONFIG_AOM_QM
aom_qm_init(cm);
#endif
vp10_loop_filter_init(cm);
......
......@@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include <math.h>
#include "./vpx_dsp_rtcd.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
......@@ -26,7 +25,12 @@ void vp10_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
uint16_t *eob_ptr, const int16_t *scan,
const int16_t *iscan) {
const int16_t *iscan
#if CONFIG_AOM_QM
,
const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
#endif
) {
int i, eob = -1;
// TODO(jingning) Decide the need of these arguments after the
// quantization process is completed.
......@@ -43,16 +47,29 @@ void vp10_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
for (i = 0; i < n_coeffs; i++) {
const int rc = scan[i];
const int coeff = coeff_ptr[rc];
#if CONFIG_AOM_QM
const qm_val_t wt = qm_ptr[rc];
const qm_val_t iwt = iqm_ptr[rc];
const int dequant =
(dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
AOM_QM_BITS;
#endif
const int coeff_sign = (coeff >> 31);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
int tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
tmp = (tmp * quant_ptr[rc != 0]) >> 16;
qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
int64_t tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
int tmp32;
#if CONFIG_AOM_QM
tmp32 = (tmp * wt * quant_ptr[rc != 0]) >> (16 + AOM_QM_BITS);
qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant;
#else
tmp32 = (tmp * quant_ptr[rc != 0]) >> 16;
qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
#endif
if (tmp) eob = i;
if (tmp32) eob = i;
}
}
*eob_ptr = eob + 1;
......@@ -66,7 +83,12 @@ void vp10_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t count,
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
const int16_t *scan, const int16_t *iscan
#if CONFIG_AOM_QM
,
const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
#endif
) {
int i;
int eob = -1;
// TODO(jingning) Decide the need of these arguments after the
......@@ -84,12 +106,26 @@ void vp10_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t count,
for (i = 0; i < count; i++) {
const int rc = scan[i];
const int coeff = coeff_ptr[rc];
#if CONFIG_AOM_QM
const qm_val_t wt = qm_ptr[rc];
const qm_val_t iwt = iqm_ptr[rc];
const int dequant =
(dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
AOM_QM_BITS;
#endif
const int coeff_sign = (coeff >> 31);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
const int64_t tmp = abs_coeff + round_ptr[rc != 0];
#if CONFIG_AOM_QM
const uint32_t abs_qcoeff =
(uint32_t)((tmp * quant_ptr[rc != 0] * wt) >> (16 + AOM_QM_BITS));
qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant;
#else
const uint32_t abs_qcoeff = (uint32_t)((tmp * quant_ptr[rc != 0]) >> 16);
qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
#endif
if (abs_qcoeff) eob = i;
}
}
......@@ -106,7 +142,12 @@ void vp10_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
const int16_t *scan, const int16_t *iscan
#if CONFIG_AOM_QM
,
const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
#endif
) {
int i, eob = -1;
(void)zbin_ptr;
(void)quant_shift_ptr;
......@@ -119,19 +160,38 @@ void vp10_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
for (i = 0; i < n_coeffs; i++) {
const int rc = scan[i];
const int coeff = coeff_ptr[rc];
#if CONFIG_AOM_QM
const qm_val_t wt = qm_ptr[rc];
const qm_val_t iwt = iqm_ptr[rc];
const int dequant =
(dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
AOM_QM_BITS;
int64_t tmp = 0;
#endif
const int coeff_sign = (coeff >> 31);
int tmp = 0;
int tmp32 = 0;
int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
#if CONFIG_AOM_QM
if (abs_coeff * wt >= (dequant_ptr[rc != 0] << (AOM_QM_BITS - 2))) {
#else
if (abs_coeff >= (dequant_ptr[rc != 0] >> 2)) {
#endif
abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
abs_coeff = clamp(abs_coeff, INT16_MIN, INT16_MAX);
tmp = (abs_coeff * quant_ptr[rc != 0]) >> 15;
qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
#if CONFIG_AOM_QM
tmp = abs_coeff * wt;
tmp32 = (int)(tmp * quant_ptr[rc != 0]) >> (AOM_QM_BITS + 15);
qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / 2;
#else
tmp32 = (abs_coeff * quant_ptr[rc != 0]) >> 15;
qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant_ptr[rc != 0]) / 2;
#endif
}
if (tmp) eob = i;
if (tmp32) eob = i;
}
}
*eob_ptr = eob + 1;
......@@ -143,7 +203,12 @@ void vp10_highbd_quantize_fp_32x32_c(
const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
const int16_t *scan, const int16_t *iscan
#if CONFIG_AOM_QM
,
const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
#endif
) {
int i, eob = -1;
(void)zbin_ptr;
(void)quant_shift_ptr;
......@@ -157,15 +222,32 @@ void vp10_highbd_quantize_fp_32x32_c(
uint32_t abs_qcoeff = 0;
const int rc = scan[i];
const int coeff = coeff_ptr[rc];
#if CONFIG_AOM_QM
const qm_val_t wt = qm_ptr[rc];
const qm_val_t iwt = iqm_ptr[rc];
const int dequant =
(dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
AOM_QM_BITS;
#endif
const int coeff_sign = (coeff >> 31);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
#if CONFIG_AOM_QM
if (abs_coeff * wt >= (dequant_ptr[rc != 0] << (AOM_QM_BITS - 2))) {
#else
if (abs_coeff >= (dequant_ptr[rc != 0] >> 2)) {
#endif
const int64_t tmp =
abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
#if CONFIG_AOM_QM
abs_qcoeff =
(uint32_t)((tmp * wt * quant_ptr[rc != 0]) >> (AOM_QM_BITS + 15));
qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / 2;
#else
abs_qcoeff = (uint32_t)((tmp * quant_ptr[rc != 0]) >> 15);
qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant_ptr[rc != 0]) / 2;
#endif
}
if (abs_qcoeff) eob = i;
......@@ -180,6 +262,12 @@ void vp10_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block,
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblock_plane *p = &x->plane[plane];
struct macroblockd_plane *pd = &xd->plane[plane];
#if CONFIG_AOM_QM
int seg_id = xd->mi[0]->mbmi.segment_id;
int is_intra = is_inter_block(&xd->mi[0]->mbmi);
const qm_val_t *qmatrix = pd->seg_qmatrix[seg_id][is_intra][0];
const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][is_intra][0];
#endif
#if CONFIG_VPX_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
......@@ -187,7 +275,12 @@ void vp10_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block,
p->zbin, p->round, p->quant, p->quant_shift,
BLOCK_OFFSET(p->qcoeff, block),
BLOCK_OFFSET(pd->dqcoeff, block), pd->dequant,
#if !CONFIG_AOM_QM
&p->eobs[block], scan, iscan);
#else
&p->eobs[block], scan, iscan,
qmatrix, iqmatrix);
#endif
return;
}
#endif
......@@ -195,7 +288,12 @@ void vp10_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block,
p->round, p->quant, p->quant_shift,
BLOCK_OFFSET(p->qcoeff, block),
BLOCK_OFFSET(pd->dqcoeff, block), pd->dequant, &p->eobs[block],
scan, iscan);
#if !CONFIG_AOM_QM
scan, iscan);
#else
scan, iscan,
qmatrix, iqmatrix);
#endif
}
static void invert_quant(int16_t *quant, int16_t *shift, int d) {
......@@ -212,9 +310,12 @@ static int get_qzbin_factor(int q, vpx_bit_depth_t bit_depth) {
const int quant = vp10_dc_quant(q, 0, bit_depth);
#if CONFIG_VPX_HIGHBITDEPTH
switch (bit_depth) {
case VPX_BITS_8: return q == 0 ? 64 : (quant < 148 ? 84 : 80);
case VPX_BITS_10: return q == 0 ? 64 : (quant < 592 ? 84 : 80);
case VPX_BITS_12: return q == 0 ? 64 : (quant < 2368 ? 84 : 80);
case VPX_BITS_8:
return q == 0 ? 64 : (quant < 148 ? 84 : 80);
case VPX_BITS_10:
return q == 0 ? 64 : (quant < 592 ? 84 : 80);
case VPX_BITS_12:
return q == 0 ? 64 : (quant < 2368 ? 84 : 80);
default:
assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
return -1;
......@@ -288,6 +389,11 @@ void vp10_init_plane_quantizers(VP10_COMP *cpi, MACROBLOCK *x) {
const int qindex = vp10_get_qindex(&cm->seg, segment_id, cm->base_qindex);
const int rdmult = vp10_compute_rd_mult(cpi, qindex + cm->y_dc_delta_q);
int i;
#if CONFIG_AOM_QM
const int lossless = xd->lossless[segment_id];
// Quant matrix only depends on the base QP so there is only one set per frame
int qmlevel = lossless ? NUM_QM_LEVELS - 1 : aom_get_qmlevel(cm->base_qindex);
#endif
// Y
x->plane[0].quant = quants->y_quant[qindex];
......@@ -296,6 +402,12 @@ void vp10_init_plane_quantizers(VP10_COMP *cpi, MACROBLOCK *x) {
x->plane[0].quant_shift = quants->y_quant_shift[qindex];
x->plane[0].zbin = quants->y_zbin[qindex];
x->plane[0].round = quants->y_round[qindex];
#if CONFIG_AOM_QM
memcpy(&xd->plane[0].seg_qmatrix[segment_id], cm->gqmatrix[qmlevel][0],
sizeof(cm->gqmatrix[qmlevel][0]));
memcpy(&xd->plane[0].seg_iqmatrix[segment_id], cm->giqmatrix[qmlevel][0],
sizeof(cm->giqmatrix[qmlevel][0]));
#endif
xd->plane[0].dequant = cpi->y_dequant[qindex];
x->plane[0].quant_thred[0] = x->plane[0].zbin[0] * x->plane[0].zbin[0];
......@@ -309,6 +421,12 @@ void vp10_init_plane_quantizers(VP10_COMP *cpi, MACROBLOCK *x) {
x->plane[i].quant_shift = quants->uv_quant_shift[qindex];
x->plane[i].zbin = quants->uv_zbin[qindex];
x->plane[i].round = quants->uv_round[qindex];
#if CONFIG_AOM_QM
memcpy(&xd->plane[i].seg_qmatrix[segment_id], cm->gqmatrix[qmlevel][1],
sizeof(cm->gqmatrix[qmlevel][1]));
memcpy(&xd->plane[i].seg_iqmatrix[segment_id], cm->giqmatrix[qmlevel][1],
sizeof(cm->giqmatrix[qmlevel][1]));
#endif
xd->plane[i].dequant = cpi->uv_dequant[qindex];
x->plane[i].quant_thred[0] = x->plane[i].zbin[0] * x->plane[i].zbin[0];
......
......@@ -11,6 +11,371 @@
#include "vpx_dsp/quantize.h"
#include "vpx_mem/vpx_mem.h"
#if CONFIG_AOM_QM
void vpx_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block,
const int16_t *round_ptr, const int16_t quant,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr,
const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) {
const int rc = 0;
const int coeff = coeff_ptr[rc];
const int coeff_sign = (coeff >> 31);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
int64_t tmp, eob = -1;
int32_t tmp32;
int dequant =
(dequant_ptr * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;