Commit 0e11912a authored by Debargha Mukherjee

Support 64x64 quantizer functions

Also includes some refactoring and cleanups.

Change-Id: I2c2528c434a1e9e9b898251fa69489d884463929
parent 23b120db
......@@ -1027,13 +1027,23 @@ if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
if (aom_config("CONFIG_AOM_QM") eq "yes") {
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
add_proto qw/void aom_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
specialize qw/aom_quantize_b/;
add_proto qw/void aom_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
specialize qw/aom_quantize_b_32x32/;
if (aom_config("CONFIG_TX64X64") eq "yes") {
  # The C implementation of this function is only compiled under
  # CONFIG_TX64X64, so only register the prototype when that is enabled.
  add_proto qw/void aom_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
  specialize qw/aom_quantize_b_64x64/;
} # CONFIG_TX64X64
if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
specialize qw/aom_highbd_quantize_b/;
add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
specialize qw/aom_highbd_quantize_b_32x32/;
if (aom_config("CONFIG_TX64X64") eq "yes") {
  # The C implementation of this function is only compiled under
  # CONFIG_TX64X64, so only register the prototype when that is enabled.
  add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
  specialize qw/aom_highbd_quantize_b_64x64/;
} # CONFIG_TX64X64
} # CONFIG_AOM_HIGHBITDEPTH
} # CONFIG_AV1_ENCODER
} else {
......@@ -1044,12 +1054,18 @@ if (aom_config("CONFIG_AOM_QM") eq "yes") {
add_proto qw/void aom_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/aom_quantize_b_32x32/, "$ssse3_x86_64", "$avx_x86_64";
if (aom_config("CONFIG_TX64X64") eq "yes") {
  # The C implementation of this function is only compiled under
  # CONFIG_TX64X64, so only register the prototype when that is enabled.
  add_proto qw/void aom_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
  specialize qw/aom_quantize_b_64x64/;
} # CONFIG_TX64X64
if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/aom_highbd_quantize_b sse2/;
add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/aom_highbd_quantize_b_32x32 sse2/;
if (aom_config("CONFIG_TX64X64") eq "yes") {
  # The C implementation of this function is only compiled under
  # CONFIG_TX64X64, so only register the prototype when that is enabled.
  add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
  specialize qw/aom_highbd_quantize_b_64x64/;
} # CONFIG_TX64X64
} # CONFIG_AOM_HIGHBITDEPTH
} # CONFIG_AV1_ENCODER
} # CONFIG_AOM_QM
......
......@@ -99,6 +99,38 @@ void aom_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
*eob_ptr = eob + 1;
}
#if CONFIG_TX64X64
// Quantizes only coefficient 0 of a 64x64 block, using quantization-matrix
// weights.
//
// Both output buffers are zeroed over the whole block first. Unless
// skip_block is set, coefficient 0 is biased by
// ROUND_POWER_OF_TWO(round_ptr[0], 2), clamped to the int16_t range, scaled
// by qm_ptr[0] * quant and shifted right by (14 + AOM_QM_BITS). The
// reconstructed value uses dequant_ptr weighted by iqm_ptr[0] and is divided
// by 4.
//
// *eob_ptr is set to 1 when the quantized value is non-zero, otherwise to 0.
void aom_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
                           const int16_t *round_ptr, const int16_t quant,
                           tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                           const int16_t dequant_ptr, uint16_t *eob_ptr,
                           const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) {
  // A 64x64 block holds 64 * 64 = 4096 coefficients. Clearing fewer entries
  // would leave stale values in the tail of the output buffers.
  const int n_coeffs = 4096;
  const int rc = 0;
  const int coeff = coeff_ptr[rc];
  // All-ones when coeff is negative, zero otherwise.
  const int coeff_sign = (coeff >> 31);
  const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
  int64_t tmp, eob = -1;
  int32_t tmp32;
  int dequant;

  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));

  if (!skip_block) {
    tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 2),
                INT16_MIN, INT16_MAX);
    // tmp is 64-bit, so the triple product is evaluated in 64 bits.
    tmp32 = (int32_t)((tmp * qm_ptr[rc] * quant) >> (14 + AOM_QM_BITS));
    // Re-apply the sign of the input coefficient.
    qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
    // Weighted dequantizer, rounded back down by AOM_QM_BITS.
    dequant =
        (dequant_ptr * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
    dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / 4;
    if (tmp32) eob = 0;
  }
  *eob_ptr = eob + 1;
}
#endif // CONFIG_TX64X64
#if CONFIG_AOM_HIGHBITDEPTH
void aom_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
const int16_t *round_ptr, const int16_t quant,
......@@ -129,6 +161,38 @@ void aom_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
}
*eob_ptr = eob + 1;
}
#if CONFIG_TX64X64
// High-bitdepth variant: quantizes only coefficient 0 of a 64x64 block,
// using quantization-matrix weights.
//
// Both output buffers are zeroed over the whole block first. Unless
// skip_block is set, coefficient 0 is biased by
// ROUND_POWER_OF_TWO(round_ptr[0], 2) (no clamping in this variant), scaled
// by qm_ptr[0] * quant in 64-bit arithmetic and shifted right by
// (14 + AOM_QM_BITS). The reconstructed value uses dequant_ptr weighted by
// iqm_ptr[0] and is divided by 4.
//
// *eob_ptr is set to 1 when the quantized value is non-zero, otherwise to 0.
void aom_highbd_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
                                  const int16_t *round_ptr, const int16_t quant,
                                  tran_low_t *qcoeff_ptr,
                                  tran_low_t *dqcoeff_ptr,
                                  const int16_t dequant_ptr, uint16_t *eob_ptr,
                                  const qm_val_t *qm_ptr,
                                  const qm_val_t *iqm_ptr) {
  // A 64x64 block holds 64 * 64 = 4096 coefficients. Clearing fewer entries
  // would leave stale values in the tail of the output buffers.
  const int n_coeffs = 4096;
  int eob = -1;
  int dequant;

  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));

  if (!skip_block) {
    const int coeff = coeff_ptr[0];
    // All-ones when coeff is negative, zero otherwise.
    const int coeff_sign = (coeff >> 31);
    const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
    const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], 2);
    const uint32_t abs_qcoeff =
        (uint32_t)((tmp * qm_ptr[0] * quant) >> (14 + AOM_QM_BITS));
    // Re-apply the sign of the input coefficient.
    qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
    // Weighted dequantizer, rounded back down by AOM_QM_BITS.
    dequant =
        (dequant_ptr * iqm_ptr[0] + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
    dqcoeff_ptr[0] = (qcoeff_ptr[0] * dequant) / 4;
    if (abs_qcoeff) eob = 0;
  }
  *eob_ptr = eob + 1;
}
#endif // CONFIG_TX64X64
#endif
void aom_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
......@@ -316,6 +380,72 @@ void aom_quantize_b_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
*eob_ptr = eob + 1;
}
#if CONFIG_TX64X64
// Quantizes n_coeffs coefficients of a 64x64 block in scan order, using
// quantization-matrix weights.
//
// Outputs are zeroed first. Unless skip_block is set, a pre-scan pass records
// the scan positions whose weighted coefficient lies outside the zero bin,
// and a second pass quantizes and dequantizes only those positions. Index 0
// of zbin_ptr / round_ptr / quant_ptr / quant_shift_ptr / dequant_ptr applies
// to coefficient position 0, index 1 to every other position.
//
// *eob_ptr receives one plus the last scan index with a non-zero quantized
// value, or 0 when there is none. iscan is unused.
//
// NOTE(review): idx_arr has 4096 entries, so n_coeffs is presumably never
// larger than 4096 -- confirm against callers.
void aom_quantize_b_64x64_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                            int skip_block, const int16_t *zbin_ptr,
                            const int16_t *round_ptr, const int16_t *quant_ptr,
                            const int16_t *quant_shift_ptr,
                            tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                            const int16_t *dequant_ptr, uint16_t *eob_ptr,
                            const int16_t *scan, const int16_t *iscan,
                            const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) {
  const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], 2),
                         ROUND_POWER_OF_TWO(zbin_ptr[1], 2) };
  const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
  int idx = 0;
  int idx_arr[4096];
  int i, eob = -1;
  int dequant;
  (void)iscan;

  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));

  if (!skip_block) {
    // Pre-scan pass
    for (i = 0; i < n_coeffs; i++) {
      const int rc = scan[i];
      const qm_val_t wt = qm_ptr[rc];
      const int coeff = coeff_ptr[rc] * wt;

      // If the coefficient is out of the base ZBIN range, keep it for
      // quantization. The thresholds are scaled by multiplication rather
      // than "<<" because left-shifting the negative bound is undefined
      // behaviour in C.
      if (coeff >= (zbins[rc != 0] * (1 << AOM_QM_BITS)) ||
          coeff <= (nzbins[rc != 0] * (1 << AOM_QM_BITS)))
        idx_arr[idx++] = i;
    }

    // Quantization pass: only process the coefficients selected in
    // pre-scan pass. Note: idx can be zero.
    for (i = 0; i < idx; i++) {
      const int rc = scan[idx_arr[i]];
      const int coeff = coeff_ptr[rc];
      // All-ones when coeff is negative, zero otherwise.
      const int coeff_sign = (coeff >> 31);
      const qm_val_t wt = qm_ptr[rc];
      int64_t tmp;
      int tmp32;
      int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;

      abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 2);
      tmp = clamp(abs_coeff, INT16_MIN, INT16_MAX);
      tmp = tmp * wt;
      // tmp is 64-bit, so the products below are evaluated in 64 bits.
      tmp32 = ((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) *
               quant_shift_ptr[rc != 0]) >>
              (14 + AOM_QM_BITS);
      // Re-apply the sign of the input coefficient.
      qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
      // Weighted dequantizer, rounded back down by AOM_QM_BITS.
      dequant =
          (dequant_ptr[rc != 0] * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >>
          AOM_QM_BITS;
      dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / 4;

      if (tmp32) eob = idx_arr[i];
    }
  }
  *eob_ptr = eob + 1;
}
#endif // CONFIG_TX64X64
#if CONFIG_AOM_HIGHBITDEPTH
void aom_highbd_quantize_b_32x32_c(
const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
......@@ -375,8 +505,71 @@ void aom_highbd_quantize_b_32x32_c(
}
*eob_ptr = eob + 1;
}
#endif
#if CONFIG_TX64X64
// High-bitdepth variant: quantizes n_coeffs coefficients of a 64x64 block in
// scan order, using quantization-matrix weights.
//
// Outputs are zeroed first. Unless skip_block is set, a pre-scan pass records
// the scan positions whose weighted coefficient lies outside the zero bin,
// and a second pass quantizes and dequantizes only those positions using
// 64-bit intermediates (no clamping in this variant). Index 0 of zbin_ptr /
// round_ptr / quant_ptr / quant_shift_ptr / dequant_ptr applies to
// coefficient position 0, index 1 to every other position.
//
// *eob_ptr receives one plus the last scan index with a non-zero quantized
// value, or 0 when there is none. iscan is unused.
//
// NOTE(review): idx_arr has 4096 entries, so n_coeffs is presumably never
// larger than 4096 -- confirm against callers.
void aom_highbd_quantize_b_64x64_c(
    const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
    const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
    const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr,
    const qm_val_t *iqm_ptr) {
  const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], 2),
                         ROUND_POWER_OF_TWO(zbin_ptr[1], 2) };
  const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
  int idx = 0;
  int idx_arr[4096];
  int i, eob = -1;
  int dequant;
  (void)iscan;

  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));

  if (!skip_block) {
    // Pre-scan pass
    for (i = 0; i < n_coeffs; i++) {
      const int rc = scan[i];
      const qm_val_t wt = qm_ptr[rc];
      const int coeff = coeff_ptr[rc] * wt;

      // If the coefficient is out of the base ZBIN range, keep it for
      // quantization. The thresholds are scaled by multiplication rather
      // than "<<" because left-shifting the negative bound is undefined
      // behaviour in C.
      if (coeff >= (zbins[rc != 0] * (1 << AOM_QM_BITS)) ||
          coeff <= (nzbins[rc != 0] * (1 << AOM_QM_BITS)))
        idx_arr[idx++] = i;
    }

    // Quantization pass: only process the coefficients selected in
    // pre-scan pass. Note: idx can be zero.
    for (i = 0; i < idx; i++) {
      const int rc = scan[idx_arr[i]];
      const int coeff = coeff_ptr[rc];
      // All-ones when coeff is negative, zero otherwise.
      const int coeff_sign = (coeff >> 31);
      const qm_val_t wt = qm_ptr[rc];
      const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
      const int64_t tmp1 =
          abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 2);
      const int64_t tmpw = tmp1 * wt;
      const int64_t tmp2 = ((tmpw * quant_ptr[rc != 0]) >> 16) + tmpw;
      const uint32_t abs_qcoeff =
          (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> (14 + AOM_QM_BITS));
      // Re-apply the sign of the input coefficient.
      qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
      // Weighted dequantizer, rounded back down by AOM_QM_BITS.
      dequant =
          (dequant_ptr[rc != 0] * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >>
          AOM_QM_BITS;
      dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / 4;
      if (abs_qcoeff) eob = idx_arr[i];
    }
  }
  *eob_ptr = eob + 1;
}
#endif // CONFIG_TX64X64
#endif // CONFIG_AOM_HIGHBITDEPTH
#else
void aom_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block,
const int16_t *round_ptr, const int16_t quant,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
......@@ -450,6 +643,33 @@ void aom_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
*eob_ptr = eob + 1;
}
#if CONFIG_TX64X64
// Quantizes only coefficient 0 of a 64x64 block (no quantization matrix).
//
// The whole 4096-entry output buffers are cleared first. Unless skip_block is
// set, coefficient 0 is biased by ROUND_POWER_OF_TWO(round_ptr[0], 2),
// clamped to the int16_t range, multiplied by quant and shifted right by 14;
// the reconstruction is the signed level times dequant_ptr, divided by 4.
// *eob_ptr becomes 1 for a non-zero level and 0 otherwise.
void aom_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
                           const int16_t *round_ptr, const int16_t quant,
                           tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                           const int16_t dequant_ptr, uint16_t *eob_ptr) {
  enum { kBlockCoeffs = 4096 };
  const int dc_value = coeff_ptr[0];
  // All-ones for a negative input, zero otherwise.
  const int sign_mask = dc_value >> 31;
  const int magnitude = (dc_value ^ sign_mask) - sign_mask;
  int level = 0;

  memset(qcoeff_ptr, 0, kBlockCoeffs * sizeof(qcoeff_ptr[0]));
  memset(dqcoeff_ptr, 0, kBlockCoeffs * sizeof(dqcoeff_ptr[0]));

  if (!skip_block) {
    const int biased =
        clamp(magnitude + ROUND_POWER_OF_TWO(round_ptr[0], 2), INT16_MIN,
              INT16_MAX);
    level = (biased * quant) >> 14;
    // Restore the sign of the input on the quantized level.
    qcoeff_ptr[0] = (level ^ sign_mask) - sign_mask;
    dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr / 4;
  }

  *eob_ptr = (level != 0) ? 1 : 0;
}
#endif // CONFIG_TX64X64
#if CONFIG_AOM_HIGHBITDEPTH
void aom_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
const int16_t *round_ptr, const int16_t quant,
......@@ -475,6 +695,33 @@ void aom_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
}
*eob_ptr = eob + 1;
}
#if CONFIG_TX64X64
// High-bitdepth variant: quantizes only coefficient 0 of a 64x64 block (no
// quantization matrix).
//
// The whole 4096-entry output buffers are cleared first. Unless skip_block is
// set, coefficient 0 is biased by ROUND_POWER_OF_TWO(round_ptr[0], 2),
// multiplied by quant in 64-bit arithmetic and shifted right by 14; the
// reconstruction is the signed level times dequant_ptr, divided by 4.
// *eob_ptr becomes 1 for a non-zero level and 0 otherwise.
void aom_highbd_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
                                  const int16_t *round_ptr, const int16_t quant,
                                  tran_low_t *qcoeff_ptr,
                                  tran_low_t *dqcoeff_ptr,
                                  const int16_t dequant_ptr,
                                  uint16_t *eob_ptr) {
  enum { kBlockCoeffs = 4096 };
  uint32_t level = 0;

  memset(qcoeff_ptr, 0, kBlockCoeffs * sizeof(qcoeff_ptr[0]));
  memset(dqcoeff_ptr, 0, kBlockCoeffs * sizeof(dqcoeff_ptr[0]));

  if (!skip_block) {
    const int dc_value = coeff_ptr[0];
    // All-ones for a negative input, zero otherwise.
    const int sign_mask = dc_value >> 31;
    const int magnitude = (dc_value ^ sign_mask) - sign_mask;
    const int64_t biased = magnitude + ROUND_POWER_OF_TWO(round_ptr[0], 2);
    const int64_t scaled = biased * quant;

    level = (uint32_t)(scaled >> 14);
    // Restore the sign of the input on the quantized level.
    qcoeff_ptr[0] = (tran_low_t)((level ^ sign_mask) - sign_mask);
    dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr / 4;
  }

  *eob_ptr = level ? 1 : 0;
}
#endif // CONFIG_TX64X64
#endif
void aom_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
......@@ -632,6 +879,62 @@ void aom_quantize_b_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
*eob_ptr = eob + 1;
}
#if CONFIG_TX64X64
// Quantizes n_coeffs coefficients of a 64x64 block in scan order (no
// quantization matrix).
//
// Outputs are cleared first. Unless skip_block is set, pass 1 collects the
// scan positions whose coefficient falls outside the zero bin and pass 2
// quantizes and dequantizes just those. Index 0 of the zbin / round / quant /
// quant_shift / dequant tables applies to coefficient position 0, index 1 to
// every other position. *eob_ptr receives one plus the last scan index that
// produced a non-zero level, or 0 when there is none. iscan is unused.
void aom_quantize_b_64x64_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                            int skip_block, const int16_t *zbin_ptr,
                            const int16_t *round_ptr, const int16_t *quant_ptr,
                            const int16_t *quant_shift_ptr,
                            tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                            const int16_t *dequant_ptr, uint16_t *eob_ptr,
                            const int16_t *scan, const int16_t *iscan) {
  const int zbin_first = ROUND_POWER_OF_TWO(zbin_ptr[0], 2);
  const int zbin_rest = ROUND_POWER_OF_TWO(zbin_ptr[1], 2);
  const int upper[2] = { zbin_first, zbin_rest };
  const int lower[2] = { -zbin_first, -zbin_rest };
  int kept[4096];
  int num_kept = 0;
  int last_nonzero = -1;
  int pos, k;

  (void)iscan;

  memset(qcoeff_ptr, 0, n_coeffs * sizeof(qcoeff_ptr[0]));
  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(dqcoeff_ptr[0]));

  if (skip_block) {
    *eob_ptr = 0;
    return;
  }

  // Pass 1: keep every scan position whose coefficient is not strictly
  // inside (-zbin, zbin).
  for (pos = 0; pos < n_coeffs; ++pos) {
    const int rc = scan[pos];
    const int tbl = (rc != 0);
    const int value = coeff_ptr[rc];
    const int inside_zero_bin = value < upper[tbl] && value > lower[tbl];

    if (!inside_zero_bin) kept[num_kept++] = pos;
  }

  // Pass 2: quantize the kept positions only (num_kept may be zero).
  for (k = 0; k < num_kept; ++k) {
    const int scan_pos = kept[k];
    const int rc = scan[scan_pos];
    const int tbl = (rc != 0);
    const int value = coeff_ptr[rc];
    // All-ones for a negative input, zero otherwise.
    const int sign_mask = value >> 31;
    int magnitude = (value ^ sign_mask) - sign_mask;
    int level;

    magnitude = clamp(magnitude + ROUND_POWER_OF_TWO(round_ptr[tbl], 2),
                      INT16_MIN, INT16_MAX);
    level = (magnitude * quant_ptr[tbl]) >> 16;
    level = ((level + magnitude) * quant_shift_ptr[tbl]) >> 14;

    // Restore the sign of the input on the quantized level.
    qcoeff_ptr[rc] = (level ^ sign_mask) - sign_mask;
    dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[tbl] / 4;

    if (level != 0) last_nonzero = scan_pos;
  }

  *eob_ptr = last_nonzero + 1;
}
#endif // CONFIG_TX64X64
#if CONFIG_AOM_HIGHBITDEPTH
void aom_highbd_quantize_b_32x32_c(
const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
......@@ -682,5 +985,57 @@ void aom_highbd_quantize_b_32x32_c(
}
*eob_ptr = eob + 1;
}
#if CONFIG_TX64X64
// High-bitdepth variant: quantizes n_coeffs coefficients of a 64x64 block in
// scan order (no quantization matrix).
//
// Outputs are cleared first. Unless skip_block is set, pass 1 collects the
// scan positions whose coefficient falls outside the zero bin and pass 2
// quantizes and dequantizes just those with 64-bit intermediates. Index 0 of
// the zbin / round / quant / quant_shift / dequant tables applies to
// coefficient position 0, index 1 to every other position. *eob_ptr receives
// one plus the last scan index that produced a non-zero level, or 0 when
// there is none. iscan is unused.
void aom_highbd_quantize_b_64x64_c(
    const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
    const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
    const int16_t *scan, const int16_t *iscan) {
  const int zbin_first = ROUND_POWER_OF_TWO(zbin_ptr[0], 2);
  const int zbin_rest = ROUND_POWER_OF_TWO(zbin_ptr[1], 2);
  const int upper[2] = { zbin_first, zbin_rest };
  const int lower[2] = { -zbin_first, -zbin_rest };
  int kept[4096];
  int num_kept = 0;
  int last_nonzero = -1;
  int pos, k;

  (void)iscan;

  memset(qcoeff_ptr, 0, n_coeffs * sizeof(qcoeff_ptr[0]));
  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(dqcoeff_ptr[0]));

  if (skip_block) {
    *eob_ptr = 0;
    return;
  }

  // Pass 1: keep every scan position whose coefficient is not strictly
  // inside (-zbin, zbin).
  for (pos = 0; pos < n_coeffs; ++pos) {
    const int rc = scan[pos];
    const int tbl = (rc != 0);
    const int value = coeff_ptr[rc];
    const int inside_zero_bin = value < upper[tbl] && value > lower[tbl];

    if (!inside_zero_bin) kept[num_kept++] = pos;
  }

  // Pass 2: quantize the kept positions only (num_kept may be zero).
  for (k = 0; k < num_kept; ++k) {
    const int scan_pos = kept[k];
    const int rc = scan[scan_pos];
    const int tbl = (rc != 0);
    const int value = coeff_ptr[rc];
    // All-ones for a negative input, zero otherwise.
    const int sign_mask = value >> 31;
    const int magnitude = (value ^ sign_mask) - sign_mask;
    const int64_t biased = magnitude + ROUND_POWER_OF_TWO(round_ptr[tbl], 2);
    const int64_t mixed = ((biased * quant_ptr[tbl]) >> 16) + biased;
    const uint32_t level = (uint32_t)((mixed * quant_shift_ptr[tbl]) >> 14);

    // Restore the sign of the input on the quantized level.
    qcoeff_ptr[rc] = (tran_low_t)((level ^ sign_mask) - sign_mask);
    dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[tbl] / 4;

    if (level) last_nonzero = scan_pos;
  }

  *eob_ptr = last_nonzero + 1;
}
#endif // CONFIG_TX64X64
#endif
#endif
......@@ -30,6 +30,13 @@ void aom_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr,
const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr);
#if CONFIG_TX64X64
// Coefficient-0-only quantizer for 64x64 blocks, quantization-matrix build:
// takes the qm_ptr / iqm_ptr weight tables after the common arguments.
// Note: despite the "_ptr" suffix, quant_ptr and dequant_ptr are int16_t
// values passed by value, not pointers.
void aom_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
const int16_t *round_ptr, const int16_t quant_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr,
const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr);
#endif // CONFIG_TX64X64
void aom_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
......@@ -50,6 +57,13 @@ void aom_highbd_quantize_dc_32x32(
const int16_t quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr, const qm_val_t *qm_ptr,
const qm_val_t *iqm_ptr);
#if CONFIG_TX64X64
// High-bitdepth coefficient-0-only quantizer for 64x64 blocks,
// quantization-matrix build: takes the qm_ptr / iqm_ptr weight tables after
// the common arguments.
// Note: despite the "_ptr" suffix, quant_ptr and dequant_ptr are int16_t
// values passed by value, not pointers.
void aom_highbd_quantize_dc_64x64(
const tran_low_t *coeff_ptr, int skip_block, const int16_t *round_ptr,
const int16_t quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr, const qm_val_t *qm_ptr,
const qm_val_t *iqm_ptr);
#endif // CONFIG_TX64X64
void aom_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
......@@ -58,8 +72,10 @@ void aom_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan,
const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr);
#endif
#else
#endif // CONFIG_AOM_HIGHBITDEPTH
#else // CONFIG_AOM_QM
void aom_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block,
const int16_t *round_ptr, const int16_t quant_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
......@@ -68,7 +84,12 @@ void aom_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
const int16_t *round_ptr, const int16_t quant_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr);
#if CONFIG_TX64X64
// Coefficient-0-only quantizer for 64x64 blocks, build without quantization
// matrices (no qm_ptr / iqm_ptr arguments).
// Note: despite the "_ptr" suffix, quant_ptr and dequant_ptr are int16_t
// values passed by value, not pointers.
void aom_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
const int16_t *round_ptr, const int16_t quant_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr);
#endif // CONFIG_TX64X64
#if CONFIG_AOM_HIGHBITDEPTH
void aom_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs,
int skip_block, const int16_t *round_ptr,
......@@ -81,8 +102,16 @@ void aom_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr);
#endif
#endif
#if CONFIG_TX64X64
// High-bitdepth coefficient-0-only quantizer for 64x64 blocks, build without
// quantization matrices (no qm_ptr / iqm_ptr arguments).
// Note: despite the "_ptr" suffix, quant_ptr and dequant_ptr are int16_t
// values passed by value, not pointers.
void aom_highbd_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
const int16_t *round_ptr,
const int16_t quant_ptr,
tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr,
const int16_t dequant_ptr, uint16_t *eob_ptr);
#endif // CONFIG_TX64X64
#endif // CONFIG_AOM_HIGHBITDEPTH
#endif // CONFIG_AOM_QM
#ifdef __cplusplus
} // extern "C"
......
......@@ -206,6 +206,14 @@ if (aom_config("CONFIG_NEW_QUANT") eq "yes") {
add_proto qw/void quantize_32x32_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
specialize qw/quantize_32x32_fp_nuq/;
if (aom_config("CONFIG_TX64X64") eq "yes") {
add_proto qw/void quantize_64x64_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
specialize qw/quantize_64x64_nuq/;
add_proto qw/void quantize_64x64_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
specialize qw/quantize_64x64_fp_nuq/;
}
}
# FILTER_INTRA predictor functions
......@@ -332,8 +340,15 @@ if (aom_config("CONFIG_AOM_QM") eq "yes") {
specialize qw/av1_block_error/;
add_proto qw/void av1_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
specialize qw/av1_quantize_fp/;
add_proto qw/void av1_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
specialize qw/av1_quantize_fp_32x32/;
if (aom_config("CONFIG_TX64X64") eq "yes") {
add_proto qw/void av1_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
specialize qw/av1_quantize_fp_64x64/;
}
add_proto qw/void av1_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
specialize qw/av1_fdct8x8_quant/;
......@@ -345,10 +360,18 @@ if (aom_config("CONFIG_AOM_QM") eq "yes") {
specialize qw/av1_block_error_fp neon/, "$sse2_x86inc";
add_proto qw/void av1_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
specialize qw/av1_quantize_fp/;
add_proto qw/void av1_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
specialize qw/av1_quantize_fp_32x32/;
if (aom_config("CONFIG_TX64X64") eq "yes") {
add_proto qw/void av1_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
specialize qw/av1_quantize_fp_64x64/;
}
add_proto qw/void av1_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
specialize qw/av1_fdct8x8_quant/;
}
} else {
if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
......@@ -363,6 +386,11 @@ if (aom_config("CONFIG_AOM_QM") eq "yes") {
add_proto qw/void av1_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/av1_quantize_fp_32x32/;
if (aom_config("CONFIG_TX64X64") eq "yes") {
add_proto qw/void av1_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/av1_quantize_fp_64x64/;
}
add_proto qw/void av1_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/av1_fdct8x8_quant/;
} else {
......@@ -378,6 +406,11 @@ if (aom_config("CONFIG_AOM_QM") eq "yes") {
add_proto qw/void av1_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/av1_quantize_fp_32x32/, "$ssse3_x86_64";
if (aom_config("CONFIG_TX64X64") eq "yes") {
add_proto qw/void av1_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/av1_quantize_fp_64x64/;
}
add_proto qw/void av1_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/av1_fdct8x8_quant sse2 ssse3 neon/;
}
......@@ -478,6 +511,14 @@ specialize qw/av1_full_range_search/;
add_proto qw/void av1_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
specialize qw/av1_temporal_filter_apply sse2 msa/;
if (aom_config("CONFIG_AOM_QM") eq "yes") {
add_proto qw/void av1_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale";
specialize qw/av1_quantize_b/;
} else {
add_proto qw/void av1_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale";
specialize qw/av1_quantize_b/;
}
if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
# ENCODEMB INVOKE
......@@ -493,6 +534,14 @@ if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
add_proto qw/void highbd_quantize_32x32_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
specialize qw/highbd_quantize_32x32_fp_nuq/;
if (aom_config("CONFIG_TX64X64") eq "yes") {
add_proto qw/void highbd_quantize_64x64_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";