Commit abff6788 authored by Jingning Han's avatar Jingning Han
Browse files

Fix overflow issue in SSSE3 32x32 quantization

The 32x32 quantization process can potentially have the intermediate
stacks over 16-bit range, thereby causing enc/dec mismatch. This commit
fixes this overflow issue in the SSSE3 implementation, as well as the
prototype, of 32x32 quantization.

This fixes issue 607 from webm@googlecode.

Change-Id: I85635e6ca236b90c3dcfc40d449215c7b9caa806
parent 166dc85b
......@@ -701,7 +701,7 @@ prototype void vp9_quantize_b "int16_t *coeff_ptr, intptr_t n_coeffs, int skip_b
specialize vp9_quantize_b $ssse3_x86_64
prototype void vp9_quantize_b_32x32 "int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, int16_t *zbin_ptr, int16_t *round_ptr, int16_t *quant_ptr, int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"
specialize vp9_quantize_b_32x32
specialize vp9_quantize_b_32x32 $ssse3_x86_64
#
# Structured Similarity (SSIM)
......
......@@ -84,7 +84,6 @@ void vp9_quantize_b_c(int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
*eob_ptr = eob + 1;
}
// This function works well for large transform size.
void vp9_quantize_b_32x32_c(int16_t *coeff_ptr, intptr_t n_coeffs,
int skip_block,
int16_t *zbin_ptr, int16_t *round_ptr,
......@@ -105,8 +104,8 @@ void vp9_quantize_b_32x32_c(int16_t *coeff_ptr, intptr_t n_coeffs,
eob = -1;
// Base ZBIN
zbins[0] = zbin_ptr[0] + zbin_oq_value;
zbins[1] = zbin_ptr[1] + zbin_oq_value;
zbins[0] = ROUND_POWER_OF_TWO(zbin_ptr[0] + zbin_oq_value, 1);
zbins[1] = ROUND_POWER_OF_TWO(zbin_ptr[1] + zbin_oq_value, 1);
nzbins[0] = zbins[0] * -1;
nzbins[1] = zbins[1] * -1;
......@@ -114,7 +113,7 @@ void vp9_quantize_b_32x32_c(int16_t *coeff_ptr, intptr_t n_coeffs,
// Pre-scan pass
for (i = 0; i < n_coeffs; i++) {
rc = scan[i];
z = coeff_ptr[rc] * 2;
z = coeff_ptr[rc];
// If the coefficient is out of the base ZBIN range, keep it for
// quantization.
......@@ -130,14 +129,14 @@ void vp9_quantize_b_32x32_c(int16_t *coeff_ptr, intptr_t n_coeffs,
// Calculate ZBIN
zbin = (zbins[rc != 0]);
z = coeff_ptr[rc] * 2;
z = coeff_ptr[rc];
sz = (z >> 31); // sign of z
x = (z ^ sz) - sz; // x = abs(z)
if (x >= zbin) {
x += (round_ptr[rc != 0]);
x += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
y = (((int)(((int)(x * quant_ptr[rc != 0]) >> 16) + x)) *
quant_shift_ptr[rc != 0]) >> 16; // quantize (x)
quant_shift_ptr[rc != 0]) >> 15; // quantize (x)
x = (y ^ sz) - sz; // get the sign back
qcoeff_ptr[rc] = x; // write to destination
......
......@@ -36,6 +36,14 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
pshufd m4, m4, 0
mova m2, [quantq] ; m2 = quant
paddw m0, m4 ; m0 = zbin + zbin_oq
%ifidn %1, b_32x32
pcmpeqw m5, m5
psrlw m5, 15
paddw m0, m5
paddw m1, m5
psrlw m0, 1 ; m0 = (m0 + 1) / 2
psrlw m1, 1 ; m1 = (m1 + 1) / 2
%endif
mova m3, [r2q] ; m3 = dequant
psubw m0, [pw_1]
mov r2, shiftmp
......@@ -43,6 +51,9 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
mova m4, [r2] ; m4 = shift
mov r4, dqcoeffmp
mov r5, iscanmp
%ifidn %1, b_32x32
psllw m4, 1
%endif
pxor m5, m5 ; m5 = dedicated zero
DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, d6, eob
lea coeffq, [ coeffq+ncoeffq*2]
......@@ -56,10 +67,6 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i]
pabsw m6, m9 ; m6 = abs(m9)
pabsw m11, m10 ; m11 = abs(m10)
%ifidn %1, b_32x32
paddw m6, m6
paddw m11, m11
%endif
pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin
punpckhqdq m0, m0
pcmpgtw m12, m11, m0 ; m12 = c[i] >= zbin
......@@ -112,10 +119,6 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i]
pabsw m6, m9 ; m6 = abs(m9)
pabsw m11, m10 ; m11 = abs(m10)
%ifidn %1, b_32x32
paddw m6, m6
paddw m11, m11
%endif
pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin
pcmpgtw m12, m11, m0 ; m12 = c[i] >= zbin
%ifidn %1, b_32x32
......@@ -164,6 +167,7 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
pmaxsw m8, m13
add ncoeffq, mmsize
jl .ac_only_loop
%ifidn %1, b_32x32
jmp .accumulate_eob
.skip_iter:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment