Commit fabc7836 authored by Jingning Han
Browse files

Fix an overflow issue in SSE2 forward ADST

The step that sums three input samples could cause the
intermediate result to exceed the 16-bit limit when operating as the
second 1-D transform. This commit fixes the issue.

Change-Id: Iaf512449ac2d25ddd8a806d760afab362c62a516
parent b3c75a2d
......@@ -206,12 +206,12 @@ void fadst4_1d_sse2(__m128i *in) {
const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
__m128i u[8], v[8];
__m128i in7 = _mm_add_epi16(in[0], in[1]);
in7 = _mm_sub_epi16(in7, in[3]);
u[0] = _mm_unpacklo_epi16(in[0], in[1]);
u[1] = _mm_unpacklo_epi16(in[2], in[3]);
u[2] = _mm_unpacklo_epi16(in7, kZero);
u[3] = _mm_unpacklo_epi16(in[2], kZero);
u[4] = _mm_unpacklo_epi16(in[3], kZero);
v[0] = _mm_madd_epi16(u[0], k__sinpi_p01_p02); // s0 + s2
v[1] = _mm_madd_epi16(u[1], k__sinpi_p03_p04); // s4 + s5
......@@ -219,9 +219,10 @@ void fadst4_1d_sse2(__m128i *in) {
v[3] = _mm_madd_epi16(u[0], k__sinpi_p04_m01); // s1 - s3
v[4] = _mm_madd_epi16(u[1], k__sinpi_m03_p02); // -s4 + s6
v[5] = _mm_madd_epi16(u[3], k__sinpi_p03_p03); // s4
v[6] = _mm_madd_epi16(u[4], k__sinpi_p03_p03);
u[0] = _mm_add_epi32(v[0], v[1]);
u[1] = v[2];
u[1] = _mm_sub_epi32(v[2], v[6]);
u[2] = _mm_add_epi32(v[3], v[4]);
u[3] = _mm_sub_epi32(u[2], u[0]);
u[4] = _mm_slli_epi32(v[5], 2);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment